Fix agent output validation to prevent false verified status (#807)

* Changes from fix/cursor-fix

* feat: Enhance provider error messages with diagnostic context, address test failure, fix port change, move playwright tests to different port

* Update apps/ui/src/components/views/board-view/dialogs/add-feature-dialog.tsx

Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>

* ci: Update test server port from 3008 to 3108 and add environment configuration

* fix: Correct typo in health endpoint URL and standardize port env vars

---------

Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>
This commit is contained in:
gsxdsm
2026-02-24 20:18:40 -08:00
committed by GitHub
parent 0330c70261
commit 51e9a23ba1
36 changed files with 1610 additions and 104 deletions

View File

@@ -46,7 +46,8 @@ jobs:
echo "SERVER_PID=$SERVER_PID" >> $GITHUB_ENV
env:
PORT: 3008
PORT: 3108
TEST_SERVER_PORT: 3108
NODE_ENV: test
# Use a deterministic API key so Playwright can log in reliably
AUTOMAKER_API_KEY: test-api-key-for-e2e-tests
@@ -81,13 +82,13 @@ jobs:
# Wait for health endpoint
for i in {1..60}; do
if curl -s -f http://localhost:3008/api/health > /dev/null 2>&1; then
if curl -s -f http://localhost:3108/api/health > /dev/null 2>&1; then
echo "Backend server is ready!"
echo "=== Backend logs ==="
cat backend.log
echo ""
echo "Health check response:"
curl -s http://localhost:3008/api/health | jq . 2>/dev/null || echo "Health check: $(curl -s http://localhost:3008/api/health 2>/dev/null || echo 'No response')"
curl -s http://localhost:3108/api/health | jq . 2>/dev/null || echo "Health check: $(curl -s http://localhost:3108/api/health 2>/dev/null || echo 'No response')"
exit 0
fi
@@ -111,11 +112,11 @@ jobs:
ps aux | grep -E "(node|tsx)" | grep -v grep || echo "No node processes found"
echo ""
echo "=== Port status ==="
netstat -tlnp 2>/dev/null | grep :3008 || echo "Port 3008 not listening"
lsof -i :3008 2>/dev/null || echo "lsof not available or port not in use"
netstat -tlnp 2>/dev/null | grep :3108 || echo "Port 3108 not listening"
lsof -i :3108 2>/dev/null || echo "lsof not available or port not in use"
echo ""
echo "=== Health endpoint test ==="
curl -v http://localhost:3008/api/health 2>&1 || echo "Health endpoint failed"
curl -v http://localhost:3108/api/health 2>&1 || echo "Health endpoint failed"
# Kill the server process if it's still hanging
if kill -0 $SERVER_PID 2>/dev/null; then
@@ -132,8 +133,8 @@ jobs:
run: npm run test --workspace=apps/ui
env:
CI: true
VITE_SERVER_URL: http://localhost:3008
SERVER_URL: http://localhost:3008
VITE_SERVER_URL: http://localhost:3108
SERVER_URL: http://localhost:3108
VITE_SKIP_SETUP: 'true'
# Keep UI-side login/defaults consistent
AUTOMAKER_API_KEY: test-api-key-for-e2e-tests
@@ -148,7 +149,7 @@ jobs:
ps aux | grep -E "(node|tsx)" | grep -v grep || echo "No node processes found"
echo ""
echo "=== Port status ==="
netstat -tlnp 2>/dev/null | grep :3008 || echo "Port 3008 not listening"
netstat -tlnp 2>/dev/null | grep :3108 || echo "Port 3108 not listening"
- name: Upload Playwright report
uses: actions/upload-artifact@v4

View File

@@ -52,6 +52,12 @@ HOST=0.0.0.0
# Port to run the server on
PORT=3008
# Port to run the server on for testing
TEST_SERVER_PORT=3108
# Port to run the UI on for testing
TEST_PORT=3107
# Data directory for sessions and metadata
DATA_DIR=./data

View File

@@ -389,9 +389,14 @@ export class CopilotProvider extends CliProvider {
case 'session.error': {
const errorEvent = sdkEvent as SdkSessionErrorEvent;
const enrichedError =
errorEvent.data.message ||
(errorEvent.data.code
? `Copilot agent error (code: ${errorEvent.data.code})`
: 'Copilot agent error');
return {
type: 'error',
error: errorEvent.data.message || 'Unknown error',
error: enrichedError,
};
}

View File

@@ -562,10 +562,14 @@ export class CursorProvider extends CliProvider {
const resultEvent = cursorEvent as CursorResultEvent;
if (resultEvent.is_error) {
const errorText = resultEvent.error || resultEvent.result || '';
const enrichedError =
errorText ||
`Cursor agent failed (duration: ${resultEvent.duration_ms}ms, subtype: ${resultEvent.subtype}, session: ${resultEvent.session_id ?? 'none'})`;
return {
type: 'error',
session_id: resultEvent.session_id,
error: resultEvent.error || resultEvent.result || 'Unknown error',
error: enrichedError,
};
}

View File

@@ -381,10 +381,13 @@ export class GeminiProvider extends CliProvider {
const resultEvent = geminiEvent as GeminiResultEvent;
if (resultEvent.status === 'error') {
const enrichedError =
resultEvent.error ||
`Gemini agent failed (duration: ${resultEvent.stats?.duration_ms ?? 'unknown'}ms, session: ${resultEvent.session_id ?? 'none'})`;
return {
type: 'error',
session_id: resultEvent.session_id,
error: resultEvent.error || 'Unknown error',
error: enrichedError,
};
}
@@ -401,10 +404,12 @@ export class GeminiProvider extends CliProvider {
case 'error': {
const errorEvent = geminiEvent as GeminiResultEvent;
const enrichedError =
errorEvent.error || `Gemini agent failed (session: ${errorEvent.session_id ?? 'none'})`;
return {
type: 'error',
session_id: errorEvent.session_id,
error: errorEvent.error || 'Unknown error',
error: enrichedError,
};
}

View File

@@ -296,8 +296,28 @@ export class AgentExecutor {
}
}
} else if (msg.type === 'error') {
throw new Error(AgentExecutor.sanitizeProviderError(msg.error));
} else if (msg.type === 'result' && msg.subtype === 'success') scheduleWrite();
const sanitized = AgentExecutor.sanitizeProviderError(msg.error);
logger.error(
`[execute] Feature ${featureId} received error from provider. ` +
`raw="${msg.error}", sanitized="${sanitized}", session_id=${msg.session_id ?? 'none'}`
);
throw new Error(sanitized);
} else if (msg.type === 'result') {
if (msg.subtype === 'success') {
scheduleWrite();
} else if (msg.subtype?.startsWith('error')) {
// Non-success result subtypes from the SDK (error_max_turns, error_during_execution, etc.)
logger.error(
`[execute] Feature ${featureId} ended with error subtype: ${msg.subtype}. ` +
`session_id=${msg.session_id ?? 'none'}`
);
throw new Error(`Agent execution ended with: ${msg.subtype}`);
} else {
logger.warn(
`[execute] Feature ${featureId} received unhandled result subtype: ${msg.subtype}`
);
}
}
}
} finally {
clearInterval(streamHeartbeat);
@@ -447,16 +467,28 @@ export class AgentExecutor {
});
}
} else if (msg.type === 'error') {
// Clean the error: strip ANSI codes and redundant "Error: " prefix
const cleanedError =
(msg.error || `Error during task ${task.id}`)
.replace(/\x1b\[[0-9;]*m/g, '')
.replace(/^Error:\s*/i, '')
.trim() || `Error during task ${task.id}`;
throw new Error(cleanedError);
} else if (msg.type === 'result' && msg.subtype === 'success') {
taskOutput += msg.result || '';
responseText += msg.result || '';
const fallback = `Error during task ${task.id}`;
const sanitized = AgentExecutor.sanitizeProviderError(msg.error || fallback);
logger.error(
`[executeTasksLoop] Feature ${featureId} task ${task.id} received error from provider. ` +
`raw="${msg.error}", sanitized="${sanitized}", session_id=${msg.session_id ?? 'none'}`
);
throw new Error(sanitized);
} else if (msg.type === 'result') {
if (msg.subtype === 'success') {
taskOutput += msg.result || '';
responseText += msg.result || '';
} else if (msg.subtype?.startsWith('error')) {
logger.error(
`[executeTasksLoop] Feature ${featureId} task ${task.id} ended with error subtype: ${msg.subtype}. ` +
`session_id=${msg.session_id ?? 'none'}`
);
throw new Error(`Agent execution ended with: ${msg.subtype}`);
} else {
logger.warn(
`[executeTasksLoop] Feature ${featureId} task ${task.id} received unhandled result subtype: ${msg.subtype}`
);
}
}
}
if (!taskCompleteDetected)

View File

@@ -60,6 +60,12 @@ import type {
const logger = createLogger('ExecutionService');
/** Marker written by agent-executor for each tool invocation. */
const TOOL_USE_MARKER = '🔧 Tool:';
/** Minimum trimmed output length to consider agent work meaningful. */
const MIN_MEANINGFUL_OUTPUT_LENGTH = 200;
export class ExecutionService {
constructor(
private eventBus: TypedEventBus,
@@ -409,7 +415,41 @@ Please continue from where you left off and complete all remaining tasks. Use th
}
}
const finalStatus = feature.skipTests ? 'waiting_approval' : 'verified';
// Read agent output before determining final status.
// CLI-based providers (Cursor, Codex, etc.) may exit quickly without doing
// meaningful work. Check output to avoid prematurely marking as 'verified'.
const outputPath = path.join(getFeatureDir(projectPath, featureId), 'agent-output.md');
let agentOutput = '';
try {
agentOutput = (await secureFs.readFile(outputPath, 'utf-8')) as string;
} catch {
/* */
}
// Determine if the agent did meaningful work by checking for tool usage
// indicators in the output. The agent executor writes "🔧 Tool:" markers
// each time a tool is invoked. No tool usage suggests the CLI exited
// without performing implementation work.
const hasToolUsage = agentOutput.includes(TOOL_USE_MARKER);
const isOutputTooShort = agentOutput.trim().length < MIN_MEANINGFUL_OUTPUT_LENGTH;
const agentDidWork = hasToolUsage && !isOutputTooShort;
let finalStatus: 'verified' | 'waiting_approval';
if (feature.skipTests) {
finalStatus = 'waiting_approval';
} else if (!agentDidWork) {
// Agent didn't produce meaningful output (e.g., CLI exited quickly).
// Route to waiting_approval so the user can review and re-run.
finalStatus = 'waiting_approval';
logger.warn(
`[executeFeature] Feature ${featureId}: agent produced insufficient output ` +
`(${agentOutput.trim().length}/${MIN_MEANINGFUL_OUTPUT_LENGTH} chars, toolUsage=${hasToolUsage}). ` +
`Setting status to waiting_approval instead of verified.`
);
} else {
finalStatus = 'verified';
}
await this.updateFeatureStatusFn(projectPath, featureId, finalStatus);
this.recordSuccessFn();
@@ -421,13 +461,6 @@ Please continue from where you left off and complete all remaining tasks. Use th
const hasIncompleteTasks = totalTasks > 0 && completedTasks < totalTasks;
try {
const outputPath = path.join(getFeatureDir(projectPath, featureId), 'agent-output.md');
let agentOutput = '';
try {
agentOutput = (await secureFs.readFile(outputPath, 'utf-8')) as string;
} catch {
/* */
}
if (agentOutput) {
const summary = extractSummary(agentOutput);
if (summary) await this.saveFeatureSummaryFn(projectPath, featureId, summary);

View File

@@ -0,0 +1,20 @@
import { describe, it, expect } from 'vitest';
import { normalizeThinkingLevelForModel } from '@automaker/types';
/**
 * Table-driven checks for normalizeThinkingLevelForModel: each row names the
 * scenario, the model/level pair passed in, and the level expected back.
 */
describe('normalizeThinkingLevelForModel', () => {
  const scenarios = [
    {
      title: 'preserves explicitly selected none for Opus models',
      model: 'claude-opus',
      requested: 'none',
      expected: 'none',
    },
    {
      title: 'falls back to none when Opus receives an unsupported manual thinking level',
      model: 'claude-opus',
      requested: 'medium',
      expected: 'none',
    },
    {
      title: 'keeps adaptive for Opus when adaptive is selected',
      model: 'claude-opus',
      requested: 'adaptive',
      expected: 'adaptive',
    },
    {
      title: 'preserves supported manual levels for non-Opus models',
      model: 'claude-sonnet',
      requested: 'high',
      expected: 'high',
    },
  ] as const;

  // One test per row, registered in table order so reporting matches the list above.
  for (const { title, model, requested, expected } of scenarios) {
    it(title, () => {
      expect(normalizeThinkingLevelForModel(model, requested)).toBe(expected);
    });
  }
});

View File

@@ -397,6 +397,45 @@ describe('copilot-provider.ts', () => {
});
});
it('should use error code in fallback when session.error message is empty', () => {
  // An empty message pushes the provider onto its fallback text, which must carry the code.
  const errorCode = 'RATE_LIMIT_EXCEEDED';
  const sessionErrorEvent = {
    type: 'session.error',
    data: { message: '', code: errorCode },
  };

  const normalized = provider.normalizeEvent(sessionErrorEvent);

  expect(normalized).not.toBeNull();
  expect(normalized!.type).toBe('error');
  expect(normalized!.error).toContain(errorCode);
  expect(normalized!.error).not.toBe('Unknown error');
});
it('should return generic "Copilot agent error" fallback when both message and code are empty', () => {
  // Neither field carries information, so only the generic label is expected.
  const sessionErrorEvent = {
    type: 'session.error',
    data: { message: '', code: '' },
  };

  const normalized = provider.normalizeEvent(sessionErrorEvent);

  expect(normalized).not.toBeNull();
  expect(normalized!.type).toBe('error');
  expect(normalized!.error).toBe('Copilot agent error');
  // The previous opaque fallback text must not come back.
  expect(normalized!.error).not.toBe('Unknown error');
});
it('should return generic "Copilot agent error" fallback when data has no code field', () => {
  // Same expectation as an empty code: a missing `code` property yields the generic label.
  const sessionErrorEvent = {
    type: 'session.error',
    data: { message: '' },
  };

  const normalized = provider.normalizeEvent(sessionErrorEvent);

  expect(normalized).not.toBeNull();
  expect(normalized!.type).toBe('error');
  expect(normalized!.error).toBe('Copilot agent error');
});
it('should return null for unknown event types', () => {
const event = { type: 'unknown.event' };

View File

@@ -1,4 +1,4 @@
import { describe, it, expect } from 'vitest';
import { describe, it, expect, beforeEach } from 'vitest';
import { CursorProvider } from '@/providers/cursor-provider.js';
describe('cursor-provider.ts', () => {
@@ -36,4 +36,122 @@ describe('cursor-provider.ts', () => {
expect(args).not.toContain('--resume');
});
});
/**
 * Covers how CursorProvider.normalizeEvent turns `result` events into
 * provider messages: explicit error text, the `result` text fallback, the
 * diagnostic fallback when both are empty, and the non-error pass-through.
 */
describe('normalizeEvent - result error handling', () => {
  let provider: CursorProvider;

  // A bare prototype instance is enough here; the constructor is never invoked.
  beforeEach(() => {
    provider = Object.create(CursorProvider.prototype) as CursorProvider;
  });

  /**
   * Builds a failed Cursor `result` event with empty `error`/`result` text.
   * Fields given by the caller are layered on top; nothing else is added, so
   * omitting `session_id` leaves it absent from the event.
   */
  const failedResultEvent = (fields: Record<string, unknown>) => ({
    type: 'result',
    is_error: true,
    error: '',
    result: '',
    subtype: 'error',
    ...fields,
  });

  it('returns error message from resultEvent.error when is_error=true', () => {
    const normalized = provider.normalizeEvent(
      failedResultEvent({ error: 'Rate limit exceeded', duration_ms: 3000, session_id: 'sess-123' })
    );

    expect(normalized).not.toBeNull();
    expect(normalized!.type).toBe('error');
    expect(normalized!.error).toBe('Rate limit exceeded');
  });

  it('falls back to resultEvent.result when error field is empty and is_error=true', () => {
    const normalized = provider.normalizeEvent(
      failedResultEvent({
        result: 'Process terminated unexpectedly',
        duration_ms: 5000,
        session_id: 'sess-456',
      })
    );

    expect(normalized).not.toBeNull();
    expect(normalized!.type).toBe('error');
    expect(normalized!.error).toBe('Process terminated unexpectedly');
  });

  it('builds diagnostic fallback when both error and result are empty and is_error=true', () => {
    const normalized = provider.normalizeEvent(
      failedResultEvent({ duration_ms: 5000, session_id: 'sess-789' })
    );

    expect(normalized).not.toBeNull();
    expect(normalized!.type).toBe('error');
    // Duration and session must appear in place of the old 'Unknown error' text.
    expect(normalized!.error).toContain('5000ms');
    expect(normalized!.error).toContain('sess-789');
    expect(normalized!.error).not.toBe('Unknown error');
  });

  it('preserves session_id in error message', () => {
    const normalized = provider.normalizeEvent(
      failedResultEvent({
        error: 'Timeout occurred',
        duration_ms: 30000,
        session_id: 'my-session-id',
      })
    );

    expect(normalized!.session_id).toBe('my-session-id');
  });

  it('uses "none" when session_id is missing from diagnostic fallback', () => {
    // No session_id is supplied, so the event carries none at all.
    const normalized = provider.normalizeEvent(failedResultEvent({ duration_ms: 5000 }));

    expect(normalized).not.toBeNull();
    expect(normalized!.type).toBe('error');
    expect(normalized!.error).toContain('none');
    expect(normalized!.error).not.toContain('undefined');
  });

  it('returns success result when is_error=false', () => {
    const successEvent = {
      type: 'result',
      is_error: false,
      error: '',
      result: 'Completed successfully',
      subtype: 'success',
      duration_ms: 2000,
      session_id: 'sess-ok',
    };

    const normalized = provider.normalizeEvent(successEvent);

    expect(normalized).not.toBeNull();
    expect(normalized!.type).toBe('result');
    expect(normalized!.subtype).toBe('success');
  });
});
});

View File

@@ -1,5 +1,6 @@
import { describe, it, expect, beforeEach } from 'vitest';
import { GeminiProvider } from '@/providers/gemini-provider.js';
import type { ProviderMessage } from '@automaker/types';
describe('gemini-provider.ts', () => {
let provider: GeminiProvider;
@@ -116,4 +117,140 @@ describe('gemini-provider.ts', () => {
expect(args[modelIndex + 1]).toBe('gemini-2.5-pro');
});
});
/**
 * Covers GeminiProvider.normalizeEvent for failures reported two ways:
 * a `result` event with status=error, and a standalone `error` event.
 * Both must surface explicit error text when present and a diagnostic
 * "Gemini agent failed" fallback when the text is empty.
 */
describe('normalizeEvent - error handling', () => {
  /** `result` event with status=error and empty error text, plus caller-supplied fields. */
  const failedResultEvent = (fields: Record<string, unknown> = {}) => ({
    type: 'result',
    status: 'error',
    error: '',
    ...fields,
  });

  /** Standalone `error` event with empty error text, plus caller-supplied fields. */
  const standaloneErrorEvent = (fields: Record<string, unknown> = {}) => ({
    type: 'error',
    error: '',
    ...fields,
  });

  it('returns error from result event when status=error and error field is set', () => {
    const payload = failedResultEvent({
      error: 'Model overloaded',
      session_id: 'sess-gemini-1',
      stats: { duration_ms: 4000, total_tokens: 0 },
    });

    const normalized = provider.normalizeEvent(payload) as ProviderMessage;

    expect(normalized).not.toBeNull();
    expect(normalized.type).toBe('error');
    expect(normalized.error).toBe('Model overloaded');
    expect(normalized.session_id).toBe('sess-gemini-1');
  });

  it('builds diagnostic fallback when result event has status=error but empty error field', () => {
    const payload = failedResultEvent({
      session_id: 'sess-gemini-2',
      stats: { duration_ms: 7500, total_tokens: 0 },
    });

    const normalized = provider.normalizeEvent(payload) as ProviderMessage;

    expect(normalized).not.toBeNull();
    expect(normalized.type).toBe('error');
    // Duration and session stand in for the old 'Unknown error' text.
    expect(normalized.error).toContain('7500ms');
    expect(normalized.error).toContain('sess-gemini-2');
    expect(normalized.error).not.toBe('Unknown error');
  });

  it('builds fallback with "unknown" duration when stats are missing', () => {
    // No stats field is supplied at all.
    const payload = failedResultEvent({ session_id: 'sess-gemini-nostats' });

    const normalized = provider.normalizeEvent(payload) as ProviderMessage;

    expect(normalized).not.toBeNull();
    expect(normalized.type).toBe('error');
    expect(normalized.error).toContain('unknown');
  });

  it('returns error from standalone error event with error field set', () => {
    const payload = standaloneErrorEvent({ error: 'API key invalid', session_id: 'sess-gemini-3' });

    const normalized = provider.normalizeEvent(payload) as ProviderMessage;

    expect(normalized).not.toBeNull();
    expect(normalized.type).toBe('error');
    expect(normalized.error).toBe('API key invalid');
  });

  it('builds diagnostic fallback when standalone error event has empty error field', () => {
    const payload = standaloneErrorEvent({ session_id: 'sess-gemini-empty' });

    const normalized = provider.normalizeEvent(payload) as ProviderMessage;

    expect(normalized).not.toBeNull();
    expect(normalized.type).toBe('error');
    // The session id must be part of the message, not just 'Unknown error'.
    expect(normalized.error).toContain('sess-gemini-empty');
    expect(normalized.error).not.toBe('Unknown error');
  });

  it('builds fallback mentioning "none" when session_id is missing from error event', () => {
    // No session_id is supplied.
    const payload = standaloneErrorEvent();

    const normalized = provider.normalizeEvent(payload) as ProviderMessage;

    expect(normalized).not.toBeNull();
    expect(normalized.type).toBe('error');
    expect(normalized.error).toContain('none');
  });

  it('uses consistent "Gemini agent failed" label for both result and error event fallbacks', () => {
    const fromResult = provider.normalizeEvent(
      failedResultEvent({ session_id: 'sess-r', stats: { duration_ms: 1000 } })
    ) as ProviderMessage;
    const fromError = provider.normalizeEvent(
      standaloneErrorEvent({ session_id: 'sess-e' })
    ) as ProviderMessage;

    // Both fallback paths share the same label.
    expect(fromResult.error).toContain('Gemini agent failed');
    expect(fromError.error).toContain('Gemini agent failed');
  });

  it('returns success result when result event has status=success', () => {
    const payload = {
      type: 'result',
      status: 'success',
      error: '',
      session_id: 'sess-gemini-ok',
      stats: { duration_ms: 1200, total_tokens: 500 },
    };

    const normalized = provider.normalizeEvent(payload) as ProviderMessage;

    expect(normalized).not.toBeNull();
    expect(normalized.type).toBe('result');
    expect(normalized.subtype).toBe('success');
  });
});
});

View File

@@ -685,6 +685,309 @@ describe('AgentExecutor', () => {
await expect(executor.execute(options, callbacks)).rejects.toThrow('API rate limit exceeded');
});
/**
 * Runs AgentExecutor.execute against a provider stub that replays
 * `streamMessages` in order, using the suite's shared mocks and the fixed
 * options/callbacks every provider-stream test below relies on.
 *
 * @param streamMessages Messages the stubbed `executeQuery` yields, in order.
 * @returns The promise returned by `executor.execute`, so callers can assert
 *          on either its resolved value or its rejection.
 */
const runExecutorWithStream = (streamMessages: readonly unknown[]) => {
  const executor = new AgentExecutor(
    mockEventBus,
    mockFeatureStateManager,
    mockPlanApprovalService,
    mockSettingsService
  );
  const mockProvider = {
    getName: () => 'mock',
    executeQuery: vi.fn().mockImplementation(function* () {
      yield* streamMessages;
    }),
  } as unknown as BaseProvider;
  const options: AgentExecutionOptions = {
    workDir: '/test',
    featureId: 'test-feature',
    prompt: 'Test prompt',
    projectPath: '/project',
    abortController: new AbortController(),
    provider: mockProvider,
    effectiveBareModel: 'claude-sonnet-4-6',
    planningMode: 'skip',
  };
  const callbacks = {
    waitForApproval: vi.fn().mockResolvedValue({ approved: true }),
    saveFeatureSummary: vi.fn(),
    updateFeatureSummary: vi.fn(),
    buildTaskPrompt: vi.fn().mockReturnValue('task prompt'),
  };
  return executor.execute(options, callbacks);
};

it('should throw "Unknown error" when provider stream yields error with empty message', async () => {
  const stream = [{ type: 'error', error: '', session_id: 'sess-123' }];

  await expect(runExecutorWithStream(stream)).rejects.toThrow('Unknown error');
});

it('should throw with sanitized error when provider yields ANSI-decorated error', async () => {
  // ANSI color codes and the "Error: " prefix are expected to be stripped.
  const stream = [{ type: 'error', error: '\x1b[31mError: Connection refused\x1b[0m' }];

  await expect(runExecutorWithStream(stream)).rejects.toThrow('Connection refused');
});

// Every `error*` result subtype must reject with the subtype in the message.
// Each row keeps the exact stream its original standalone test used, including
// the preceding assistant message and the presence or absence of session_id.
const errorSubtypeStreams: ReadonlyArray<{ subtype: string; stream: readonly unknown[] }> = [
  {
    subtype: 'error_max_turns',
    stream: [
      {
        type: 'assistant',
        message: {
          content: [{ type: 'text', text: 'Working on it...' }],
        },
      },
      { type: 'result', subtype: 'error_max_turns', session_id: 'sess-456' },
    ],
  },
  {
    subtype: 'error_during_execution',
    stream: [{ type: 'result', subtype: 'error_during_execution', session_id: 'sess-789' }],
  },
  {
    subtype: 'error_max_structured_output_retries',
    stream: [{ type: 'result', subtype: 'error_max_structured_output_retries' }],
  },
  {
    subtype: 'error_max_budget_usd',
    stream: [{ type: 'result', subtype: 'error_max_budget_usd', session_id: 'sess-budget' }],
  },
];

for (const { subtype, stream } of errorSubtypeStreams) {
  it(`should throw when result subtype is ${subtype}`, async () => {
    await expect(runExecutorWithStream(stream)).rejects.toThrow(
      `Agent execution ended with: ${subtype}`
    );
  });
}

it('should NOT throw when result subtype is success', async () => {
  const stream = [
    {
      type: 'assistant',
      message: {
        content: [{ type: 'text', text: 'Done!' }],
      },
    },
    { type: 'result', subtype: 'success', session_id: 'sess-ok' },
  ];

  // Should resolve without throwing
  const result = await runExecutorWithStream(stream);
  expect(result.aborted).toBe(false);
  expect(result.responseText).toContain('Done!');
});
it('should throw error when authentication fails in response', async () => {
const executor = new AgentExecutor(
mockEventBus,

View File

@@ -0,0 +1,192 @@
import { describe, it, expect } from 'vitest';
/**
* Contract tests verifying the tool marker format used by agent-executor
* (which writes agent output) and execution-service (which reads it to
* determine if the agent did meaningful work).
*
* The agent-executor writes: `\n🔧 Tool: ${block.name}\n`
* The execution-service checks: `agentOutput.includes('🔧 Tool:')`
*
* These tests ensure the marker format contract stays consistent and
* document the exact detection logic used for status determination.
*/
// Prefix that execution-service looks for when scanning agent output
const TOOL_MARKER = '🔧 Tool:';
// Trimmed output shorter than this does not count as "meaningful work"
const MIN_OUTPUT_LENGTH = 200;

/**
 * Reproduces the text agent-executor emits for a tool_use block: a marker
 * line naming the tool, optionally followed by the pretty-printed JSON input.
 * See: agent-executor.ts line ~293
 *
 * @param toolName Name printed after the marker.
 * @param input Optional tool input; when given, an `Input:` section is appended.
 * @returns The formatted block, starting and ending with a newline.
 */
function formatToolUseBlock(toolName: string, input?: Record<string, unknown>): string {
  const header = `\n${TOOL_MARKER} ${toolName}\n`;
  return input ? `${header}Input: ${JSON.stringify(input, null, 2)}\n` : header;
}

/**
 * Reproduces execution-service's final-status decision.
 * See: execution-service.ts lines ~427-429
 *
 * @param agentOutput Full text of the agent's output.
 * @param skipTests When true the result is always `waiting_approval`.
 * @returns `verified` only when the output contains a tool marker AND its
 *          trimmed length reaches MIN_OUTPUT_LENGTH; otherwise `waiting_approval`.
 */
function validateAgentOutput(
  agentOutput: string,
  skipTests: boolean
): 'verified' | 'waiting_approval' {
  if (skipTests) return 'waiting_approval';

  const usedTools = agentOutput.includes(TOOL_MARKER);
  const longEnough = agentOutput.trim().length >= MIN_OUTPUT_LENGTH;
  return usedTools && longEnough ? 'verified' : 'waiting_approval';
}
describe('Agent Output Validation - Contract Tests', () => {
// Pins the text shape produced by formatToolUseBlock: marker, tool name, optional JSON input.
describe('tool marker format contract', () => {
  it('agent-executor tool format contains the expected marker', () => {
    expect(formatToolUseBlock('Read', { file_path: '/src/index.ts' })).toContain(TOOL_MARKER);
  });

  it('agent-executor tool format includes tool name after marker', () => {
    const editInput = {
      file_path: '/src/app.ts',
      old_string: 'foo',
      new_string: 'bar',
    };
    expect(formatToolUseBlock('Edit', editInput)).toContain('🔧 Tool: Edit');
  });

  it('agent-executor tool format includes JSON input', () => {
    const rendered = formatToolUseBlock('Read', { file_path: '/src/index.ts' });
    expect(rendered).toContain('Input: ');
    // Input is pretty-printed, so key and value are separated by ": ".
    expect(rendered).toContain('"file_path": "/src/index.ts"');
  });

  it('agent-executor tool format works without input', () => {
    const rendered = formatToolUseBlock('Bash');
    expect(rendered).toContain('🔧 Tool: Bash');
    expect(rendered).not.toContain('Input:');
  });

  it('marker includes colon and space to avoid false positives', () => {
    // The marker must stay specific enough not to match other emoji patterns.
    expect(TOOL_MARKER).toBe('🔧 Tool:');
    expect(TOOL_MARKER).toContain(':');
  });
});
/**
 * Exercises validateAgentOutput across its decision inputs: presence of a
 * tool marker, trimmed output length relative to MIN_OUTPUT_LENGTH, and the
 * skipTests override. The two boundary tests pin the exact threshold.
 */
describe('output validation logic', () => {
  it('verified: tool usage + sufficient output', () => {
    const output =
      'Starting implementation of the new feature...\n' +
      formatToolUseBlock('Read', { file_path: '/src/index.ts' }) +
      'I can see the existing code. Let me make the needed changes.\n' +
      formatToolUseBlock('Edit', { file_path: '/src/index.ts' }) +
      'Changes complete. The implementation adds new validation logic and tests.';
    expect(output.trim().length).toBeGreaterThanOrEqual(MIN_OUTPUT_LENGTH);
    expect(validateAgentOutput(output, false)).toBe('verified');
  });

  it('waiting_approval: no tool markers regardless of length', () => {
    const longOutput = 'I analyzed the codebase. '.repeat(50);
    expect(longOutput.trim().length).toBeGreaterThan(MIN_OUTPUT_LENGTH);
    expect(validateAgentOutput(longOutput, false)).toBe('waiting_approval');
  });

  it('waiting_approval: tool markers but insufficient length', () => {
    const shortOutput = formatToolUseBlock('Read', { file_path: '/src/a.ts' });
    expect(shortOutput.trim().length).toBeLessThan(MIN_OUTPUT_LENGTH);
    expect(validateAgentOutput(shortOutput, false)).toBe('waiting_approval');
  });

  it('waiting_approval: empty output', () => {
    expect(validateAgentOutput('', false)).toBe('waiting_approval');
  });

  it('waiting_approval: skipTests always overrides', () => {
    const goodOutput =
      'Starting...\n' +
      formatToolUseBlock('Read', { file_path: '/src/index.ts' }) +
      formatToolUseBlock('Edit', { file_path: '/src/index.ts' }) +
      'Done implementing. '.repeat(15);
    expect(goodOutput.trim().length).toBeGreaterThanOrEqual(MIN_OUTPUT_LENGTH);
    expect(validateAgentOutput(goodOutput, true)).toBe('waiting_approval');
  });

  it('boundary: exactly MIN_OUTPUT_LENGTH chars with tool is verified', () => {
    const tool = formatToolUseBlock('Read');
    // Only the block's leading newline is removed when the final output is
    // trimmed; its trailing newline ends up in the middle once padding is
    // appended. Size the padding from trimStart() so the trimmed total is
    // exactly the threshold (trim() here would make it one char too long).
    const padding = 'x'.repeat(MIN_OUTPUT_LENGTH - tool.trimStart().length);
    const output = tool + padding;
    expect(output.trim().length).toBe(MIN_OUTPUT_LENGTH);
    expect(validateAgentOutput(output, false)).toBe('verified');
  });

  it('boundary: MIN_OUTPUT_LENGTH - 1 chars with tool is waiting_approval', () => {
    const marker = `${TOOL_MARKER} Read\n`;
    const padding = 'x'.repeat(MIN_OUTPUT_LENGTH - 1 - marker.length);
    const output = marker + padding;
    expect(output.trim().length).toBe(MIN_OUTPUT_LENGTH - 1);
    expect(validateAgentOutput(output, false)).toBe('waiting_approval');
  });
});
describe('realistic provider scenarios', () => {
  it('Claude SDK agent with multiple tools → verified', () => {
    // Read followed by Edit on the same component, with prose in between.
    const output = [
      "I'll implement the feature.\n\n",
      formatToolUseBlock('Read', { file_path: '/src/components/App.tsx' }),
      'I see the component. Let me update it.\n\n',
      formatToolUseBlock('Edit', {
        file_path: '/src/components/App.tsx',
        old_string: 'const App = () => {',
        new_string: 'const App: React.FC = () => {',
      }),
      'Done. The component is now typed correctly.\n',
    ].join('');

    expect(validateAgentOutput(output, false)).toBe('verified');
  });

  it('Cursor CLI quick exit (no tools) → waiting_approval', () => {
    // Two short lines of text and no tool block at all.
    const quickExit = 'Task received. Processing...\nResult: completed successfully.';
    expect(validateAgentOutput(quickExit, false)).toBe('waiting_approval');
  });

  it('Codex CLI with brief acknowledgment → waiting_approval', () => {
    // Acknowledgment only; no tool block.
    const acknowledgment = 'Understood the task. Starting implementation.\nDone.';
    expect(validateAgentOutput(acknowledgment, false)).toBe('waiting_approval');
  });

  it('Agent that only reads but makes no edits (single Read tool, short output) → waiting_approval', () => {
    const readOnly = `${formatToolUseBlock('Read', { file_path: '/src/index.ts' })}File read.`;

    // Precondition: the fixture is below the minimum length.
    expect(readOnly.trim().length).toBeLessThan(MIN_OUTPUT_LENGTH);
    expect(validateAgentOutput(readOnly, false)).toBe('waiting_approval');
  });

  it('Agent with extensive tool usage and explanation → verified', () => {
    // Five Read blocks (each followed by a short note), then one Edit block.
    const parts: string[] = ['Analyzing the codebase for the authentication feature.\n\n'];
    for (let index = 0; index < 5; index += 1) {
      parts.push(formatToolUseBlock('Read', { file_path: `/src/auth/handler${index}.ts` }));
      parts.push(`Found handler ${index}. `);
    }
    parts.push(
      formatToolUseBlock('Edit', {
        file_path: '/src/auth/handler0.ts',
        old_string: 'function login() {}',
        new_string: 'async function login(creds: Credentials) { ... }',
      })
    );
    parts.push('Implementation complete with all authentication changes applied.\n');

    expect(validateAgentOutput(parts.join(''), false)).toBe('verified');
  });
});
});

View File

@@ -211,7 +211,14 @@ describe('execution-service.ts', () => {
});
// Default mocks for secureFs
vi.mocked(secureFs.readFile).mockResolvedValue('Agent output content');
// Include tool usage markers to simulate meaningful agent output.
// The execution service checks for '🔧 Tool:' markers and minimum
// output length to determine if the agent did real work.
vi.mocked(secureFs.readFile).mockResolvedValue(
'Starting implementation...\n\n🔧 Tool: Read\nInput: {"file_path": "/src/index.ts"}\n\n' +
'🔧 Tool: Edit\nInput: {"file_path": "/src/index.ts", "old_string": "foo", "new_string": "bar"}\n\n' +
'Implementation complete. Updated the code as requested.'
);
vi.mocked(secureFs.access).mockResolvedValue(undefined);
// Re-setup platform mocks
@@ -1433,4 +1440,439 @@ describe('execution-service.ts', () => {
);
});
});
describe('executeFeature - agent output validation', () => {
/**
 * Builds a fake agent transcript: an opening line, `toolCount` Edit tool
 * blocks (each targeting its own numbered file), and a closing line that
 * ends with `extraText`.
 */
const makeAgentOutput = (toolCount: number, extraText = ''): string => {
  const pieces: string[] = ['Starting implementation...\n\n'];
  for (let index = 0; index < toolCount; index += 1) {
    pieces.push(
      `🔧 Tool: Edit\nInput: {"file_path": "/src/file${index}.ts", "old_string": "old${index}", "new_string": "new${index}"}\n\n`
    );
  }
  pieces.push(`Implementation complete. ${extraText}`);
  return pieces.join('');
};
/**
 * Constructs a new ExecutionService from the current values of the shared
 * mock variables. Tests that reassign one of those mocks (for example
 * mockLoadFeatureFn) call this afterwards so the new mock is the one used.
 * The arguments are positional, so their order must not change.
 */
const createServiceWithMocks = () =>
  new ExecutionService(
    mockEventBus,
    mockConcurrencyManager,
    mockWorktreeResolver,
    mockSettingsService,
    mockRunAgentFn,
    mockExecutePipelineFn,
    mockUpdateFeatureStatusFn,
    mockLoadFeatureFn,
    mockGetPlanningPromptPrefixFn,
    mockSaveFeatureSummaryFn,
    mockRecordLearningsFn,
    mockContextExistsFn,
    mockResumeFeatureFn,
    mockTrackFailureFn,
    mockSignalPauseFn,
    mockRecordSuccessFn,
    mockSaveExecutionStateFn,
    mockLoadContextFilesFn
  );
it('sets verified when agent output has tool usage and sufficient length', async () => {
  // Three Edit blocks plus a closing sentence.
  const agentTranscript = makeAgentOutput(
    3,
    'Updated authentication module with new login flow.'
  );
  vi.mocked(secureFs.readFile).mockResolvedValue(agentTranscript);

  // This case runs against the shared `service` instance rather than a freshly built one.
  await service.executeFeature('/test/project', 'feature-1');

  expect(mockUpdateFeatureStatusFn).toHaveBeenCalledWith('/test/project', 'feature-1', 'verified');
});
it('sets waiting_approval when agent output is empty', async () => {
  // readFile resolves to an empty string.
  vi.mocked(secureFs.readFile).mockResolvedValue('');
  const executionService = createServiceWithMocks();

  await executionService.executeFeature('/test/project', 'feature-1');

  expect(mockUpdateFeatureStatusFn).toHaveBeenCalledWith(
    '/test/project',
    'feature-1',
    'waiting_approval'
  );
});
it('sets waiting_approval when agent output has no tool usage markers', async () => {
  // Plenty of text, but the agent never used a tool: no marker appears anywhere.
  const sentence = 'I analyzed the codebase and found several issues. ';
  const proseOnly = sentence.repeat(20);
  vi.mocked(secureFs.readFile).mockResolvedValue(proseOnly);
  const executionService = createServiceWithMocks();

  await executionService.executeFeature('/test/project', 'feature-1');

  expect(mockUpdateFeatureStatusFn).toHaveBeenCalledWith(
    '/test/project',
    'feature-1',
    'waiting_approval'
  );
});
it('sets waiting_approval when agent output has tool markers but is too short', async () => {
  // A tool marker is present, yet the whole output is under 200 characters.
  const briefToolOutput = '🔧 Tool: Read\nInput: {"file_path": "/src/index.ts"}\nDone.';
  expect(briefToolOutput.trim().length).toBeLessThan(200);

  vi.mocked(secureFs.readFile).mockResolvedValue(briefToolOutput);
  const executionService = createServiceWithMocks();
  await executionService.executeFeature('/test/project', 'feature-1');

  expect(mockUpdateFeatureStatusFn).toHaveBeenCalledWith(
    '/test/project',
    'feature-1',
    'waiting_approval'
  );
});
it('sets waiting_approval when agent output file is missing (ENOENT)', async () => {
  // readFile rejects, standing in for a missing output file.
  const missingFileError = new Error('ENOENT');
  vi.mocked(secureFs.readFile).mockRejectedValue(missingFileError);
  const executionService = createServiceWithMocks();

  await executionService.executeFeature('/test/project', 'feature-1');

  expect(mockUpdateFeatureStatusFn).toHaveBeenCalledWith(
    '/test/project',
    'feature-1',
    'waiting_approval'
  );
});
it('sets waiting_approval when agent output is only whitespace', async () => {
  // Spaces, newlines and a tab only.
  const blankOutput = ' \n\n\t \n ';
  vi.mocked(secureFs.readFile).mockResolvedValue(blankOutput);
  const executionService = createServiceWithMocks();

  await executionService.executeFeature('/test/project', 'feature-1');

  expect(mockUpdateFeatureStatusFn).toHaveBeenCalledWith(
    '/test/project',
    'feature-1',
    'waiting_approval'
  );
});
it('sets verified when output is exactly at the 200 char threshold with tool usage', async () => {
  // Build an output whose trimmed length is exactly 200: a tool block followed
  // by filler. The string starts with the marker and ends with 'x', so trim()
  // removes nothing and the total is toolMarker.length + padding.length.
  const toolMarker = '🔧 Tool: Edit\nInput: {"file_path": "/src/index.ts"}\n';
  const padding = 'x'.repeat(200 - toolMarker.length);
  const output = toolMarker + padding;
  // Pin the boundary exactly; a ">=" check would also accept a fixture that
  // had drifted past the threshold and no longer tested the edge.
  expect(output.trim().length).toBe(200);
  vi.mocked(secureFs.readFile).mockResolvedValue(output);
  const svc = createServiceWithMocks();
  await svc.executeFeature('/test/project', 'feature-1');
  expect(mockUpdateFeatureStatusFn).toHaveBeenCalledWith(
    '/test/project',
    'feature-1',
    'verified'
  );
});
it('sets waiting_approval when output is 199 chars with tool usage (below threshold)', async () => {
  // Marker line plus filler, sized to land one character under 200.
  const markerLine = '🔧 Tool: Read\n';
  const fillerLength = 199 - markerLine.length;
  const output = `${markerLine}${'x'.repeat(fillerLength)}`;
  expect(output.trim().length).toBe(199);

  vi.mocked(secureFs.readFile).mockResolvedValue(output);
  const executionService = createServiceWithMocks();
  await executionService.executeFeature('/test/project', 'feature-1');

  expect(mockUpdateFeatureStatusFn).toHaveBeenCalledWith(
    '/test/project',
    'feature-1',
    'waiting_approval'
  );
});
it('skipTests always takes priority over output validation', async () => {
  // Five tool blocks: without skipTests this output is expected to yield 'verified'.
  vi.mocked(secureFs.readFile).mockResolvedValue(
    makeAgentOutput(5, 'All changes applied successfully.')
  );
  // Swap in a feature flagged skipTests before building the service, so the new mock is used.
  const skipTestsFeature = { ...testFeature, skipTests: true };
  mockLoadFeatureFn = vi.fn().mockResolvedValue(skipTestsFeature);
  const executionService = createServiceWithMocks();

  await executionService.executeFeature('/test/project', 'feature-1');

  // skipTests=true means waiting_approval no matter how good the output looks.
  expect(mockUpdateFeatureStatusFn).toHaveBeenCalledWith(
    '/test/project',
    'feature-1',
    'waiting_approval'
  );
});
it('skipTests with empty output still results in waiting_approval', async () => {
  // Both conditions at once: empty output and a skipTests feature.
  vi.mocked(secureFs.readFile).mockResolvedValue('');
  const skipTestsFeature = { ...testFeature, skipTests: true };
  mockLoadFeatureFn = vi.fn().mockResolvedValue(skipTestsFeature);
  const executionService = createServiceWithMocks();

  await executionService.executeFeature('/test/project', 'feature-1');

  expect(mockUpdateFeatureStatusFn).toHaveBeenCalledWith(
    '/test/project',
    'feature-1',
    'waiting_approval'
  );
});
it('still records success even when output validation fails', async () => {
  vi.mocked(secureFs.readFile).mockResolvedValue('');
  const executionService = createServiceWithMocks();

  await executionService.executeFeature('/test/project', 'feature-1');

  // The agent ran without errors, so recordSuccess is still expected.
  expect(mockRecordSuccessFn).toHaveBeenCalled();
});
it('still extracts summary when output has content but no tool markers', async () => {
  // 300 characters of text (more than 200) with no tool marker.
  const markerlessOutput = 'A '.repeat(150);
  vi.mocked(secureFs.readFile).mockResolvedValue(markerlessOutput);
  const executionService = createServiceWithMocks();

  await executionService.executeFeature('/test/project', 'feature-1');

  // Summary extraction runs independently of the waiting_approval status.
  expect(extractSummary).toHaveBeenCalledWith(markerlessOutput);
  expect(mockSaveFeatureSummaryFn).toHaveBeenCalledWith(
    '/test/project',
    'feature-1',
    'Test summary'
  );
});
it('emits feature_complete with passes=true even when output validation routes to waiting_approval', async () => {
  vi.mocked(secureFs.readFile).mockResolvedValue('');
  const executionService = createServiceWithMocks();

  await executionService.executeFeature('/test/project', 'feature-1', false, true);

  // No error was raised during execution, so the completion event still reports a pass.
  const expectedPayload = expect.objectContaining({ passes: true });
  expect(mockEventBus.emitAutoModeEvent).toHaveBeenCalledWith(
    'auto_mode_feature_complete',
    expectedPayload
  );
});
it('handles realistic Cursor CLI output that exits quickly', async () => {
  // A CLI that prints a short status message and exits without using any tool.
  const quickExitOutput = 'Task received. Processing...\nResult: completed successfully.';
  expect(quickExitOutput.includes('🔧 Tool:')).toBe(false);

  vi.mocked(secureFs.readFile).mockResolvedValue(quickExitOutput);
  const executionService = createServiceWithMocks();
  await executionService.executeFeature('/test/project', 'feature-1');

  // No tool marker, so the feature goes to waiting_approval.
  expect(mockUpdateFeatureStatusFn).toHaveBeenCalledWith(
    '/test/project',
    'feature-1',
    'waiting_approval'
  );
});
it('handles realistic Claude SDK output with multiple tool uses', async () => {
  // Transcript of an agent that reads, edits and writes files, with commentary in between.
  const transcript = [
    "I'll implement the requested feature.\n\n",
    '🔧 Tool: Read\nInput: {"file_path": "/src/components/App.tsx"}\n\n',
    'I can see the existing component structure. Let me modify it.\n\n',
    '🔧 Tool: Edit\nInput: {"file_path": "/src/components/App.tsx", "old_string": "const App = () => {", "new_string": "const App: React.FC = () => {"}\n\n',
    '🔧 Tool: Write\nInput: {"file_path": "/src/components/NewFeature.tsx"}\n\n',
    "I've created the new component and updated the existing one. The feature is now implemented with proper TypeScript types.",
  ].join('');
  vi.mocked(secureFs.readFile).mockResolvedValue(transcript);
  const executionService = createServiceWithMocks();

  await executionService.executeFeature('/test/project', 'feature-1');

  // Tool usage plus enough text is expected to yield verified.
  expect(mockUpdateFeatureStatusFn).toHaveBeenCalledWith(
    '/test/project',
    'feature-1',
    'verified'
  );
});
it('reads agent output from the correct path with utf-8 encoding', async () => {
  vi.mocked(secureFs.readFile).mockResolvedValue(makeAgentOutput(2, 'Done with changes.'));
  const executionService = createServiceWithMocks();

  await executionService.executeFeature('/test/project', 'feature-1');

  // The output file lives in the feature's directory and must be read as utf-8.
  const expectedPath = '/test/project/.automaker/features/feature-1/agent-output.md';
  expect(secureFs.readFile).toHaveBeenCalledWith(expectedPath, 'utf-8');
});
it('completion message includes auto-verified when status is verified', async () => {
  // Three tool blocks, the same shape as the fixture used for the 'verified' status case.
  vi.mocked(secureFs.readFile).mockResolvedValue(makeAgentOutput(3, 'All changes applied.'));
  const executionService = createServiceWithMocks();

  await executionService.executeFeature('/test/project', 'feature-1', false, true);

  const expectedPayload = expect.objectContaining({
    message: expect.stringContaining('auto-verified'),
  });
  expect(mockEventBus.emitAutoModeEvent).toHaveBeenCalledWith(
    'auto_mode_feature_complete',
    expectedPayload
  );
});
it('completion message does NOT include auto-verified when status is waiting_approval', async () => {
  // Empty output is the waiting_approval case.
  vi.mocked(secureFs.readFile).mockResolvedValue('');
  const executionService = createServiceWithMocks();

  await executionService.executeFeature('/test/project', 'feature-1', false, true);

  // Pick the completion event out of everything emitted and inspect its message.
  const emittedCalls = vi.mocked(mockEventBus.emitAutoModeEvent).mock.calls;
  const completionCall = emittedCalls.find((call) => call[0] === 'auto_mode_feature_complete');
  expect(completionCall).toBeDefined();
  const payload = completionCall![1] as { message: string };
  expect(payload.message).not.toContain('auto-verified');
});
it('uses same agentOutput for both status determination and summary extraction', async () => {
  // Two tool markers followed by a repeated descriptive sentence: enough for
  // the verified path, and real content for the summary.
  const toolSection = '🔧 Tool: Read\nReading file...\n🔧 Tool: Edit\nEditing file...\n';
  const description =
    'The implementation is complete. Here is a detailed description of what was done. ';
  const sharedOutput = toolSection + description.repeat(3);
  vi.mocked(secureFs.readFile).mockResolvedValue(sharedOutput);
  const executionService = createServiceWithMocks();

  await executionService.executeFeature('/test/project', 'feature-1');

  // Status is driven by this output...
  expect(mockUpdateFeatureStatusFn).toHaveBeenCalledWith(
    '/test/project',
    'feature-1',
    'verified'
  );
  // ...and the very same string reaches summary extraction...
  expect(extractSummary).toHaveBeenCalledWith(sharedOutput);
  // ...and learnings recording.
  expect(mockRecordLearningsFn).toHaveBeenCalledWith('/test/project', testFeature, sharedOutput);
});
it('does not call recordMemoryUsage when output is empty and memoryFiles is empty', async () => {
  vi.mocked(secureFs.readFile).mockResolvedValue('');
  // Pull the function from the utils module so the assertion can inspect it.
  const utilsModule = await import('@automaker/utils');
  const executionService = createServiceWithMocks();

  await executionService.executeFeature('/test/project', 'feature-1');

  // Nothing to record: empty output and empty memoryFiles.
  expect(utilsModule.recordMemoryUsage).not.toHaveBeenCalled();
});
it('handles output with special unicode characters correctly', async () => {
  // Tool markers followed by accented Latin and Japanese text.
  const multilingualOutput = [
    '🔧 Tool: Read\n',
    '🔧 Tool: Edit\n',
    'Añadiendo función de búsqueda con caracteres especiales: ñ, ü, ö, é, 日本語テスト. ',
    'Die Änderungen wurden erfolgreich implementiert. '.repeat(3),
  ].join('');
  vi.mocked(secureFs.readFile).mockResolvedValue(multilingualOutput);
  const executionService = createServiceWithMocks();

  await executionService.executeFeature('/test/project', 'feature-1');

  // Markers and length must still be recognised despite the non-ASCII content.
  expect(mockUpdateFeatureStatusFn).toHaveBeenCalledWith(
    '/test/project',
    'feature-1',
    'verified'
  );
});
it('treats output with only newlines and spaces around tool marker as insufficient', async () => {
  // A single marker padded with whitespace; trimmed, it is far below 200 characters.
  const whitespacePaddedMarker = '\n\n 🔧 Tool: Read \n\n';
  expect(whitespacePaddedMarker.trim().length).toBeLessThan(200);

  vi.mocked(secureFs.readFile).mockResolvedValue(whitespacePaddedMarker);
  const executionService = createServiceWithMocks();
  await executionService.executeFeature('/test/project', 'feature-1');

  expect(mockUpdateFeatureStatusFn).toHaveBeenCalledWith(
    '/test/project',
    'feature-1',
    'waiting_approval'
  );
});
it('detects tool marker substring correctly (partial match like "🔧 Tools:" does not count)', async () => {
  // Near-miss marker: "🔧 Tools:" (plural) never contains the exact "🔧 Tool:" text.
  const nearMissHeader = '🔧 Tools: Read\n🔧 Tools: Edit\n';
  const nearMissOutput = nearMissHeader + 'Implementation done. '.repeat(20);
  expect(nearMissOutput.includes('🔧 Tool:')).toBe(false);

  vi.mocked(secureFs.readFile).mockResolvedValue(nearMissOutput);
  const executionService = createServiceWithMocks();
  await executionService.executeFeature('/test/project', 'feature-1');

  // The plural form is not the marker, so the status must be waiting_approval.
  expect(mockUpdateFeatureStatusFn).toHaveBeenCalledWith(
    '/test/project',
    'feature-1',
    'waiting_approval'
  );
});
it('pipeline merge_conflict status short-circuits before output validation', async () => {
  // Configure a one-step pipeline for this run.
  vi.mocked(pipelineService.getPipelineConfig).mockResolvedValue({
    version: 1,
    steps: [{ id: 'step-1', name: 'Step 1', order: 1, instructions: 'Do step 1' }] as any,
  });

  // The first loadFeature call (initial load) sees the untouched feature;
  // every later call (task check, pipeline refresh) sees merge_conflict.
  let loadInvocations = 0;
  mockLoadFeatureFn = vi.fn().mockImplementation(() => {
    loadInvocations += 1;
    return loadInvocations === 1 ? testFeature : { ...testFeature, status: 'merge_conflict' };
  });

  const executionService = createServiceWithMocks();
  await executionService.executeFeature('/test/project', 'feature-1');

  // merge_conflict returns early, so neither final status may ever have been written.
  const finalStatusCalls = vi
    .mocked(mockUpdateFeatureStatusFn)
    .mock.calls.filter(([, , status]) => status === 'verified' || status === 'waiting_approval');
  expect(finalStatusCalls.length).toBe(0);
});
});
});

View File

@@ -1,7 +1,7 @@
import { defineConfig, devices } from '@playwright/test';
const port = process.env.TEST_PORT || 3007;
const serverPort = process.env.TEST_SERVER_PORT || 3008;
const port = process.env.TEST_PORT || 3107;
const serverPort = process.env.TEST_SERVER_PORT || 3108;
const reuseServer = process.env.TEST_REUSE_SERVER === 'true';
const useExternalBackend = !!process.env.VITE_SERVER_URL;
// Always use mock agent for tests (disables rate limiting, uses mock Claude responses)
@@ -70,6 +70,10 @@ export default defineConfig({
timeout: 120000,
env: {
...process.env,
// Must set AUTOMAKER_WEB_PORT to match the port Playwright waits for
AUTOMAKER_WEB_PORT: String(port),
// Must set AUTOMAKER_SERVER_PORT so Vite proxy forwards to the correct backend port
AUTOMAKER_SERVER_PORT: String(serverPort),
VITE_SKIP_SETUP: 'true',
// Always skip electron plugin during tests - prevents duplicate server spawning
VITE_SKIP_ELECTRON: 'true',

View File

@@ -8,10 +8,11 @@ import { promisify } from 'util';
const execAsync = promisify(exec);
const SERVER_PORT = process.env.TEST_SERVER_PORT || 3008;
const UI_PORT = process.env.TEST_PORT || 3007;
const SERVER_PORT = process.env.TEST_SERVER_PORT || 3108;
const UI_PORT = process.env.TEST_PORT || 3107;
const USE_EXTERNAL_SERVER = !!process.env.VITE_SERVER_URL;
console.log(`[KillTestServers] SERVER_PORT ${SERVER_PORT}`);
console.log(`[KillTestServers] UI_PORT ${UI_PORT}`);
async function killProcessOnPort(port) {
try {
const hasLsof = await execAsync('command -v lsof').then(

View File

@@ -45,7 +45,7 @@ import { toast } from 'sonner';
import type { PRReviewComment } from '@/lib/electron';
import type { Feature } from '@/store/app-store';
import type { PhaseModelEntry } from '@automaker/types';
import { supportsReasoningEffort, isAdaptiveThinkingModel } from '@automaker/types';
import { supportsReasoningEffort, normalizeThinkingLevelForModel } from '@automaker/types';
import { resolveModelString } from '@automaker/model-resolver';
import { PhaseModelSelector } from '@/components/views/settings-view/model-defaults';
@@ -590,20 +590,10 @@ export function PRCommentResolutionDialog({
const wasOpenRef = useRef(false);
const handleModelChange = useCallback((entry: PhaseModelEntry) => {
// Normalize thinking level when switching between adaptive and non-adaptive models
const isNewModelAdaptive =
typeof entry.model === 'string' && isAdaptiveThinkingModel(entry.model);
const currentLevel = entry.thinkingLevel || 'none';
const modelId = typeof entry.model === 'string' ? entry.model : '';
const normalizedThinkingLevel = normalizeThinkingLevelForModel(modelId, entry.thinkingLevel);
if (isNewModelAdaptive && currentLevel !== 'none' && currentLevel !== 'adaptive') {
// Switching TO an adaptive model with a manual level -> auto-switch to 'adaptive'
setModelEntry({ ...entry, thinkingLevel: 'adaptive' });
} else if (!isNewModelAdaptive && currentLevel === 'adaptive') {
// Switching FROM an adaptive model with adaptive -> auto-switch to 'high'
setModelEntry({ ...entry, thinkingLevel: 'high' });
} else {
setModelEntry(entry);
}
setModelEntry({ ...entry, thinkingLevel: normalizedThinkingLevel });
}, []);
// Fetch PR review comments

View File

@@ -26,11 +26,12 @@ import { useNavigate } from '@tanstack/react-router';
import { toast } from 'sonner';
import { cn } from '@/lib/utils';
import { modelSupportsThinking } from '@/lib/utils';
import { useAppStore, ThinkingLevel, FeatureImage, PlanningMode, Feature } from '@/store/app-store';
import { useAppStore } from '@/store/app-store';
import type { ThinkingLevel, PlanningMode, Feature, FeatureImage } from '@/store/types';
import type { ReasoningEffort, PhaseModelEntry, AgentModel } from '@automaker/types';
import {
supportsReasoningEffort,
isAdaptiveThinkingModel,
normalizeThinkingLevelForModel,
getThinkingLevelsForModel,
} from '@automaker/types';
import {
@@ -308,20 +309,10 @@ export function AddFeatureDialog({
}, [planningMode]);
const handleModelChange = (entry: PhaseModelEntry) => {
// Normalize thinking level when switching between adaptive and non-adaptive models
const isNewModelAdaptive =
typeof entry.model === 'string' && isAdaptiveThinkingModel(entry.model);
const currentLevel = entry.thinkingLevel || 'none';
const modelId = typeof entry.model === 'string' ? entry.model : '';
const normalizedThinkingLevel = normalizeThinkingLevelForModel(modelId, entry.thinkingLevel);
if (isNewModelAdaptive && currentLevel !== 'none' && currentLevel !== 'adaptive') {
// Switching TO Opus 4.6 with a manual level -> auto-switch to 'adaptive'
setModelEntry({ ...entry, thinkingLevel: 'adaptive' });
} else if (!isNewModelAdaptive && currentLevel === 'adaptive') {
// Switching FROM Opus 4.6 with adaptive -> auto-switch to 'high'
setModelEntry({ ...entry, thinkingLevel: 'high' });
} else {
setModelEntry(entry);
}
setModelEntry({ ...entry, thinkingLevel: normalizedThinkingLevel });
};
const buildFeatureData = (): FeatureData | null => {

View File

@@ -1017,6 +1017,7 @@ export function PhaseModelSelector({
{/* Secondary zone: expand reasoning effort popover */}
<Popover
open={isExpanded}
modal={false}
onOpenChange={(isOpen) => {
if (!isOpen) {
setExpandedCodexModel(null);
@@ -1409,7 +1410,9 @@ export function PhaseModelSelector({
return (
<button
key={level}
onClick={() => {
onClick={(e) => {
e.stopPropagation();
e.preventDefault();
onChange({
providerId: provider.id,
model: model.id,
@@ -1497,6 +1500,7 @@ export function PhaseModelSelector({
{/* Secondary zone: expand thinking level popover */}
<Popover
open={isExpanded}
modal={false}
onOpenChange={(isOpen) => {
if (!isOpen) {
setExpandedProviderModel(null);
@@ -1549,7 +1553,9 @@ export function PhaseModelSelector({
return (
<button
key={level}
onClick={() => {
onClick={(e) => {
e.stopPropagation();
e.preventDefault();
onChange({
providerId: provider.id,
model: model.id,
@@ -1752,7 +1758,9 @@ export function PhaseModelSelector({
return (
<button
key={level}
onClick={() => {
onClick={(e) => {
e.stopPropagation();
e.preventDefault();
onChange({
model: model.id as ModelAlias,
thinkingLevel: level,
@@ -1856,6 +1864,7 @@ export function PhaseModelSelector({
{/* Secondary zone: expand thinking level popover */}
<Popover
open={isExpanded}
modal={false}
onOpenChange={(isOpen) => {
if (!isOpen) {
setExpandedClaudeModel(null);
@@ -1906,7 +1915,9 @@ export function PhaseModelSelector({
return (
<button
key={level}
onClick={() => {
onClick={(e) => {
e.stopPropagation();
e.preventDefault();
onChange({
model: model.id as ModelAlias,
thinkingLevel: level,
@@ -2054,6 +2065,7 @@ export function PhaseModelSelector({
>
<Popover
open={isExpanded}
modal={false}
onOpenChange={(isOpen) => {
if (!isOpen) {
setExpandedGroup(null);

View File

@@ -20,11 +20,11 @@ export const DEFAULT_HEIGHT = 950;
// ============================================
// Default ports (can be overridden via env) - will be dynamically assigned if these are in use
// When launched via root init.mjs we pass:
// - PORT (backend)
// - TEST_PORT (vite dev server / static)
// - SERVER_PORT (backend API server)
// - PORT (Vite dev server / static file server)
// Guard against NaN from non-numeric environment variables
const parsedServerPort = Number.parseInt(process.env.PORT ?? '', 10);
const parsedStaticPort = Number.parseInt(process.env.TEST_PORT ?? '', 10);
const parsedServerPort = Number.parseInt(process.env.SERVER_PORT ?? '', 10);
const parsedStaticPort = Number.parseInt(process.env.PORT ?? '', 10);
export const DEFAULT_SERVER_PORT = Number.isFinite(parsedServerPort) ? parsedServerPort : 3008;
export const DEFAULT_STATIC_PORT = Number.isFinite(parsedStaticPort) ? parsedStaticPort : 3007;

View File

@@ -33,11 +33,11 @@
--input: oklch(0.98 0 0);
--ring: oklch(0.3 0 0);
--chart-1: oklch(0.3 0 0);
--chart-2: oklch(0.5 0 0);
--chart-3: oklch(0.4 0 0);
--chart-4: oklch(0.6 0 0);
--chart-5: oklch(0.35 0 0);
--chart-1: oklch(0.5 0.14 25); /* Warm red - strings, regex */
--chart-2: oklch(0.5 0.13 250); /* Blue - properties, variables */
--chart-3: oklch(0.55 0.13 145); /* Green - numbers */
--chart-4: oklch(0.45 0.14 300); /* Purple - keywords, booleans, tags */
--chart-5: oklch(0.5 0.12 180); /* Teal - types, classes */
--sidebar: oklch(0.98 0 0);
--sidebar-foreground: oklch(0.15 0 0);

View File

@@ -297,10 +297,10 @@ await expect(page.locator('[data-testid="my-element"]')).toBeVisible();
### Port conflicts
If you see "Port 3008 is already in use", kill the process:
If you see "Port 3108 is already in use", kill the process:
```bash
lsof -ti:3008 | xargs kill -9
lsof -ti:3108 | xargs kill -9
```
## Available Test Utilities

View File

@@ -21,6 +21,7 @@ import {
getKanbanColumn,
authenticateForTests,
handleLoginScreenIfPresent,
API_BASE_URL,
} from '../utils';
const TEST_TEMP_DIR = createTempDirPath('manual-review-test');
@@ -155,7 +156,6 @@ test.describe('Feature Manual Review Flow', () => {
priority: 2,
};
const API_BASE_URL = process.env.SERVER_URL || 'http://localhost:3008';
const createResponse = await page.request.post(`${API_BASE_URL}/api/features/create`, {
data: { projectPath, feature },
headers: { 'Content-Type': 'application/json' },

View File

@@ -0,0 +1,110 @@
import { test, expect } from '@playwright/test';
import * as fs from 'fs';
import * as path from 'path';
import {
createTempDirPath,
cleanupTempDir,
setupRealProject,
waitForNetworkIdle,
clickAddFeature,
fillAddFeatureDialog,
confirmAddFeature,
authenticateForTests,
handleLoginScreenIfPresent,
} from '../utils';
const TEST_TEMP_DIR = createTempDirPath('opus-thinking-level-none');

/**
 * E2E check that choosing thinking level "None" for Claude Opus in the
 * Add Feature dialog is written to the created feature's feature.json.
 */
test.describe('Opus thinking level', () => {
  let projectPath: string;
  const projectName = `test-project-${Date.now()}`;

  // Create a minimal on-disk project with the .automaker layout this test reads from.
  test.beforeAll(async () => {
    if (!fs.existsSync(TEST_TEMP_DIR)) {
      fs.mkdirSync(TEST_TEMP_DIR, { recursive: true });
    }
    projectPath = path.join(TEST_TEMP_DIR, projectName);
    fs.mkdirSync(projectPath, { recursive: true });
    fs.writeFileSync(
      path.join(projectPath, 'package.json'),
      JSON.stringify({ name: projectName, version: '1.0.0' }, null, 2)
    );
    const automakerDir = path.join(projectPath, '.automaker');
    fs.mkdirSync(automakerDir, { recursive: true });
    fs.mkdirSync(path.join(automakerDir, 'features'), { recursive: true });
    fs.mkdirSync(path.join(automakerDir, 'context'), { recursive: true });
    fs.writeFileSync(
      path.join(automakerDir, 'categories.json'),
      JSON.stringify({ categories: [] }, null, 2)
    );
    fs.writeFileSync(
      path.join(automakerDir, 'app_spec.txt'),
      `# ${projectName}\n\nA test project for Opus thinking level e2e coverage.`
    );
  });

  test.afterAll(async () => {
    cleanupTempDir(TEST_TEMP_DIR);
  });

  test('persists thinkingLevel none when selected for Claude Opus', async ({ page }) => {
    const featureDescription = `Opus none thinking ${Date.now()}`;

    await setupRealProject(page, projectPath, projectName, { setAsCurrent: true });
    await authenticateForTests(page);
    await page.goto('/board');
    await page.waitForLoadState('load');
    await handleLoginScreenIfPresent(page);
    await waitForNetworkIdle(page);

    await clickAddFeature(page);
    await fillAddFeatureDialog(page, featureDescription);

    // Open the model selector, filter to Opus and open its thinking-level popover.
    await page.locator('[data-testid="model-selector"]').click();
    await page.locator('[cmdk-input]').fill('opus');
    const opusItem = page.locator('[cmdk-item]').filter({ hasText: 'Claude Opus' }).first();
    await expect(opusItem).toBeVisible({ timeout: 10000 });
    await opusItem.locator('button[title="Adjust thinking level"]').click();

    // Wait for the thinking level popover to appear
    // The nested popover contains "Thinking Level" text and "None" option
    // Radix UI popovers need a brief delay for the animation to complete
    const POPOVER_OPEN_DELAY_MS = 500;
    await page.waitForTimeout(POPOVER_OPEN_DELAY_MS);

    // Find and click the None button
    // The button's accessible name includes description: "None No extended thinking"
    const noneOption = page.getByRole('button', { name: /None.*No extended thinking/i }).first();
    await expect(noneOption).toBeVisible({ timeout: 5000 });
    await noneOption.click();

    // Wait for the popover to close and the state to update
    // The React state update needs a brief moment to propagate to the badge
    const STATE_UPDATE_DELAY_MS = 300;
    await page.waitForTimeout(STATE_UPDATE_DELAY_MS);

    // When "None" is selected, the badge should NOT show "Adaptive"
    await expect(page.locator('[data-testid="model-selector"]')).not.toContainText('Adaptive');

    await confirmAddFeature(page);

    const featuresDir = path.join(projectPath, '.automaker', 'features');
    // Poll until exactly one feature directory exists AND its feature.json has
    // been written. Checking only the directory count can succeed in the gap
    // between directory creation and the file write, making the read below flaky.
    let featureJsonPath = '';
    await expect
      .poll(
        () => {
          const entries = fs.readdirSync(featuresDir);
          if (entries.length !== 1) return false;
          featureJsonPath = path.join(featuresDir, entries[0], 'feature.json');
          return fs.existsSync(featureJsonPath);
        },
        { message: 'expected exactly one feature directory containing feature.json' }
      )
      .toBe(true);

    const featureJson = JSON.parse(fs.readFileSync(featureJsonPath, 'utf-8')) as {
      description: string;
      thinkingLevel: string;
    };
    expect(featureJson.description).toBe(featureDescription);
    expect(featureJson.thinkingLevel).toBe('none');
  });
});

View File

@@ -18,6 +18,7 @@ import {
authenticateForTests,
handleLoginScreenIfPresent,
} from '../utils';
import { DEFAULT_ELEMENT_TIMEOUT_MS } from '../utils/core/waiting';
const TEST_TEMP_DIR = createTempDirPath('planning-mode-verification-test');
@@ -69,7 +70,9 @@ test.describe('Planning Mode Fix Verification (GitHub #671)', () => {
await handleLoginScreenIfPresent(page);
await waitForNetworkIdle(page);
await expect(page.locator('[data-testid="board-view"]')).toBeVisible({ timeout: 10000 });
await expect(page.locator('[data-testid="board-view"]')).toBeVisible({
timeout: DEFAULT_ELEMENT_TIMEOUT_MS,
});
await expect(page.locator('[data-testid="kanban-column-backlog"]')).toBeVisible({
timeout: 5000,
});
@@ -77,9 +80,9 @@ test.describe('Planning Mode Fix Verification (GitHub #671)', () => {
// Open the add feature dialog
await clickAddFeature(page);
// Wait for dialog to be visible
// Wait for dialog to be visible (clickAddFeature already waits, but this adds an extra check)
await expect(page.locator('[data-testid="add-feature-dialog"]')).toBeVisible({
timeout: 5000,
timeout: DEFAULT_ELEMENT_TIMEOUT_MS,
});
// Find the planning mode select trigger

View File

@@ -22,6 +22,7 @@ import {
getKanbanColumn,
authenticateForTests,
handleLoginScreenIfPresent,
API_BASE_URL,
} from '../utils';
const TEST_TEMP_DIR = createTempDirPath('running-task-display-test');
@@ -142,8 +143,6 @@ test.describe('Running Task Card Display', () => {
priority: 2,
};
const API_BASE_URL = process.env.SERVER_URL || 'http://localhost:3008';
// Create both features via HTTP API
const createInProgress = await page.request.post(`${API_BASE_URL}/api/features/create`, {
data: { projectPath, feature: inProgressFeature },

View File

@@ -4,7 +4,7 @@
*/
import { Page, APIResponse } from '@playwright/test';
import { API_BASE_URL, API_ENDPOINTS } from '../core/constants';
import { API_BASE_URL, API_ENDPOINTS, WEB_BASE_URL } from '../core/constants';
// ============================================================================
// Types
@@ -300,7 +300,7 @@ export async function authenticateWithApiKey(page: Page, apiKey: string): Promis
// Ensure we're on a page (needed for cookies to work)
const currentUrl = page.url();
if (!currentUrl || currentUrl === 'about:blank') {
await page.goto('http://localhost:3007', { waitUntil: 'domcontentloaded' });
await page.goto(WEB_BASE_URL, { waitUntil: 'domcontentloaded' });
}
// Use Playwright request API (tied to this browser context) to avoid flakiness

View File

@@ -9,8 +9,19 @@
/**
* Base URL for the API server
* Uses TEST_SERVER_PORT env var (default 3108) for test runs
*/
export const API_BASE_URL = 'http://localhost:3008';
export const API_BASE_URL = process.env.TEST_SERVER_PORT
? `http://localhost:${process.env.TEST_SERVER_PORT}`
: 'http://localhost:3108';
/**
 * Origin (scheme + host + port) of the frontend web server used by E2E tests.
 * The port is taken from the TEST_PORT env var; when that variable is unset
 * or empty, port 3107 is used instead.
 */
export const WEB_BASE_URL = `http://localhost:${process.env.TEST_PORT || '3107'}`;
/**
* API endpoints for worktree operations

View File

@@ -1,5 +1,12 @@
import { Page, Locator } from '@playwright/test';
/**
 * Fallback timeout, in milliseconds, applied by the E2E element-waiting
 * helpers when the caller does not pass an explicit timeout.
 *
 * Raised from 5000ms to 10000ms: on CI, dialogs can render late because
 * they wait on React Query data fetching.
 */
export const DEFAULT_ELEMENT_TIMEOUT_MS = 10_000;
/**
* Wait for the page to load
* Uses 'load' state instead of 'networkidle' because the app has persistent
@@ -20,7 +27,7 @@ export async function waitForElement(
): Promise<Locator> {
const element = page.locator(`[data-testid="${testId}"]`);
await element.waitFor({
timeout: options?.timeout ?? 5000,
timeout: options?.timeout ?? DEFAULT_ELEMENT_TIMEOUT_MS,
state: options?.state ?? 'visible',
});
return element;
@@ -36,7 +43,7 @@ export async function waitForElementHidden(
): Promise<void> {
const element = page.locator(`[data-testid="${testId}"]`);
await element.waitFor({
timeout: options?.timeout ?? 5000,
timeout: options?.timeout ?? DEFAULT_ELEMENT_TIMEOUT_MS,
state: 'hidden',
});
}

View File

@@ -1,4 +1,5 @@
import { Page, Locator } from '@playwright/test';
import { DEFAULT_ELEMENT_TIMEOUT_MS } from '../core/waiting';
/**
* Get a kanban card by feature ID
@@ -96,9 +97,16 @@ export async function getDragHandleForFeature(page: Page, featureId: string): Pr
* Click the add feature button
*/
export async function clickAddFeature(page: Page): Promise<void> {
await page.click('[data-testid="add-feature-button"]');
// There may be multiple add-feature buttons on the page (header, empty state).
// Use .first() to click the first visible one.
const addButton = page.locator('[data-testid="add-feature-button"]').first();
await addButton.waitFor({ state: 'visible', timeout: DEFAULT_ELEMENT_TIMEOUT_MS });
await addButton.click({ timeout: 5000 });
// Wait for dialog to be visible
await page.waitForSelector('[data-testid="add-feature-dialog"]', {
timeout: 5000,
state: 'visible',
timeout: DEFAULT_ELEMENT_TIMEOUT_MS,
});
}
@@ -159,7 +167,7 @@ export async function confirmAddFeature(page: Page): Promise<void> {
await page.click('[data-testid="confirm-add-feature"]');
// Wait for dialog to close
await page.waitForFunction(() => !document.querySelector('[data-testid="add-feature-dialog"]'), {
timeout: 5000,
timeout: DEFAULT_ELEMENT_TIMEOUT_MS,
});
}

View File

@@ -249,11 +249,11 @@ export default defineConfig(({ command }) => {
},
server: {
host: process.env.HOST || '0.0.0.0',
port: parseInt(process.env.TEST_PORT || process.env.AUTOMAKER_WEB_PORT || '3007', 10),
port: parseInt(process.env.AUTOMAKER_WEB_PORT || '3007', 10),
allowedHosts: true,
proxy: {
'/api': {
target: 'http://localhost:' + (process.env.AUTOMAKER_SERVER_PORT ?? '5008'),
target: 'http://localhost:' + (process.env.AUTOMAKER_SERVER_PORT ?? '3008'),
changeOrigin: true,
ws: true,
},

View File

@@ -109,7 +109,7 @@ services:
environment:
- NODE_ENV=development
- VITE_SERVER_URL=http://localhost:3008
- TEST_PORT=3007
- TEST_PORT=3107
- VITE_SKIP_ELECTRON=true
- VITE_APP_MODE=3
- HUSKY=0

View File

@@ -202,6 +202,7 @@ export {
getThinkingTokenBudget,
isAdaptiveThinkingModel,
getThinkingLevelsForModel,
normalizeThinkingLevelForModel,
getDefaultThinkingLevel,
// Event hook constants
EVENT_HOOK_TRIGGER_LABELS,

View File

@@ -260,7 +260,13 @@ export interface ContentBlock {
*/
export interface ProviderMessage {
type: 'assistant' | 'user' | 'error' | 'result';
subtype?: 'success' | 'error' | 'error_max_turns' | 'error_max_structured_output_retries';
subtype?:
| 'success'
| 'error'
| 'error_max_turns'
| 'error_max_structured_output_retries'
| 'error_during_execution'
| 'error_max_budget_usd';
session_id?: string;
message?: {
role: 'user' | 'assistant';

View File

@@ -349,6 +349,28 @@ export function getThinkingLevelsForModel(model: string): ThinkingLevel[] {
return ['none', 'low', 'medium', 'high', 'ultrathink'];
}
/**
 * Normalize a selected thinking level to a value supported by the target model.
 *
 * Resolution order:
 * 1. the requested level, when the model supports it;
 * 2. `'none'`, when the model supports it;
 * 3. the first level the model reports;
 * 4. `'none'` as a last resort when the model reports no levels at all.
 *
 * @param model - Model identifier, forwarded to `getThinkingLevelsForModel`.
 * @param thinkingLevel - Requested level; a missing value is treated as `'none'`.
 * @returns A thinking level for the model; never `undefined`.
 */
export function normalizeThinkingLevelForModel(
  model: string,
  thinkingLevel: ThinkingLevel | undefined
): ThinkingLevel {
  const availableLevels = getThinkingLevelsForModel(model);
  const requestedLevel = thinkingLevel ?? 'none';

  if (availableLevels.includes(requestedLevel)) {
    return requestedLevel;
  }
  if (availableLevels.includes('none')) {
    return 'none';
  }
  // Indexing an empty list yields `undefined`, which would break the declared
  // return type; fall back to 'none' so callers always receive a level.
  return availableLevels[0] ?? 'none';
}
/**
* Get the default thinking level for a given model.
* Used when selecting a model via the primary button in the two-stage selector.

View File

@@ -57,6 +57,8 @@ DEFAULT_SERVER_PORT=${AUTOMAKER_SERVER_PORT:-3008}
PORT_SEARCH_MAX_ATTEMPTS=100
WEB_PORT=$DEFAULT_WEB_PORT
SERVER_PORT=$DEFAULT_SERVER_PORT
TEST_WEB_PORT=${TEST_PORT:-3107}
TEST_SERVER_PORT=${TEST_SERVER_PORT:-3108}
# Port validation function
# Returns 0 if valid, 1 if invalid (with error message printed)
@@ -1184,7 +1186,8 @@ case $MODE in
if [ -f .env ]; then
export $(grep -v '^#' .env | xargs)
fi
export TEST_PORT="$WEB_PORT"
export TEST_PORT="$TEST_WEB_PORT"
export TEST_SERVER_PORT="$TEST_SERVER_PORT"
export VITE_SERVER_URL="http://${APP_HOST}:$SERVER_PORT"
export PORT="$SERVER_PORT"
export DATA_DIR="$SCRIPT_DIR/data"
@@ -1275,7 +1278,8 @@ case $MODE in
;;
electron)
# Set environment variables for Electron (it starts its own server)
export TEST_PORT="$WEB_PORT"
export TEST_PORT="$TEST_WEB_PORT"
export TEST_SERVER_PORT="$TEST_SERVER_PORT"
export PORT="$SERVER_PORT"
export VITE_SERVER_URL="http://localhost:$SERVER_PORT"
export CORS_ORIGIN="http://localhost:$WEB_PORT,http://127.0.0.1:$WEB_PORT"