mirror of
https://github.com/AutoMaker-Org/automaker.git
synced 2026-03-16 21:53:07 +00:00
Fix agent output validation to prevent false verified status (#807)
* Changes from fix/cursor-fix * feat: Enhance provider error messages with diagnostic context, address test failure, fix port change, move playwright tests to different port * Update apps/ui/src/components/views/board-view/dialogs/add-feature-dialog.tsx Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> * ci: Update test server port from 3008 to 3108 and add environment configuration * fix: Correct typo in health endpoint URL and standardize port env vars --------- Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>
This commit is contained in:
19
.github/workflows/e2e-tests.yml
vendored
19
.github/workflows/e2e-tests.yml
vendored
@@ -46,7 +46,8 @@ jobs:
|
||||
echo "SERVER_PID=$SERVER_PID" >> $GITHUB_ENV
|
||||
|
||||
env:
|
||||
PORT: 3008
|
||||
PORT: 3108
|
||||
TEST_SERVER_PORT: 3108
|
||||
NODE_ENV: test
|
||||
# Use a deterministic API key so Playwright can log in reliably
|
||||
AUTOMAKER_API_KEY: test-api-key-for-e2e-tests
|
||||
@@ -81,13 +82,13 @@ jobs:
|
||||
|
||||
# Wait for health endpoint
|
||||
for i in {1..60}; do
|
||||
if curl -s -f http://localhost:3008/api/health > /dev/null 2>&1; then
|
||||
if curl -s -f http://localhost:3108/api/health > /dev/null 2>&1; then
|
||||
echo "Backend server is ready!"
|
||||
echo "=== Backend logs ==="
|
||||
cat backend.log
|
||||
echo ""
|
||||
echo "Health check response:"
|
||||
curl -s http://localhost:3008/api/health | jq . 2>/dev/null || echo "Health check: $(curl -s http://localhost:3008/api/health 2>/dev/null || echo 'No response')"
|
||||
curl -s http://localhost:3108/api/health | jq . 2>/dev/null || echo "Health check: $(curl -s http://localhost:3108/api/health 2>/dev/null || echo 'No response')"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
@@ -111,11 +112,11 @@ jobs:
|
||||
ps aux | grep -E "(node|tsx)" | grep -v grep || echo "No node processes found"
|
||||
echo ""
|
||||
echo "=== Port status ==="
|
||||
netstat -tlnp 2>/dev/null | grep :3008 || echo "Port 3008 not listening"
|
||||
lsof -i :3008 2>/dev/null || echo "lsof not available or port not in use"
|
||||
netstat -tlnp 2>/dev/null | grep :3108 || echo "Port 3108 not listening"
|
||||
lsof -i :3108 2>/dev/null || echo "lsof not available or port not in use"
|
||||
echo ""
|
||||
echo "=== Health endpoint test ==="
|
||||
curl -v http://localhost:3008/api/health 2>&1 || echo "Health endpoint failed"
|
||||
curl -v http://localhost:3108/api/health 2>&1 || echo "Health endpoint failed"
|
||||
|
||||
# Kill the server process if it's still hanging
|
||||
if kill -0 $SERVER_PID 2>/dev/null; then
|
||||
@@ -132,8 +133,8 @@ jobs:
|
||||
run: npm run test --workspace=apps/ui
|
||||
env:
|
||||
CI: true
|
||||
VITE_SERVER_URL: http://localhost:3008
|
||||
SERVER_URL: http://localhost:3008
|
||||
VITE_SERVER_URL: http://localhost:3108
|
||||
SERVER_URL: http://localhost:3108
|
||||
VITE_SKIP_SETUP: 'true'
|
||||
# Keep UI-side login/defaults consistent
|
||||
AUTOMAKER_API_KEY: test-api-key-for-e2e-tests
|
||||
@@ -148,7 +149,7 @@ jobs:
|
||||
ps aux | grep -E "(node|tsx)" | grep -v grep || echo "No node processes found"
|
||||
echo ""
|
||||
echo "=== Port status ==="
|
||||
netstat -tlnp 2>/dev/null | grep :3008 || echo "Port 3008 not listening"
|
||||
netstat -tlnp 2>/dev/null | grep :3108 || echo "Port 3108 not listening"
|
||||
|
||||
- name: Upload Playwright report
|
||||
uses: actions/upload-artifact@v4
|
||||
|
||||
@@ -52,6 +52,12 @@ HOST=0.0.0.0
|
||||
# Port to run the server on
|
||||
PORT=3008
|
||||
|
||||
# Port to run the server on for testing
|
||||
TEST_SERVER_PORT=3108
|
||||
|
||||
# Port to run the UI on for testing
|
||||
TEST_PORT=3107
|
||||
|
||||
# Data directory for sessions and metadata
|
||||
DATA_DIR=./data
|
||||
|
||||
|
||||
@@ -389,9 +389,14 @@ export class CopilotProvider extends CliProvider {
|
||||
|
||||
case 'session.error': {
|
||||
const errorEvent = sdkEvent as SdkSessionErrorEvent;
|
||||
const enrichedError =
|
||||
errorEvent.data.message ||
|
||||
(errorEvent.data.code
|
||||
? `Copilot agent error (code: ${errorEvent.data.code})`
|
||||
: 'Copilot agent error');
|
||||
return {
|
||||
type: 'error',
|
||||
error: errorEvent.data.message || 'Unknown error',
|
||||
error: enrichedError,
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
@@ -562,10 +562,14 @@ export class CursorProvider extends CliProvider {
|
||||
const resultEvent = cursorEvent as CursorResultEvent;
|
||||
|
||||
if (resultEvent.is_error) {
|
||||
const errorText = resultEvent.error || resultEvent.result || '';
|
||||
const enrichedError =
|
||||
errorText ||
|
||||
`Cursor agent failed (duration: ${resultEvent.duration_ms}ms, subtype: ${resultEvent.subtype}, session: ${resultEvent.session_id ?? 'none'})`;
|
||||
return {
|
||||
type: 'error',
|
||||
session_id: resultEvent.session_id,
|
||||
error: resultEvent.error || resultEvent.result || 'Unknown error',
|
||||
error: enrichedError,
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
@@ -381,10 +381,13 @@ export class GeminiProvider extends CliProvider {
|
||||
const resultEvent = geminiEvent as GeminiResultEvent;
|
||||
|
||||
if (resultEvent.status === 'error') {
|
||||
const enrichedError =
|
||||
resultEvent.error ||
|
||||
`Gemini agent failed (duration: ${resultEvent.stats?.duration_ms ?? 'unknown'}ms, session: ${resultEvent.session_id ?? 'none'})`;
|
||||
return {
|
||||
type: 'error',
|
||||
session_id: resultEvent.session_id,
|
||||
error: resultEvent.error || 'Unknown error',
|
||||
error: enrichedError,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -401,10 +404,12 @@ export class GeminiProvider extends CliProvider {
|
||||
|
||||
case 'error': {
|
||||
const errorEvent = geminiEvent as GeminiResultEvent;
|
||||
const enrichedError =
|
||||
errorEvent.error || `Gemini agent failed (session: ${errorEvent.session_id ?? 'none'})`;
|
||||
return {
|
||||
type: 'error',
|
||||
session_id: errorEvent.session_id,
|
||||
error: errorEvent.error || 'Unknown error',
|
||||
error: enrichedError,
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
@@ -296,8 +296,28 @@ export class AgentExecutor {
|
||||
}
|
||||
}
|
||||
} else if (msg.type === 'error') {
|
||||
throw new Error(AgentExecutor.sanitizeProviderError(msg.error));
|
||||
} else if (msg.type === 'result' && msg.subtype === 'success') scheduleWrite();
|
||||
const sanitized = AgentExecutor.sanitizeProviderError(msg.error);
|
||||
logger.error(
|
||||
`[execute] Feature ${featureId} received error from provider. ` +
|
||||
`raw="${msg.error}", sanitized="${sanitized}", session_id=${msg.session_id ?? 'none'}`
|
||||
);
|
||||
throw new Error(sanitized);
|
||||
} else if (msg.type === 'result') {
|
||||
if (msg.subtype === 'success') {
|
||||
scheduleWrite();
|
||||
} else if (msg.subtype?.startsWith('error')) {
|
||||
// Non-success result subtypes from the SDK (error_max_turns, error_during_execution, etc.)
|
||||
logger.error(
|
||||
`[execute] Feature ${featureId} ended with error subtype: ${msg.subtype}. ` +
|
||||
`session_id=${msg.session_id ?? 'none'}`
|
||||
);
|
||||
throw new Error(`Agent execution ended with: ${msg.subtype}`);
|
||||
} else {
|
||||
logger.warn(
|
||||
`[execute] Feature ${featureId} received unhandled result subtype: ${msg.subtype}`
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
clearInterval(streamHeartbeat);
|
||||
@@ -447,16 +467,28 @@ export class AgentExecutor {
|
||||
});
|
||||
}
|
||||
} else if (msg.type === 'error') {
|
||||
// Clean the error: strip ANSI codes and redundant "Error: " prefix
|
||||
const cleanedError =
|
||||
(msg.error || `Error during task ${task.id}`)
|
||||
.replace(/\x1b\[[0-9;]*m/g, '')
|
||||
.replace(/^Error:\s*/i, '')
|
||||
.trim() || `Error during task ${task.id}`;
|
||||
throw new Error(cleanedError);
|
||||
} else if (msg.type === 'result' && msg.subtype === 'success') {
|
||||
taskOutput += msg.result || '';
|
||||
responseText += msg.result || '';
|
||||
const fallback = `Error during task ${task.id}`;
|
||||
const sanitized = AgentExecutor.sanitizeProviderError(msg.error || fallback);
|
||||
logger.error(
|
||||
`[executeTasksLoop] Feature ${featureId} task ${task.id} received error from provider. ` +
|
||||
`raw="${msg.error}", sanitized="${sanitized}", session_id=${msg.session_id ?? 'none'}`
|
||||
);
|
||||
throw new Error(sanitized);
|
||||
} else if (msg.type === 'result') {
|
||||
if (msg.subtype === 'success') {
|
||||
taskOutput += msg.result || '';
|
||||
responseText += msg.result || '';
|
||||
} else if (msg.subtype?.startsWith('error')) {
|
||||
logger.error(
|
||||
`[executeTasksLoop] Feature ${featureId} task ${task.id} ended with error subtype: ${msg.subtype}. ` +
|
||||
`session_id=${msg.session_id ?? 'none'}`
|
||||
);
|
||||
throw new Error(`Agent execution ended with: ${msg.subtype}`);
|
||||
} else {
|
||||
logger.warn(
|
||||
`[executeTasksLoop] Feature ${featureId} task ${task.id} received unhandled result subtype: ${msg.subtype}`
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!taskCompleteDetected)
|
||||
|
||||
@@ -60,6 +60,12 @@ import type {
|
||||
|
||||
const logger = createLogger('ExecutionService');
|
||||
|
||||
/** Marker written by agent-executor for each tool invocation. */
|
||||
const TOOL_USE_MARKER = '🔧 Tool:';
|
||||
|
||||
/** Minimum trimmed output length to consider agent work meaningful. */
|
||||
const MIN_MEANINGFUL_OUTPUT_LENGTH = 200;
|
||||
|
||||
export class ExecutionService {
|
||||
constructor(
|
||||
private eventBus: TypedEventBus,
|
||||
@@ -409,7 +415,41 @@ Please continue from where you left off and complete all remaining tasks. Use th
|
||||
}
|
||||
}
|
||||
|
||||
const finalStatus = feature.skipTests ? 'waiting_approval' : 'verified';
|
||||
// Read agent output before determining final status.
|
||||
// CLI-based providers (Cursor, Codex, etc.) may exit quickly without doing
|
||||
// meaningful work. Check output to avoid prematurely marking as 'verified'.
|
||||
const outputPath = path.join(getFeatureDir(projectPath, featureId), 'agent-output.md');
|
||||
let agentOutput = '';
|
||||
try {
|
||||
agentOutput = (await secureFs.readFile(outputPath, 'utf-8')) as string;
|
||||
} catch {
|
||||
/* */
|
||||
}
|
||||
|
||||
// Determine if the agent did meaningful work by checking for tool usage
|
||||
// indicators in the output. The agent executor writes "🔧 Tool:" markers
|
||||
// each time a tool is invoked. No tool usage suggests the CLI exited
|
||||
// without performing implementation work.
|
||||
const hasToolUsage = agentOutput.includes(TOOL_USE_MARKER);
|
||||
const isOutputTooShort = agentOutput.trim().length < MIN_MEANINGFUL_OUTPUT_LENGTH;
|
||||
const agentDidWork = hasToolUsage && !isOutputTooShort;
|
||||
|
||||
let finalStatus: 'verified' | 'waiting_approval';
|
||||
if (feature.skipTests) {
|
||||
finalStatus = 'waiting_approval';
|
||||
} else if (!agentDidWork) {
|
||||
// Agent didn't produce meaningful output (e.g., CLI exited quickly).
|
||||
// Route to waiting_approval so the user can review and re-run.
|
||||
finalStatus = 'waiting_approval';
|
||||
logger.warn(
|
||||
`[executeFeature] Feature ${featureId}: agent produced insufficient output ` +
|
||||
`(${agentOutput.trim().length}/${MIN_MEANINGFUL_OUTPUT_LENGTH} chars, toolUsage=${hasToolUsage}). ` +
|
||||
`Setting status to waiting_approval instead of verified.`
|
||||
);
|
||||
} else {
|
||||
finalStatus = 'verified';
|
||||
}
|
||||
|
||||
await this.updateFeatureStatusFn(projectPath, featureId, finalStatus);
|
||||
this.recordSuccessFn();
|
||||
|
||||
@@ -421,13 +461,6 @@ Please continue from where you left off and complete all remaining tasks. Use th
|
||||
const hasIncompleteTasks = totalTasks > 0 && completedTasks < totalTasks;
|
||||
|
||||
try {
|
||||
const outputPath = path.join(getFeatureDir(projectPath, featureId), 'agent-output.md');
|
||||
let agentOutput = '';
|
||||
try {
|
||||
agentOutput = (await secureFs.readFile(outputPath, 'utf-8')) as string;
|
||||
} catch {
|
||||
/* */
|
||||
}
|
||||
if (agentOutput) {
|
||||
const summary = extractSummary(agentOutput);
|
||||
if (summary) await this.saveFeatureSummaryFn(projectPath, featureId, summary);
|
||||
|
||||
@@ -0,0 +1,20 @@
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { normalizeThinkingLevelForModel } from '@automaker/types';
|
||||
|
||||
describe('normalizeThinkingLevelForModel', () => {
|
||||
it('preserves explicitly selected none for Opus models', () => {
|
||||
expect(normalizeThinkingLevelForModel('claude-opus', 'none')).toBe('none');
|
||||
});
|
||||
|
||||
it('falls back to none when Opus receives an unsupported manual thinking level', () => {
|
||||
expect(normalizeThinkingLevelForModel('claude-opus', 'medium')).toBe('none');
|
||||
});
|
||||
|
||||
it('keeps adaptive for Opus when adaptive is selected', () => {
|
||||
expect(normalizeThinkingLevelForModel('claude-opus', 'adaptive')).toBe('adaptive');
|
||||
});
|
||||
|
||||
it('preserves supported manual levels for non-Opus models', () => {
|
||||
expect(normalizeThinkingLevelForModel('claude-sonnet', 'high')).toBe('high');
|
||||
});
|
||||
});
|
||||
@@ -397,6 +397,45 @@ describe('copilot-provider.ts', () => {
|
||||
});
|
||||
});
|
||||
|
||||
it('should use error code in fallback when session.error message is empty', () => {
|
||||
const event = {
|
||||
type: 'session.error',
|
||||
data: { message: '', code: 'RATE_LIMIT_EXCEEDED' },
|
||||
};
|
||||
|
||||
const result = provider.normalizeEvent(event);
|
||||
expect(result).not.toBeNull();
|
||||
expect(result!.type).toBe('error');
|
||||
expect(result!.error).toContain('RATE_LIMIT_EXCEEDED');
|
||||
expect(result!.error).not.toBe('Unknown error');
|
||||
});
|
||||
|
||||
it('should return generic "Copilot agent error" fallback when both message and code are empty', () => {
|
||||
const event = {
|
||||
type: 'session.error',
|
||||
data: { message: '', code: '' },
|
||||
};
|
||||
|
||||
const result = provider.normalizeEvent(event);
|
||||
expect(result).not.toBeNull();
|
||||
expect(result!.type).toBe('error');
|
||||
expect(result!.error).toBe('Copilot agent error');
|
||||
// Must NOT be the old opaque 'Unknown error'
|
||||
expect(result!.error).not.toBe('Unknown error');
|
||||
});
|
||||
|
||||
it('should return generic "Copilot agent error" fallback when data has no code field', () => {
|
||||
const event = {
|
||||
type: 'session.error',
|
||||
data: { message: '' },
|
||||
};
|
||||
|
||||
const result = provider.normalizeEvent(event);
|
||||
expect(result).not.toBeNull();
|
||||
expect(result!.type).toBe('error');
|
||||
expect(result!.error).toBe('Copilot agent error');
|
||||
});
|
||||
|
||||
it('should return null for unknown event types', () => {
|
||||
const event = { type: 'unknown.event' };
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { describe, it, expect, beforeEach } from 'vitest';
|
||||
import { CursorProvider } from '@/providers/cursor-provider.js';
|
||||
|
||||
describe('cursor-provider.ts', () => {
|
||||
@@ -36,4 +36,122 @@ describe('cursor-provider.ts', () => {
|
||||
expect(args).not.toContain('--resume');
|
||||
});
|
||||
});
|
||||
|
||||
describe('normalizeEvent - result error handling', () => {
|
||||
let provider: CursorProvider;
|
||||
|
||||
beforeEach(() => {
|
||||
provider = Object.create(CursorProvider.prototype) as CursorProvider;
|
||||
});
|
||||
|
||||
it('returns error message from resultEvent.error when is_error=true', () => {
|
||||
const event = {
|
||||
type: 'result',
|
||||
is_error: true,
|
||||
error: 'Rate limit exceeded',
|
||||
result: '',
|
||||
subtype: 'error',
|
||||
duration_ms: 3000,
|
||||
session_id: 'sess-123',
|
||||
};
|
||||
|
||||
const msg = provider.normalizeEvent(event);
|
||||
|
||||
expect(msg).not.toBeNull();
|
||||
expect(msg!.type).toBe('error');
|
||||
expect(msg!.error).toBe('Rate limit exceeded');
|
||||
});
|
||||
|
||||
it('falls back to resultEvent.result when error field is empty and is_error=true', () => {
|
||||
const event = {
|
||||
type: 'result',
|
||||
is_error: true,
|
||||
error: '',
|
||||
result: 'Process terminated unexpectedly',
|
||||
subtype: 'error',
|
||||
duration_ms: 5000,
|
||||
session_id: 'sess-456',
|
||||
};
|
||||
|
||||
const msg = provider.normalizeEvent(event);
|
||||
|
||||
expect(msg).not.toBeNull();
|
||||
expect(msg!.type).toBe('error');
|
||||
expect(msg!.error).toBe('Process terminated unexpectedly');
|
||||
});
|
||||
|
||||
it('builds diagnostic fallback when both error and result are empty and is_error=true', () => {
|
||||
const event = {
|
||||
type: 'result',
|
||||
is_error: true,
|
||||
error: '',
|
||||
result: '',
|
||||
subtype: 'error',
|
||||
duration_ms: 5000,
|
||||
session_id: 'sess-789',
|
||||
};
|
||||
|
||||
const msg = provider.normalizeEvent(event);
|
||||
|
||||
expect(msg).not.toBeNull();
|
||||
expect(msg!.type).toBe('error');
|
||||
// Should contain diagnostic info rather than 'Unknown error'
|
||||
expect(msg!.error).toContain('5000ms');
|
||||
expect(msg!.error).toContain('sess-789');
|
||||
expect(msg!.error).not.toBe('Unknown error');
|
||||
});
|
||||
|
||||
it('preserves session_id in error message', () => {
|
||||
const event = {
|
||||
type: 'result',
|
||||
is_error: true,
|
||||
error: 'Timeout occurred',
|
||||
result: '',
|
||||
subtype: 'error',
|
||||
duration_ms: 30000,
|
||||
session_id: 'my-session-id',
|
||||
};
|
||||
|
||||
const msg = provider.normalizeEvent(event);
|
||||
|
||||
expect(msg!.session_id).toBe('my-session-id');
|
||||
});
|
||||
|
||||
it('uses "none" when session_id is missing from diagnostic fallback', () => {
|
||||
const event = {
|
||||
type: 'result',
|
||||
is_error: true,
|
||||
error: '',
|
||||
result: '',
|
||||
subtype: 'error',
|
||||
duration_ms: 5000,
|
||||
// session_id intentionally omitted
|
||||
};
|
||||
|
||||
const msg = provider.normalizeEvent(event);
|
||||
|
||||
expect(msg).not.toBeNull();
|
||||
expect(msg!.type).toBe('error');
|
||||
expect(msg!.error).toContain('none');
|
||||
expect(msg!.error).not.toContain('undefined');
|
||||
});
|
||||
|
||||
it('returns success result when is_error=false', () => {
|
||||
const event = {
|
||||
type: 'result',
|
||||
is_error: false,
|
||||
error: '',
|
||||
result: 'Completed successfully',
|
||||
subtype: 'success',
|
||||
duration_ms: 2000,
|
||||
session_id: 'sess-ok',
|
||||
};
|
||||
|
||||
const msg = provider.normalizeEvent(event);
|
||||
|
||||
expect(msg).not.toBeNull();
|
||||
expect(msg!.type).toBe('result');
|
||||
expect(msg!.subtype).toBe('success');
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import { describe, it, expect, beforeEach } from 'vitest';
|
||||
import { GeminiProvider } from '@/providers/gemini-provider.js';
|
||||
import type { ProviderMessage } from '@automaker/types';
|
||||
|
||||
describe('gemini-provider.ts', () => {
|
||||
let provider: GeminiProvider;
|
||||
@@ -116,4 +117,140 @@ describe('gemini-provider.ts', () => {
|
||||
expect(args[modelIndex + 1]).toBe('gemini-2.5-pro');
|
||||
});
|
||||
});
|
||||
|
||||
describe('normalizeEvent - error handling', () => {
|
||||
it('returns error from result event when status=error and error field is set', () => {
|
||||
const event = {
|
||||
type: 'result',
|
||||
status: 'error',
|
||||
error: 'Model overloaded',
|
||||
session_id: 'sess-gemini-1',
|
||||
stats: { duration_ms: 4000, total_tokens: 0 },
|
||||
};
|
||||
|
||||
const msg = provider.normalizeEvent(event) as ProviderMessage;
|
||||
|
||||
expect(msg).not.toBeNull();
|
||||
expect(msg.type).toBe('error');
|
||||
expect(msg.error).toBe('Model overloaded');
|
||||
expect(msg.session_id).toBe('sess-gemini-1');
|
||||
});
|
||||
|
||||
it('builds diagnostic fallback when result event has status=error but empty error field', () => {
|
||||
const event = {
|
||||
type: 'result',
|
||||
status: 'error',
|
||||
error: '',
|
||||
session_id: 'sess-gemini-2',
|
||||
stats: { duration_ms: 7500, total_tokens: 0 },
|
||||
};
|
||||
|
||||
const msg = provider.normalizeEvent(event) as ProviderMessage;
|
||||
|
||||
expect(msg).not.toBeNull();
|
||||
expect(msg.type).toBe('error');
|
||||
// Diagnostic info should be present instead of 'Unknown error'
|
||||
expect(msg.error).toContain('7500ms');
|
||||
expect(msg.error).toContain('sess-gemini-2');
|
||||
expect(msg.error).not.toBe('Unknown error');
|
||||
});
|
||||
|
||||
it('builds fallback with "unknown" duration when stats are missing', () => {
|
||||
const event = {
|
||||
type: 'result',
|
||||
status: 'error',
|
||||
error: '',
|
||||
session_id: 'sess-gemini-nostats',
|
||||
// no stats field
|
||||
};
|
||||
|
||||
const msg = provider.normalizeEvent(event) as ProviderMessage;
|
||||
|
||||
expect(msg).not.toBeNull();
|
||||
expect(msg.type).toBe('error');
|
||||
expect(msg.error).toContain('unknown');
|
||||
});
|
||||
|
||||
it('returns error from standalone error event with error field set', () => {
|
||||
const event = {
|
||||
type: 'error',
|
||||
error: 'API key invalid',
|
||||
session_id: 'sess-gemini-3',
|
||||
};
|
||||
|
||||
const msg = provider.normalizeEvent(event) as ProviderMessage;
|
||||
|
||||
expect(msg).not.toBeNull();
|
||||
expect(msg.type).toBe('error');
|
||||
expect(msg.error).toBe('API key invalid');
|
||||
});
|
||||
|
||||
it('builds diagnostic fallback when standalone error event has empty error field', () => {
|
||||
const event = {
|
||||
type: 'error',
|
||||
error: '',
|
||||
session_id: 'sess-gemini-empty',
|
||||
};
|
||||
|
||||
const msg = provider.normalizeEvent(event) as ProviderMessage;
|
||||
|
||||
expect(msg).not.toBeNull();
|
||||
expect(msg.type).toBe('error');
|
||||
// Should include session_id, not just 'Unknown error'
|
||||
expect(msg.error).toContain('sess-gemini-empty');
|
||||
expect(msg.error).not.toBe('Unknown error');
|
||||
});
|
||||
|
||||
it('builds fallback mentioning "none" when session_id is missing from error event', () => {
|
||||
const event = {
|
||||
type: 'error',
|
||||
error: '',
|
||||
// no session_id
|
||||
};
|
||||
|
||||
const msg = provider.normalizeEvent(event) as ProviderMessage;
|
||||
|
||||
expect(msg).not.toBeNull();
|
||||
expect(msg.type).toBe('error');
|
||||
expect(msg.error).toContain('none');
|
||||
});
|
||||
|
||||
it('uses consistent "Gemini agent failed" label for both result and error event fallbacks', () => {
|
||||
const resultEvent = {
|
||||
type: 'result',
|
||||
status: 'error',
|
||||
error: '',
|
||||
session_id: 'sess-r',
|
||||
stats: { duration_ms: 1000 },
|
||||
};
|
||||
const errorEvent = {
|
||||
type: 'error',
|
||||
error: '',
|
||||
session_id: 'sess-e',
|
||||
};
|
||||
|
||||
const resultMsg = provider.normalizeEvent(resultEvent) as ProviderMessage;
|
||||
const errorMsg = provider.normalizeEvent(errorEvent) as ProviderMessage;
|
||||
|
||||
// Both fallback messages should use the same "Gemini agent failed" prefix
|
||||
expect(resultMsg.error).toContain('Gemini agent failed');
|
||||
expect(errorMsg.error).toContain('Gemini agent failed');
|
||||
});
|
||||
|
||||
it('returns success result when result event has status=success', () => {
|
||||
const event = {
|
||||
type: 'result',
|
||||
status: 'success',
|
||||
error: '',
|
||||
session_id: 'sess-gemini-ok',
|
||||
stats: { duration_ms: 1200, total_tokens: 500 },
|
||||
};
|
||||
|
||||
const msg = provider.normalizeEvent(event) as ProviderMessage;
|
||||
|
||||
expect(msg).not.toBeNull();
|
||||
expect(msg.type).toBe('result');
|
||||
expect(msg.subtype).toBe('success');
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
@@ -685,6 +685,309 @@ describe('AgentExecutor', () => {
|
||||
await expect(executor.execute(options, callbacks)).rejects.toThrow('API rate limit exceeded');
|
||||
});
|
||||
|
||||
it('should throw "Unknown error" when provider stream yields error with empty message', async () => {
|
||||
const executor = new AgentExecutor(
|
||||
mockEventBus,
|
||||
mockFeatureStateManager,
|
||||
mockPlanApprovalService,
|
||||
mockSettingsService
|
||||
);
|
||||
|
||||
const mockProvider = {
|
||||
getName: () => 'mock',
|
||||
executeQuery: vi.fn().mockImplementation(function* () {
|
||||
yield {
|
||||
type: 'error',
|
||||
error: '',
|
||||
session_id: 'sess-123',
|
||||
};
|
||||
}),
|
||||
} as unknown as BaseProvider;
|
||||
|
||||
const options: AgentExecutionOptions = {
|
||||
workDir: '/test',
|
||||
featureId: 'test-feature',
|
||||
prompt: 'Test prompt',
|
||||
projectPath: '/project',
|
||||
abortController: new AbortController(),
|
||||
provider: mockProvider,
|
||||
effectiveBareModel: 'claude-sonnet-4-6',
|
||||
planningMode: 'skip',
|
||||
};
|
||||
|
||||
const callbacks = {
|
||||
waitForApproval: vi.fn().mockResolvedValue({ approved: true }),
|
||||
saveFeatureSummary: vi.fn(),
|
||||
updateFeatureSummary: vi.fn(),
|
||||
buildTaskPrompt: vi.fn().mockReturnValue('task prompt'),
|
||||
};
|
||||
|
||||
await expect(executor.execute(options, callbacks)).rejects.toThrow('Unknown error');
|
||||
});
|
||||
|
||||
it('should throw with sanitized error when provider yields ANSI-decorated error', async () => {
|
||||
const executor = new AgentExecutor(
|
||||
mockEventBus,
|
||||
mockFeatureStateManager,
|
||||
mockPlanApprovalService,
|
||||
mockSettingsService
|
||||
);
|
||||
|
||||
const mockProvider = {
|
||||
getName: () => 'mock',
|
||||
executeQuery: vi.fn().mockImplementation(function* () {
|
||||
yield {
|
||||
type: 'error',
|
||||
// ANSI color codes + "Error: " prefix that should be stripped
|
||||
error: '\x1b[31mError: Connection refused\x1b[0m',
|
||||
};
|
||||
}),
|
||||
} as unknown as BaseProvider;
|
||||
|
||||
const options: AgentExecutionOptions = {
|
||||
workDir: '/test',
|
||||
featureId: 'test-feature',
|
||||
prompt: 'Test prompt',
|
||||
projectPath: '/project',
|
||||
abortController: new AbortController(),
|
||||
provider: mockProvider,
|
||||
effectiveBareModel: 'claude-sonnet-4-6',
|
||||
planningMode: 'skip',
|
||||
};
|
||||
|
||||
const callbacks = {
|
||||
waitForApproval: vi.fn().mockResolvedValue({ approved: true }),
|
||||
saveFeatureSummary: vi.fn(),
|
||||
updateFeatureSummary: vi.fn(),
|
||||
buildTaskPrompt: vi.fn().mockReturnValue('task prompt'),
|
||||
};
|
||||
|
||||
// Should strip ANSI codes and "Error: " prefix
|
||||
await expect(executor.execute(options, callbacks)).rejects.toThrow('Connection refused');
|
||||
});
|
||||
|
||||
it('should throw when result subtype is error_max_turns', async () => {
|
||||
const executor = new AgentExecutor(
|
||||
mockEventBus,
|
||||
mockFeatureStateManager,
|
||||
mockPlanApprovalService,
|
||||
mockSettingsService
|
||||
);
|
||||
|
||||
const mockProvider = {
|
||||
getName: () => 'mock',
|
||||
executeQuery: vi.fn().mockImplementation(function* () {
|
||||
yield {
|
||||
type: 'assistant',
|
||||
message: {
|
||||
content: [{ type: 'text', text: 'Working on it...' }],
|
||||
},
|
||||
};
|
||||
yield {
|
||||
type: 'result',
|
||||
subtype: 'error_max_turns',
|
||||
session_id: 'sess-456',
|
||||
};
|
||||
}),
|
||||
} as unknown as BaseProvider;
|
||||
|
||||
const options: AgentExecutionOptions = {
|
||||
workDir: '/test',
|
||||
featureId: 'test-feature',
|
||||
prompt: 'Test prompt',
|
||||
projectPath: '/project',
|
||||
abortController: new AbortController(),
|
||||
provider: mockProvider,
|
||||
effectiveBareModel: 'claude-sonnet-4-6',
|
||||
planningMode: 'skip',
|
||||
};
|
||||
|
||||
const callbacks = {
|
||||
waitForApproval: vi.fn().mockResolvedValue({ approved: true }),
|
||||
saveFeatureSummary: vi.fn(),
|
||||
updateFeatureSummary: vi.fn(),
|
||||
buildTaskPrompt: vi.fn().mockReturnValue('task prompt'),
|
||||
};
|
||||
|
||||
await expect(executor.execute(options, callbacks)).rejects.toThrow(
|
||||
'Agent execution ended with: error_max_turns'
|
||||
);
|
||||
});
|
||||
|
||||
it('should throw when result subtype is error_during_execution', async () => {
|
||||
const executor = new AgentExecutor(
|
||||
mockEventBus,
|
||||
mockFeatureStateManager,
|
||||
mockPlanApprovalService,
|
||||
mockSettingsService
|
||||
);
|
||||
|
||||
const mockProvider = {
|
||||
getName: () => 'mock',
|
||||
executeQuery: vi.fn().mockImplementation(function* () {
|
||||
yield {
|
||||
type: 'result',
|
||||
subtype: 'error_during_execution',
|
||||
session_id: 'sess-789',
|
||||
};
|
||||
}),
|
||||
} as unknown as BaseProvider;
|
||||
|
||||
const options: AgentExecutionOptions = {
|
||||
workDir: '/test',
|
||||
featureId: 'test-feature',
|
||||
prompt: 'Test prompt',
|
||||
projectPath: '/project',
|
||||
abortController: new AbortController(),
|
||||
provider: mockProvider,
|
||||
effectiveBareModel: 'claude-sonnet-4-6',
|
||||
planningMode: 'skip',
|
||||
};
|
||||
|
||||
const callbacks = {
|
||||
waitForApproval: vi.fn().mockResolvedValue({ approved: true }),
|
||||
saveFeatureSummary: vi.fn(),
|
||||
updateFeatureSummary: vi.fn(),
|
||||
buildTaskPrompt: vi.fn().mockReturnValue('task prompt'),
|
||||
};
|
||||
|
||||
await expect(executor.execute(options, callbacks)).rejects.toThrow(
|
||||
'Agent execution ended with: error_during_execution'
|
||||
);
|
||||
});
|
||||
|
||||
it('should throw when result subtype is error_max_structured_output_retries', async () => {
|
||||
const executor = new AgentExecutor(
|
||||
mockEventBus,
|
||||
mockFeatureStateManager,
|
||||
mockPlanApprovalService,
|
||||
mockSettingsService
|
||||
);
|
||||
|
||||
const mockProvider = {
|
||||
getName: () => 'mock',
|
||||
executeQuery: vi.fn().mockImplementation(function* () {
|
||||
yield {
|
||||
type: 'result',
|
||||
subtype: 'error_max_structured_output_retries',
|
||||
};
|
||||
}),
|
||||
} as unknown as BaseProvider;
|
||||
|
||||
const options: AgentExecutionOptions = {
|
||||
workDir: '/test',
|
||||
featureId: 'test-feature',
|
||||
prompt: 'Test prompt',
|
||||
projectPath: '/project',
|
||||
abortController: new AbortController(),
|
||||
provider: mockProvider,
|
||||
effectiveBareModel: 'claude-sonnet-4-6',
|
||||
planningMode: 'skip',
|
||||
};
|
||||
|
||||
const callbacks = {
|
||||
waitForApproval: vi.fn().mockResolvedValue({ approved: true }),
|
||||
saveFeatureSummary: vi.fn(),
|
||||
updateFeatureSummary: vi.fn(),
|
||||
buildTaskPrompt: vi.fn().mockReturnValue('task prompt'),
|
||||
};
|
||||
|
||||
await expect(executor.execute(options, callbacks)).rejects.toThrow(
|
||||
'Agent execution ended with: error_max_structured_output_retries'
|
||||
);
|
||||
});
|
||||
|
||||
it('should throw when result subtype is error_max_budget_usd', async () => {
|
||||
const executor = new AgentExecutor(
|
||||
mockEventBus,
|
||||
mockFeatureStateManager,
|
||||
mockPlanApprovalService,
|
||||
mockSettingsService
|
||||
);
|
||||
|
||||
const mockProvider = {
|
||||
getName: () => 'mock',
|
||||
executeQuery: vi.fn().mockImplementation(function* () {
|
||||
yield {
|
||||
type: 'result',
|
||||
subtype: 'error_max_budget_usd',
|
||||
session_id: 'sess-budget',
|
||||
};
|
||||
}),
|
||||
} as unknown as BaseProvider;
|
||||
|
||||
const options: AgentExecutionOptions = {
|
||||
workDir: '/test',
|
||||
featureId: 'test-feature',
|
||||
prompt: 'Test prompt',
|
||||
projectPath: '/project',
|
||||
abortController: new AbortController(),
|
||||
provider: mockProvider,
|
||||
effectiveBareModel: 'claude-sonnet-4-6',
|
||||
planningMode: 'skip',
|
||||
};
|
||||
|
||||
const callbacks = {
|
||||
waitForApproval: vi.fn().mockResolvedValue({ approved: true }),
|
||||
saveFeatureSummary: vi.fn(),
|
||||
updateFeatureSummary: vi.fn(),
|
||||
buildTaskPrompt: vi.fn().mockReturnValue('task prompt'),
|
||||
};
|
||||
|
||||
await expect(executor.execute(options, callbacks)).rejects.toThrow(
|
||||
'Agent execution ended with: error_max_budget_usd'
|
||||
);
|
||||
});
|
||||
|
||||
it('should NOT throw when result subtype is success', async () => {
  // Provider stream: one assistant text message followed by a successful result.
  function* successfulStream() {
    yield {
      type: 'assistant',
      message: {
        content: [{ type: 'text', text: 'Done!' }],
      },
    };
    yield {
      type: 'result',
      subtype: 'success',
      session_id: 'sess-ok',
    };
  }

  const successProvider = {
    getName: () => 'mock',
    executeQuery: vi.fn().mockImplementation(successfulStream),
  } as unknown as BaseProvider;

  const agentExecutor = new AgentExecutor(
    mockEventBus,
    mockFeatureStateManager,
    mockPlanApprovalService,
    mockSettingsService
  );

  const executionOptions: AgentExecutionOptions = {
    workDir: '/test',
    featureId: 'test-feature',
    prompt: 'Test prompt',
    projectPath: '/project',
    abortController: new AbortController(),
    provider: successProvider,
    effectiveBareModel: 'claude-sonnet-4-6',
    planningMode: 'skip',
  };

  const executionCallbacks = {
    waitForApproval: vi.fn().mockResolvedValue({ approved: true }),
    saveFeatureSummary: vi.fn(),
    updateFeatureSummary: vi.fn(),
    buildTaskPrompt: vi.fn().mockReturnValue('task prompt'),
  };

  // A 'success' subtype must resolve normally and keep the assistant text.
  const { aborted, responseText } = await agentExecutor.execute(
    executionOptions,
    executionCallbacks
  );
  expect(aborted).toBe(false);
  expect(responseText).toContain('Done!');
});
|
||||
|
||||
it('should throw error when authentication fails in response', async () => {
|
||||
const executor = new AgentExecutor(
|
||||
mockEventBus,
|
||||
|
||||
192
apps/server/tests/unit/services/agent-output-validation.test.ts
Normal file
192
apps/server/tests/unit/services/agent-output-validation.test.ts
Normal file
@@ -0,0 +1,192 @@
|
||||
import { describe, it, expect } from 'vitest';
|
||||
|
||||
/**
|
||||
* Contract tests verifying the tool marker format used by agent-executor
|
||||
* (which writes agent output) and execution-service (which reads it to
|
||||
* determine if the agent did meaningful work).
|
||||
*
|
||||
* The agent-executor writes: `\n🔧 Tool: ${block.name}\n`
|
||||
* The execution-service checks: `agentOutput.includes('🔧 Tool:')`
|
||||
*
|
||||
* These tests ensure the marker format contract stays consistent and
|
||||
* document the exact detection logic used for status determination.
|
||||
*/
|
||||
|
||||
// The exact marker prefix that execution-service searches for
|
||||
const TOOL_MARKER = '🔧 Tool:';
|
||||
|
||||
// Minimum output length threshold for "meaningful work"
|
||||
const MIN_OUTPUT_LENGTH = 200;
|
||||
|
||||
/**
 * Test-local stand-in for the tool_use text that agent-executor writes
 * (see agent-executor.ts, around line 293).
 *
 * @param toolName - Name printed after the tool marker.
 * @param input - Optional tool input; when given it is appended as
 *   2-space pretty-printed JSON on an `Input: ` line.
 * @returns The marker line wrapped in newlines, plus the input line if any.
 */
function formatToolUseBlock(toolName: string, input?: Record<string, unknown>): string {
  const markerLine = `\n${TOOL_MARKER} ${toolName}\n`;
  if (!input) {
    return markerLine;
  }
  const serializedInput = JSON.stringify(input, null, 2);
  return `${markerLine}Input: ${serializedInput}\n`;
}
|
||||
|
||||
/**
 * Test-local stand-in for the status decision made by execution-service
 * (see execution-service.ts, around lines 427-429).
 *
 * @param agentOutput - Raw agent output text.
 * @param skipTests - When true the result is always 'waiting_approval'.
 * @returns 'verified' only when the output contains the tool marker AND its
 *   trimmed length reaches MIN_OUTPUT_LENGTH; otherwise 'waiting_approval'.
 */
function validateAgentOutput(
  agentOutput: string,
  skipTests: boolean
): 'verified' | 'waiting_approval' {
  if (skipTests) {
    return 'waiting_approval';
  }

  const usedTools = agentOutput.includes(TOOL_MARKER);
  const longEnough = agentOutput.trim().length >= MIN_OUTPUT_LENGTH;

  return usedTools && longEnough ? 'verified' : 'waiting_approval';
}
|
||||
|
||||
describe('Agent Output Validation - Contract Tests', () => {
|
||||
describe('tool marker format contract', () => {
  it('agent-executor tool format contains the expected marker', () => {
    const rendered = formatToolUseBlock('Read', { file_path: '/src/index.ts' });
    expect(rendered).toContain(TOOL_MARKER);
  });

  it('agent-executor tool format includes tool name after marker', () => {
    const editInput = {
      file_path: '/src/app.ts',
      old_string: 'foo',
      new_string: 'bar',
    };
    expect(formatToolUseBlock('Edit', editInput)).toContain('🔧 Tool: Edit');
  });

  it('agent-executor tool format includes JSON input', () => {
    const rendered = formatToolUseBlock('Read', { file_path: '/src/index.ts' });
    // Both the "Input: " label and the pretty-printed key/value must be present.
    expect(rendered).toContain('Input: ');
    expect(rendered).toContain('"file_path": "/src/index.ts"');
  });

  it('agent-executor tool format works without input', () => {
    const rendered = formatToolUseBlock('Bash');
    expect(rendered).toContain('🔧 Tool: Bash');
    expect(rendered).not.toContain('Input:');
  });

  it('marker includes colon and space to avoid false positives', () => {
    // Pin the literal so a change to the marker has to be made deliberately.
    expect(TOOL_MARKER).toBe('🔧 Tool:');
    expect(TOOL_MARKER).toContain(':');
  });
});
|
||||
|
||||
describe('output validation logic', () => {
  it('verified: tool usage + sufficient output', () => {
    const output =
      'Starting implementation of the new feature...\n' +
      formatToolUseBlock('Read', { file_path: '/src/index.ts' }) +
      'I can see the existing code. Let me make the needed changes.\n' +
      formatToolUseBlock('Edit', { file_path: '/src/index.ts' }) +
      'Changes complete. The implementation adds new validation logic and tests.';
    // Guard the fixture itself: it must be long enough for the test to be meaningful.
    expect(output.trim().length).toBeGreaterThanOrEqual(MIN_OUTPUT_LENGTH);

    expect(validateAgentOutput(output, false)).toBe('verified');
  });

  it('waiting_approval: no tool markers regardless of length', () => {
    const longOutput = 'I analyzed the codebase. '.repeat(50);
    expect(longOutput.trim().length).toBeGreaterThan(MIN_OUTPUT_LENGTH);

    expect(validateAgentOutput(longOutput, false)).toBe('waiting_approval');
  });

  it('waiting_approval: tool markers but insufficient length', () => {
    const shortOutput = formatToolUseBlock('Read', { file_path: '/src/a.ts' });
    expect(shortOutput.trim().length).toBeLessThan(MIN_OUTPUT_LENGTH);

    expect(validateAgentOutput(shortOutput, false)).toBe('waiting_approval');
  });

  it('waiting_approval: empty output', () => {
    expect(validateAgentOutput('', false)).toBe('waiting_approval');
  });

  it('waiting_approval: skipTests always overrides', () => {
    const goodOutput =
      'Starting...\n' +
      formatToolUseBlock('Read', { file_path: '/src/index.ts' }) +
      formatToolUseBlock('Edit', { file_path: '/src/index.ts' }) +
      'Done implementing. '.repeat(15);
    expect(goodOutput.trim().length).toBeGreaterThanOrEqual(MIN_OUTPUT_LENGTH);

    expect(validateAgentOutput(goodOutput, true)).toBe('waiting_approval');
  });

  it('boundary: exactly MIN_OUTPUT_LENGTH chars with tool is verified', () => {
    // formatToolUseBlock() starts with a newline that validateAgentOutput's
    // trim() strips but keeps the one after the tool name. Drop the leading
    // newline up front so the padding is computed against the text that
    // actually survives trimming and the output lands exactly on the threshold.
    const tool = formatToolUseBlock('Read').trimStart();
    const padding = 'x'.repeat(MIN_OUTPUT_LENGTH - tool.length);
    const output = tool + padding;
    expect(output.trim().length).toBe(MIN_OUTPUT_LENGTH);

    expect(validateAgentOutput(output, false)).toBe('verified');
  });

  it('boundary: MIN_OUTPUT_LENGTH - 1 chars with tool is waiting_approval', () => {
    const marker = `${TOOL_MARKER} Read\n`;
    const padding = 'x'.repeat(MIN_OUTPUT_LENGTH - 1 - marker.length);
    const output = marker + padding;
    expect(output.trim().length).toBe(MIN_OUTPUT_LENGTH - 1);

    expect(validateAgentOutput(output, false)).toBe('waiting_approval');
  });
});
|
||||
|
||||
describe('realistic provider scenarios', () => {
  it('Claude SDK agent with multiple tools → verified', () => {
    // Narration interleaved with a Read and an Edit tool block.
    const transcript = [
      "I'll implement the feature.\n\n",
      formatToolUseBlock('Read', { file_path: '/src/components/App.tsx' }),
      'I see the component. Let me update it.\n\n',
      formatToolUseBlock('Edit', {
        file_path: '/src/components/App.tsx',
        old_string: 'const App = () => {',
        new_string: 'const App: React.FC = () => {',
      }),
      'Done. The component is now typed correctly.\n',
    ].join('');

    expect(validateAgentOutput(transcript, false)).toBe('verified');
  });

  it('Cursor CLI quick exit (no tools) → waiting_approval', () => {
    expect(
      validateAgentOutput('Task received. Processing...\nResult: completed successfully.', false)
    ).toBe('waiting_approval');
  });

  it('Codex CLI with brief acknowledgment → waiting_approval', () => {
    expect(
      validateAgentOutput('Understood the task. Starting implementation.\nDone.', false)
    ).toBe('waiting_approval');
  });

  it('Agent that only reads but makes no edits (single Read tool, short output) → waiting_approval', () => {
    const readOnly = `${formatToolUseBlock('Read', { file_path: '/src/index.ts' })}File read.`;
    expect(readOnly.trim().length).toBeLessThan(MIN_OUTPUT_LENGTH);
    expect(validateAgentOutput(readOnly, false)).toBe('waiting_approval');
  });

  it('Agent with extensive tool usage and explanation → verified', () => {
    // Five Read blocks, each followed by a short note, then one Edit block.
    const reads = Array.from(
      { length: 5 },
      (_, i) =>
        formatToolUseBlock('Read', { file_path: `/src/auth/handler${i}.ts` }) +
        `Found handler ${i}. `
    ).join('');
    const edit = formatToolUseBlock('Edit', {
      file_path: '/src/auth/handler0.ts',
      old_string: 'function login() {}',
      new_string: 'async function login(creds: Credentials) { ... }',
    });
    const transcript =
      'Analyzing the codebase for the authentication feature.\n\n' +
      reads +
      edit +
      'Implementation complete with all authentication changes applied.\n';

    expect(validateAgentOutput(transcript, false)).toBe('verified');
  });
});
|
||||
});
|
||||
@@ -211,7 +211,14 @@ describe('execution-service.ts', () => {
|
||||
});
|
||||
|
||||
// Default mocks for secureFs
|
||||
vi.mocked(secureFs.readFile).mockResolvedValue('Agent output content');
|
||||
// Include tool usage markers to simulate meaningful agent output.
|
||||
// The execution service checks for '🔧 Tool:' markers and minimum
|
||||
// output length to determine if the agent did real work.
|
||||
vi.mocked(secureFs.readFile).mockResolvedValue(
|
||||
'Starting implementation...\n\n🔧 Tool: Read\nInput: {"file_path": "/src/index.ts"}\n\n' +
|
||||
'🔧 Tool: Edit\nInput: {"file_path": "/src/index.ts", "old_string": "foo", "new_string": "bar"}\n\n' +
|
||||
'Implementation complete. Updated the code as requested.'
|
||||
);
|
||||
vi.mocked(secureFs.access).mockResolvedValue(undefined);
|
||||
|
||||
// Re-setup platform mocks
|
||||
@@ -1433,4 +1440,439 @@ describe('execution-service.ts', () => {
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
describe('executeFeature - agent output validation', () => {
  // Marker and length threshold these tests expect the execution service to
  // apply when deciding whether the agent did meaningful work.
  const TOOL_MARKER = '🔧 Tool:';
  const MIN_OUTPUT_LENGTH = 200;

  // Helper to generate realistic agent output with tool markers
  const makeAgentOutput = (toolCount: number, extraText = ''): string => {
    let output = 'Starting implementation...\n\n';
    for (let i = 0; i < toolCount; i++) {
      output += `🔧 Tool: Edit\nInput: {"file_path": "/src/file${i}.ts", "old_string": "old${i}", "new_string": "new${i}"}\n\n`;
    }
    output += `Implementation complete. ${extraText}`;
    return output;
  };

  // Builds a fresh service from the current values of the shared mocks, so a
  // test can reassign a mock (e.g. mockLoadFeatureFn) before calling this.
  const createServiceWithMocks = () => {
    return new ExecutionService(
      mockEventBus,
      mockConcurrencyManager,
      mockWorktreeResolver,
      mockSettingsService,
      mockRunAgentFn,
      mockExecutePipelineFn,
      mockUpdateFeatureStatusFn,
      mockLoadFeatureFn,
      mockGetPlanningPromptPrefixFn,
      mockSaveFeatureSummaryFn,
      mockRecordLearningsFn,
      mockContextExistsFn,
      mockResumeFeatureFn,
      mockTrackFailureFn,
      mockSignalPauseFn,
      mockRecordSuccessFn,
      mockSaveExecutionStateFn,
      mockLoadContextFilesFn
    );
  };

  // Stubs the agent-output file content, then runs the feature on a fresh service.
  const executeWithOutput = async (agentOutput: string): Promise<void> => {
    vi.mocked(secureFs.readFile).mockResolvedValue(agentOutput);
    await createServiceWithMocks().executeFeature('/test/project', 'feature-1');
  };

  // Asserts that the feature status was updated to the given final status.
  const expectStatusUpdate = (status: 'verified' | 'waiting_approval'): void => {
    expect(mockUpdateFeatureStatusFn).toHaveBeenCalledWith(
      '/test/project',
      'feature-1',
      status
    );
  };

  it('sets verified when agent output has tool usage and sufficient length', async () => {
    const output = makeAgentOutput(3, 'Updated authentication module with new login flow.');
    vi.mocked(secureFs.readFile).mockResolvedValue(output);

    // Runs against the suite-level `service` instance rather than a fresh one.
    await service.executeFeature('/test/project', 'feature-1');

    expectStatusUpdate('verified');
  });

  it('sets waiting_approval when agent output is empty', async () => {
    await executeWithOutput('');

    expectStatusUpdate('waiting_approval');
  });

  it('sets waiting_approval when agent output has no tool usage markers', async () => {
    // Long output but no tool markers - agent printed text but didn't use tools
    const longOutputNoTools = 'I analyzed the codebase and found several issues. '.repeat(20);

    await executeWithOutput(longOutputNoTools);

    expectStatusUpdate('waiting_approval');
  });

  it('sets waiting_approval when agent output has tool markers but is too short', async () => {
    // Has a tool marker but total output is under the length threshold
    const shortWithTool = '🔧 Tool: Read\nInput: {"file_path": "/src/index.ts"}\nDone.';
    expect(shortWithTool.trim().length).toBeLessThan(MIN_OUTPUT_LENGTH);

    await executeWithOutput(shortWithTool);

    expectStatusUpdate('waiting_approval');
  });

  it('sets waiting_approval when agent output file is missing (ENOENT)', async () => {
    vi.mocked(secureFs.readFile).mockRejectedValue(new Error('ENOENT'));

    const svc = createServiceWithMocks();
    await svc.executeFeature('/test/project', 'feature-1');

    expectStatusUpdate('waiting_approval');
  });

  it('sets waiting_approval when agent output is only whitespace', async () => {
    await executeWithOutput(' \n\n\t \n ');

    expectStatusUpdate('waiting_approval');
  });

  it('sets verified when output is exactly at the 200 char threshold with tool usage', async () => {
    // The output starts with the marker and ends with 'x', so trim() removes
    // nothing and the length is exactly the threshold. Pin it with toBe so the
    // fixture cannot silently drift above the boundary.
    const toolMarker = '🔧 Tool: Edit\nInput: {"file_path": "/src/index.ts"}\n';
    const padding = 'x'.repeat(MIN_OUTPUT_LENGTH - toolMarker.length);
    const output = toolMarker + padding;
    expect(output.trim().length).toBe(MIN_OUTPUT_LENGTH);

    await executeWithOutput(output);

    expectStatusUpdate('verified');
  });

  it('sets waiting_approval when output is 199 chars with tool usage (below threshold)', async () => {
    const toolMarker = '🔧 Tool: Read\n';
    const padding = 'x'.repeat(MIN_OUTPUT_LENGTH - 1 - toolMarker.length);
    const output = toolMarker + padding;
    expect(output.trim().length).toBe(MIN_OUTPUT_LENGTH - 1);

    await executeWithOutput(output);

    expectStatusUpdate('waiting_approval');
  });

  it('skipTests always takes priority over output validation', async () => {
    // Meaningful output with tool usage - would normally be 'verified'
    const output = makeAgentOutput(5, 'All changes applied successfully.');
    mockLoadFeatureFn = vi.fn().mockResolvedValue({ ...testFeature, skipTests: true });

    await executeWithOutput(output);

    // skipTests=true always means waiting_approval regardless of output quality
    expectStatusUpdate('waiting_approval');
  });

  it('skipTests with empty output still results in waiting_approval', async () => {
    mockLoadFeatureFn = vi.fn().mockResolvedValue({ ...testFeature, skipTests: true });

    await executeWithOutput('');

    expectStatusUpdate('waiting_approval');
  });

  it('still records success even when output validation fails', async () => {
    await executeWithOutput('');

    // recordSuccess should still be called - the agent ran without errors
    expect(mockRecordSuccessFn).toHaveBeenCalled();
  });

  it('still extracts summary when output has content but no tool markers', async () => {
    const outputNoTools = 'A '.repeat(150); // > 200 chars but no tool markers

    await executeWithOutput(outputNoTools);

    // Summary extraction still runs even though status is waiting_approval
    expect(extractSummary).toHaveBeenCalledWith(outputNoTools);
    expect(mockSaveFeatureSummaryFn).toHaveBeenCalledWith(
      '/test/project',
      'feature-1',
      'Test summary'
    );
  });

  it('emits feature_complete with passes=true even when output validation routes to waiting_approval', async () => {
    vi.mocked(secureFs.readFile).mockResolvedValue('');

    const svc = createServiceWithMocks();
    await svc.executeFeature('/test/project', 'feature-1', false, true);

    // The agent ran without error - it's still a "pass" from the execution perspective
    expect(mockEventBus.emitAutoModeEvent).toHaveBeenCalledWith(
      'auto_mode_feature_complete',
      expect.objectContaining({ passes: true })
    );
  });

  it('handles realistic Cursor CLI output that exits quickly', async () => {
    // Simulates a Cursor CLI that prints a brief message and exits
    const cursorQuickExit = 'Task received. Processing...\nResult: completed successfully.';
    expect(cursorQuickExit.includes(TOOL_MARKER)).toBe(false);

    await executeWithOutput(cursorQuickExit);

    // No tool usage = waiting_approval
    expectStatusUpdate('waiting_approval');
  });

  it('handles realistic Claude SDK output with multiple tool uses', async () => {
    // Simulates a Claude SDK agent that does real work
    const claudeOutput =
      "I'll implement the requested feature.\n\n" +
      '🔧 Tool: Read\nInput: {"file_path": "/src/components/App.tsx"}\n\n' +
      'I can see the existing component structure. Let me modify it.\n\n' +
      '🔧 Tool: Edit\nInput: {"file_path": "/src/components/App.tsx", "old_string": "const App = () => {", "new_string": "const App: React.FC = () => {"}\n\n' +
      '🔧 Tool: Write\nInput: {"file_path": "/src/components/NewFeature.tsx"}\n\n' +
      "I've created the new component and updated the existing one. The feature is now implemented with proper TypeScript types.";

    await executeWithOutput(claudeOutput);

    // Real work = verified
    expectStatusUpdate('verified');
  });

  it('reads agent output from the correct path with utf-8 encoding', async () => {
    await executeWithOutput(makeAgentOutput(2, 'Done with changes.'));

    // Verify readFile was called with the correct path derived from getFeatureDir
    expect(secureFs.readFile).toHaveBeenCalledWith(
      '/test/project/.automaker/features/feature-1/agent-output.md',
      'utf-8'
    );
  });

  it('completion message includes auto-verified when status is verified', async () => {
    const output = makeAgentOutput(3, 'All changes applied.');
    vi.mocked(secureFs.readFile).mockResolvedValue(output);

    const svc = createServiceWithMocks();
    await svc.executeFeature('/test/project', 'feature-1', false, true);

    expect(mockEventBus.emitAutoModeEvent).toHaveBeenCalledWith(
      'auto_mode_feature_complete',
      expect.objectContaining({
        message: expect.stringContaining('auto-verified'),
      })
    );
  });

  it('completion message does NOT include auto-verified when status is waiting_approval', async () => {
    // Empty output → waiting_approval
    vi.mocked(secureFs.readFile).mockResolvedValue('');

    const svc = createServiceWithMocks();
    await svc.executeFeature('/test/project', 'feature-1', false, true);

    const completeCall = vi
      .mocked(mockEventBus.emitAutoModeEvent)
      .mock.calls.find((call) => call[0] === 'auto_mode_feature_complete');
    expect(completeCall).toBeDefined();
    expect((completeCall![1] as { message: string }).message).not.toContain('auto-verified');
  });

  it('uses same agentOutput for both status determination and summary extraction', async () => {
    // Specific output that is long enough with tool markers (verified path)
    // AND has content for summary extraction
    const specificOutput =
      '🔧 Tool: Read\nReading file...\n🔧 Tool: Edit\nEditing file...\n' +
      'The implementation is complete. Here is a detailed description of what was done. '.repeat(
        3
      );

    await executeWithOutput(specificOutput);

    // Status should be verified (has tools + long enough)
    expectStatusUpdate('verified');
    // extractSummary should receive the exact same output
    expect(extractSummary).toHaveBeenCalledWith(specificOutput);
    // recordLearnings should also receive the same output
    expect(mockRecordLearningsFn).toHaveBeenCalledWith(
      '/test/project',
      testFeature,
      specificOutput
    );
  });

  it('does not call recordMemoryUsage when output is empty and memoryFiles is empty', async () => {
    const { recordMemoryUsage } = await import('@automaker/utils');

    await executeWithOutput('');

    // With empty output and empty memoryFiles, recordMemoryUsage should not be called
    expect(recordMemoryUsage).not.toHaveBeenCalled();
  });

  it('handles output with special unicode characters correctly', async () => {
    // Output with various unicode but includes tool markers
    const unicodeOutput =
      '🔧 Tool: Read\n' +
      '🔧 Tool: Edit\n' +
      'Añadiendo función de búsqueda con caracteres especiales: ñ, ü, ö, é, 日本語テスト. ' +
      'Die Änderungen wurden erfolgreich implementiert. '.repeat(3);

    await executeWithOutput(unicodeOutput);

    // Should still detect tool markers and sufficient length
    expectStatusUpdate('verified');
  });

  it('treats output with only newlines and spaces around tool marker as insufficient', async () => {
    // Has tool marker but surrounded by whitespace, total trimmed length below threshold
    const sparseOutput = '\n\n 🔧 Tool: Read \n\n';
    expect(sparseOutput.trim().length).toBeLessThan(MIN_OUTPUT_LENGTH);

    await executeWithOutput(sparseOutput);

    expectStatusUpdate('waiting_approval');
  });

  it('detects tool marker substring correctly (partial match like "🔧 Tools:" does not count)', async () => {
    // Output with a similar but not exact marker - "🔧 Tools:" instead of "🔧 Tool:"
    const wrongMarker = '🔧 Tools: Read\n🔧 Tools: Edit\n' + 'Implementation done. '.repeat(20);
    expect(wrongMarker.includes(TOOL_MARKER)).toBe(false);

    await executeWithOutput(wrongMarker);

    // "🔧 Tools:" is not the same as "🔧 Tool:" - should be waiting_approval
    expectStatusUpdate('waiting_approval');
  });

  it('pipeline merge_conflict status short-circuits before output validation', async () => {
    // Set up pipeline that results in merge_conflict
    vi.mocked(pipelineService.getPipelineConfig).mockResolvedValue({
      version: 1,
      steps: [{ id: 'step-1', name: 'Step 1', order: 1, instructions: 'Do step 1' }] as any,
    });

    // After pipeline, loadFeature returns merge_conflict status
    let loadCallCount = 0;
    mockLoadFeatureFn = vi.fn().mockImplementation(() => {
      loadCallCount++;
      if (loadCallCount === 1) return testFeature; // initial load
      // All subsequent loads (task check + pipeline refresh) return merge_conflict
      return { ...testFeature, status: 'merge_conflict' };
    });

    const svc = createServiceWithMocks();
    await svc.executeFeature('/test/project', 'feature-1');

    // Should NOT have called updateFeatureStatusFn with 'verified' or 'waiting_approval'
    // because pipeline merge_conflict short-circuits the method
    const statusCalls = vi
      .mocked(mockUpdateFeatureStatusFn)
      .mock.calls.filter((call) => call[2] === 'verified' || call[2] === 'waiting_approval');
    // The only non-in_progress status call should be absent since merge_conflict returns early
    expect(statusCalls.length).toBe(0);
  });
});
|
||||
});
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import { defineConfig, devices } from '@playwright/test';
|
||||
|
||||
const port = process.env.TEST_PORT || 3007;
|
||||
const serverPort = process.env.TEST_SERVER_PORT || 3008;
|
||||
const port = process.env.TEST_PORT || 3107;
|
||||
const serverPort = process.env.TEST_SERVER_PORT || 3108;
|
||||
const reuseServer = process.env.TEST_REUSE_SERVER === 'true';
|
||||
const useExternalBackend = !!process.env.VITE_SERVER_URL;
|
||||
// Always use mock agent for tests (disables rate limiting, uses mock Claude responses)
|
||||
@@ -70,6 +70,10 @@ export default defineConfig({
|
||||
timeout: 120000,
|
||||
env: {
|
||||
...process.env,
|
||||
// Must set AUTOMAKER_WEB_PORT to match the port Playwright waits for
|
||||
AUTOMAKER_WEB_PORT: String(port),
|
||||
// Must set AUTOMAKER_SERVER_PORT so Vite proxy forwards to the correct backend port
|
||||
AUTOMAKER_SERVER_PORT: String(serverPort),
|
||||
VITE_SKIP_SETUP: 'true',
|
||||
// Always skip electron plugin during tests - prevents duplicate server spawning
|
||||
VITE_SKIP_ELECTRON: 'true',
|
||||
|
||||
@@ -8,10 +8,11 @@ import { promisify } from 'util';
|
||||
|
||||
const execAsync = promisify(exec);
|
||||
|
||||
const SERVER_PORT = process.env.TEST_SERVER_PORT || 3008;
|
||||
const UI_PORT = process.env.TEST_PORT || 3007;
|
||||
const SERVER_PORT = process.env.TEST_SERVER_PORT || 3108;
|
||||
const UI_PORT = process.env.TEST_PORT || 3107;
|
||||
const USE_EXTERNAL_SERVER = !!process.env.VITE_SERVER_URL;
|
||||
|
||||
console.log(`[KillTestServers] SERVER_PORT ${SERVER_PORT}`);
|
||||
console.log(`[KillTestServers] UI_PORT ${UI_PORT}`);
|
||||
async function killProcessOnPort(port) {
|
||||
try {
|
||||
const hasLsof = await execAsync('command -v lsof').then(
|
||||
|
||||
@@ -45,7 +45,7 @@ import { toast } from 'sonner';
|
||||
import type { PRReviewComment } from '@/lib/electron';
|
||||
import type { Feature } from '@/store/app-store';
|
||||
import type { PhaseModelEntry } from '@automaker/types';
|
||||
import { supportsReasoningEffort, isAdaptiveThinkingModel } from '@automaker/types';
|
||||
import { supportsReasoningEffort, normalizeThinkingLevelForModel } from '@automaker/types';
|
||||
import { resolveModelString } from '@automaker/model-resolver';
|
||||
import { PhaseModelSelector } from '@/components/views/settings-view/model-defaults';
|
||||
|
||||
@@ -590,20 +590,10 @@ export function PRCommentResolutionDialog({
|
||||
const wasOpenRef = useRef(false);
|
||||
|
||||
const handleModelChange = useCallback((entry: PhaseModelEntry) => {
|
||||
// Normalize thinking level when switching between adaptive and non-adaptive models
|
||||
const isNewModelAdaptive =
|
||||
typeof entry.model === 'string' && isAdaptiveThinkingModel(entry.model);
|
||||
const currentLevel = entry.thinkingLevel || 'none';
|
||||
const modelId = typeof entry.model === 'string' ? entry.model : '';
|
||||
const normalizedThinkingLevel = normalizeThinkingLevelForModel(modelId, entry.thinkingLevel);
|
||||
|
||||
if (isNewModelAdaptive && currentLevel !== 'none' && currentLevel !== 'adaptive') {
|
||||
// Switching TO an adaptive model with a manual level -> auto-switch to 'adaptive'
|
||||
setModelEntry({ ...entry, thinkingLevel: 'adaptive' });
|
||||
} else if (!isNewModelAdaptive && currentLevel === 'adaptive') {
|
||||
// Switching FROM an adaptive model with adaptive -> auto-switch to 'high'
|
||||
setModelEntry({ ...entry, thinkingLevel: 'high' });
|
||||
} else {
|
||||
setModelEntry(entry);
|
||||
}
|
||||
setModelEntry({ ...entry, thinkingLevel: normalizedThinkingLevel });
|
||||
}, []);
|
||||
|
||||
// Fetch PR review comments
|
||||
|
||||
@@ -26,11 +26,12 @@ import { useNavigate } from '@tanstack/react-router';
|
||||
import { toast } from 'sonner';
|
||||
import { cn } from '@/lib/utils';
|
||||
import { modelSupportsThinking } from '@/lib/utils';
|
||||
import { useAppStore, ThinkingLevel, FeatureImage, PlanningMode, Feature } from '@/store/app-store';
|
||||
import { useAppStore } from '@/store/app-store';
|
||||
import type { ThinkingLevel, PlanningMode, Feature, FeatureImage } from '@/store/types';
|
||||
import type { ReasoningEffort, PhaseModelEntry, AgentModel } from '@automaker/types';
|
||||
import {
|
||||
supportsReasoningEffort,
|
||||
isAdaptiveThinkingModel,
|
||||
normalizeThinkingLevelForModel,
|
||||
getThinkingLevelsForModel,
|
||||
} from '@automaker/types';
|
||||
import {
|
||||
@@ -308,20 +309,10 @@ export function AddFeatureDialog({
|
||||
}, [planningMode]);
|
||||
|
||||
const handleModelChange = (entry: PhaseModelEntry) => {
|
||||
// Normalize thinking level when switching between adaptive and non-adaptive models
|
||||
const isNewModelAdaptive =
|
||||
typeof entry.model === 'string' && isAdaptiveThinkingModel(entry.model);
|
||||
const currentLevel = entry.thinkingLevel || 'none';
|
||||
const modelId = typeof entry.model === 'string' ? entry.model : '';
|
||||
const normalizedThinkingLevel = normalizeThinkingLevelForModel(modelId, entry.thinkingLevel);
|
||||
|
||||
if (isNewModelAdaptive && currentLevel !== 'none' && currentLevel !== 'adaptive') {
|
||||
// Switching TO Opus 4.6 with a manual level -> auto-switch to 'adaptive'
|
||||
setModelEntry({ ...entry, thinkingLevel: 'adaptive' });
|
||||
} else if (!isNewModelAdaptive && currentLevel === 'adaptive') {
|
||||
// Switching FROM Opus 4.6 with adaptive -> auto-switch to 'high'
|
||||
setModelEntry({ ...entry, thinkingLevel: 'high' });
|
||||
} else {
|
||||
setModelEntry(entry);
|
||||
}
|
||||
setModelEntry({ ...entry, thinkingLevel: normalizedThinkingLevel });
|
||||
};
|
||||
|
||||
const buildFeatureData = (): FeatureData | null => {
|
||||
|
||||
@@ -1017,6 +1017,7 @@ export function PhaseModelSelector({
|
||||
{/* Secondary zone: expand reasoning effort popover */}
|
||||
<Popover
|
||||
open={isExpanded}
|
||||
modal={false}
|
||||
onOpenChange={(isOpen) => {
|
||||
if (!isOpen) {
|
||||
setExpandedCodexModel(null);
|
||||
@@ -1409,7 +1410,9 @@ export function PhaseModelSelector({
|
||||
return (
|
||||
<button
|
||||
key={level}
|
||||
onClick={() => {
|
||||
onClick={(e) => {
|
||||
e.stopPropagation();
|
||||
e.preventDefault();
|
||||
onChange({
|
||||
providerId: provider.id,
|
||||
model: model.id,
|
||||
@@ -1497,6 +1500,7 @@ export function PhaseModelSelector({
|
||||
{/* Secondary zone: expand thinking level popover */}
|
||||
<Popover
|
||||
open={isExpanded}
|
||||
modal={false}
|
||||
onOpenChange={(isOpen) => {
|
||||
if (!isOpen) {
|
||||
setExpandedProviderModel(null);
|
||||
@@ -1549,7 +1553,9 @@ export function PhaseModelSelector({
|
||||
return (
|
||||
<button
|
||||
key={level}
|
||||
onClick={() => {
|
||||
onClick={(e) => {
|
||||
e.stopPropagation();
|
||||
e.preventDefault();
|
||||
onChange({
|
||||
providerId: provider.id,
|
||||
model: model.id,
|
||||
@@ -1752,7 +1758,9 @@ export function PhaseModelSelector({
|
||||
return (
|
||||
<button
|
||||
key={level}
|
||||
onClick={() => {
|
||||
onClick={(e) => {
|
||||
e.stopPropagation();
|
||||
e.preventDefault();
|
||||
onChange({
|
||||
model: model.id as ModelAlias,
|
||||
thinkingLevel: level,
|
||||
@@ -1856,6 +1864,7 @@ export function PhaseModelSelector({
|
||||
{/* Secondary zone: expand thinking level popover */}
|
||||
<Popover
|
||||
open={isExpanded}
|
||||
modal={false}
|
||||
onOpenChange={(isOpen) => {
|
||||
if (!isOpen) {
|
||||
setExpandedClaudeModel(null);
|
||||
@@ -1906,7 +1915,9 @@ export function PhaseModelSelector({
|
||||
return (
|
||||
<button
|
||||
key={level}
|
||||
onClick={() => {
|
||||
onClick={(e) => {
|
||||
e.stopPropagation();
|
||||
e.preventDefault();
|
||||
onChange({
|
||||
model: model.id as ModelAlias,
|
||||
thinkingLevel: level,
|
||||
@@ -2054,6 +2065,7 @@ export function PhaseModelSelector({
|
||||
>
|
||||
<Popover
|
||||
open={isExpanded}
|
||||
modal={false}
|
||||
onOpenChange={(isOpen) => {
|
||||
if (!isOpen) {
|
||||
setExpandedGroup(null);
|
||||
|
||||
@@ -20,11 +20,11 @@ export const DEFAULT_HEIGHT = 950;
|
||||
// ============================================
|
||||
// Default ports (can be overridden via env) - will be dynamically assigned if these are in use
|
||||
// When launched via root init.mjs we pass:
|
||||
// - PORT (backend)
|
||||
// - TEST_PORT (vite dev server / static)
|
||||
// - SERVER_PORT (backend API server)
|
||||
// - PORT (Vite dev server / static file server)
|
||||
// Guard against NaN from non-numeric environment variables
|
||||
const parsedServerPort = Number.parseInt(process.env.PORT ?? '', 10);
|
||||
const parsedStaticPort = Number.parseInt(process.env.TEST_PORT ?? '', 10);
|
||||
const parsedServerPort = Number.parseInt(process.env.SERVER_PORT ?? '', 10);
|
||||
const parsedStaticPort = Number.parseInt(process.env.PORT ?? '', 10);
|
||||
export const DEFAULT_SERVER_PORT = Number.isFinite(parsedServerPort) ? parsedServerPort : 3008;
|
||||
export const DEFAULT_STATIC_PORT = Number.isFinite(parsedStaticPort) ? parsedStaticPort : 3007;
|
||||
|
||||
|
||||
@@ -33,11 +33,11 @@
|
||||
--input: oklch(0.98 0 0);
|
||||
--ring: oklch(0.3 0 0);
|
||||
|
||||
--chart-1: oklch(0.3 0 0);
|
||||
--chart-2: oklch(0.5 0 0);
|
||||
--chart-3: oklch(0.4 0 0);
|
||||
--chart-4: oklch(0.6 0 0);
|
||||
--chart-5: oklch(0.35 0 0);
|
||||
--chart-1: oklch(0.5 0.14 25); /* Warm red - strings, regex */
|
||||
--chart-2: oklch(0.5 0.13 250); /* Blue - properties, variables */
|
||||
--chart-3: oklch(0.55 0.13 145); /* Green - numbers */
|
||||
--chart-4: oklch(0.45 0.14 300); /* Purple - keywords, booleans, tags */
|
||||
--chart-5: oklch(0.5 0.12 180); /* Teal - types, classes */
|
||||
|
||||
--sidebar: oklch(0.98 0 0);
|
||||
--sidebar-foreground: oklch(0.15 0 0);
|
||||
|
||||
@@ -297,10 +297,10 @@ await expect(page.locator('[data-testid="my-element"]')).toBeVisible();
|
||||
|
||||
### Port conflicts
|
||||
|
||||
If you see "Port 3008 is already in use", kill the process:
|
||||
If you see "Port 3108 is already in use", kill the process:
|
||||
|
||||
```bash
|
||||
lsof -ti:3008 | xargs kill -9
|
||||
lsof -ti:3108 | xargs kill -9
|
||||
```
|
||||
|
||||
## Available Test Utilities
|
||||
|
||||
@@ -21,6 +21,7 @@ import {
|
||||
getKanbanColumn,
|
||||
authenticateForTests,
|
||||
handleLoginScreenIfPresent,
|
||||
API_BASE_URL,
|
||||
} from '../utils';
|
||||
|
||||
const TEST_TEMP_DIR = createTempDirPath('manual-review-test');
|
||||
@@ -155,7 +156,6 @@ test.describe('Feature Manual Review Flow', () => {
|
||||
priority: 2,
|
||||
};
|
||||
|
||||
const API_BASE_URL = process.env.SERVER_URL || 'http://localhost:3008';
|
||||
const createResponse = await page.request.post(`${API_BASE_URL}/api/features/create`, {
|
||||
data: { projectPath, feature },
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
|
||||
110
apps/ui/tests/features/opus-thinking-level-none.spec.ts
Normal file
110
apps/ui/tests/features/opus-thinking-level-none.spec.ts
Normal file
@@ -0,0 +1,110 @@
|
||||
import { test, expect } from '@playwright/test';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import {
|
||||
createTempDirPath,
|
||||
cleanupTempDir,
|
||||
setupRealProject,
|
||||
waitForNetworkIdle,
|
||||
clickAddFeature,
|
||||
fillAddFeatureDialog,
|
||||
confirmAddFeature,
|
||||
authenticateForTests,
|
||||
handleLoginScreenIfPresent,
|
||||
} from '../utils';
|
||||
|
||||
const TEST_TEMP_DIR = createTempDirPath('opus-thinking-level-none');
|
||||
|
||||
test.describe('Opus thinking level', () => {
  const projectName = `test-project-${Date.now()}`;
  let projectPath: string;

  // Scaffold a minimal on-disk project (package.json plus the .automaker
  // layout) under the shared temp directory before any test runs.
  test.beforeAll(async () => {
    const toPrettyJson = (value: unknown): string => JSON.stringify(value, null, 2);

    if (!fs.existsSync(TEST_TEMP_DIR)) {
      fs.mkdirSync(TEST_TEMP_DIR, { recursive: true });
    }

    projectPath = path.join(TEST_TEMP_DIR, projectName);
    fs.mkdirSync(projectPath, { recursive: true });
    fs.writeFileSync(
      path.join(projectPath, 'package.json'),
      toPrettyJson({ name: projectName, version: '1.0.0' })
    );

    const automakerDir = path.join(projectPath, '.automaker');
    const requiredDirs = [
      automakerDir,
      path.join(automakerDir, 'features'),
      path.join(automakerDir, 'context'),
    ];
    for (const dir of requiredDirs) {
      fs.mkdirSync(dir, { recursive: true });
    }

    fs.writeFileSync(
      path.join(automakerDir, 'categories.json'),
      toPrettyJson({ categories: [] })
    );
    fs.writeFileSync(
      path.join(automakerDir, 'app_spec.txt'),
      `# ${projectName}\n\nA test project for Opus thinking level e2e coverage.`
    );
  });

  // Remove everything created under the temp directory once the suite is done.
  test.afterAll(async () => {
    cleanupTempDir(TEST_TEMP_DIR);
  });

  test('persists thinkingLevel none when selected for Claude Opus', async ({ page }) => {
    // Fixed pauses kept from the original flow: one for the nested popover to
    // finish opening, one for the selection to propagate to the selector badge.
    const POPOVER_OPEN_DELAY_MS = 500;
    const STATE_UPDATE_DELAY_MS = 300;

    const featureDescription = `Opus none thinking ${Date.now()}`;
    const modelSelector = page.locator('[data-testid="model-selector"]');

    // Bring the app up on the board view of the scaffolded project.
    await setupRealProject(page, projectPath, projectName, { setAsCurrent: true });
    await authenticateForTests(page);
    await page.goto('/board');
    await page.waitForLoadState('load');
    await handleLoginScreenIfPresent(page);
    await waitForNetworkIdle(page);

    // Start a new feature and describe it.
    await clickAddFeature(page);
    await fillAddFeatureDialog(page, featureDescription);

    // Open the model picker, narrow it to Opus, and expand its thinking-level popover.
    await modelSelector.click();
    await page.locator('[cmdk-input]').fill('opus');

    const opusRow = page.locator('[cmdk-item]').filter({ hasText: 'Claude Opus' }).first();
    await expect(opusRow).toBeVisible({ timeout: 10000 });
    await opusRow.locator('button[title="Adjust thinking level"]').click();

    await page.waitForTimeout(POPOVER_OPEN_DELAY_MS);

    // The option's accessible name includes its description
    // ("None No extended thinking"), so match on both parts.
    const noneButton = page.getByRole('button', { name: /None.*No extended thinking/i }).first();
    await expect(noneButton).toBeVisible({ timeout: 5000 });
    await noneButton.click();

    await page.waitForTimeout(STATE_UPDATE_DELAY_MS);

    // With "None" chosen, the selector badge must not read "Adaptive".
    await expect(modelSelector).not.toContainText('Adaptive');

    await confirmAddFeature(page);

    // Exactly one feature directory should appear on disk; inspect its feature.json.
    const featuresDir = path.join(projectPath, '.automaker', 'features');
    await expect.poll(() => fs.readdirSync(featuresDir).length).toBe(1);

    const [featureDir] = fs.readdirSync(featuresDir);
    const featureJsonPath = path.join(featuresDir, featureDir, 'feature.json');
    const featureJson: { description: string; thinkingLevel: string } = JSON.parse(
      fs.readFileSync(featureJsonPath, 'utf-8')
    );

    expect(featureJson.description).toBe(featureDescription);
    expect(featureJson.thinkingLevel).toBe('none');
  });
});
|
||||
@@ -18,6 +18,7 @@ import {
|
||||
authenticateForTests,
|
||||
handleLoginScreenIfPresent,
|
||||
} from '../utils';
|
||||
import { DEFAULT_ELEMENT_TIMEOUT_MS } from '../utils/core/waiting';
|
||||
|
||||
const TEST_TEMP_DIR = createTempDirPath('planning-mode-verification-test');
|
||||
|
||||
@@ -69,7 +70,9 @@ test.describe('Planning Mode Fix Verification (GitHub #671)', () => {
|
||||
await handleLoginScreenIfPresent(page);
|
||||
await waitForNetworkIdle(page);
|
||||
|
||||
await expect(page.locator('[data-testid="board-view"]')).toBeVisible({ timeout: 10000 });
|
||||
await expect(page.locator('[data-testid="board-view"]')).toBeVisible({
|
||||
timeout: DEFAULT_ELEMENT_TIMEOUT_MS,
|
||||
});
|
||||
await expect(page.locator('[data-testid="kanban-column-backlog"]')).toBeVisible({
|
||||
timeout: 5000,
|
||||
});
|
||||
@@ -77,9 +80,9 @@ test.describe('Planning Mode Fix Verification (GitHub #671)', () => {
|
||||
// Open the add feature dialog
|
||||
await clickAddFeature(page);
|
||||
|
||||
// Wait for dialog to be visible
|
||||
// Wait for dialog to be visible (clickAddFeature already waits, but this adds an extra check)
|
||||
await expect(page.locator('[data-testid="add-feature-dialog"]')).toBeVisible({
|
||||
timeout: 5000,
|
||||
timeout: DEFAULT_ELEMENT_TIMEOUT_MS,
|
||||
});
|
||||
|
||||
// Find the planning mode select trigger
|
||||
|
||||
@@ -22,6 +22,7 @@ import {
|
||||
getKanbanColumn,
|
||||
authenticateForTests,
|
||||
handleLoginScreenIfPresent,
|
||||
API_BASE_URL,
|
||||
} from '../utils';
|
||||
|
||||
const TEST_TEMP_DIR = createTempDirPath('running-task-display-test');
|
||||
@@ -142,8 +143,6 @@ test.describe('Running Task Card Display', () => {
|
||||
priority: 2,
|
||||
};
|
||||
|
||||
const API_BASE_URL = process.env.SERVER_URL || 'http://localhost:3008';
|
||||
|
||||
// Create both features via HTTP API
|
||||
const createInProgress = await page.request.post(`${API_BASE_URL}/api/features/create`, {
|
||||
data: { projectPath, feature: inProgressFeature },
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
*/
|
||||
|
||||
import { Page, APIResponse } from '@playwright/test';
|
||||
import { API_BASE_URL, API_ENDPOINTS } from '../core/constants';
|
||||
import { API_BASE_URL, API_ENDPOINTS, WEB_BASE_URL } from '../core/constants';
|
||||
|
||||
// ============================================================================
|
||||
// Types
|
||||
@@ -300,7 +300,7 @@ export async function authenticateWithApiKey(page: Page, apiKey: string): Promis
|
||||
// Ensure we're on a page (needed for cookies to work)
|
||||
const currentUrl = page.url();
|
||||
if (!currentUrl || currentUrl === 'about:blank') {
|
||||
await page.goto('http://localhost:3007', { waitUntil: 'domcontentloaded' });
|
||||
await page.goto(WEB_BASE_URL, { waitUntil: 'domcontentloaded' });
|
||||
}
|
||||
|
||||
// Use Playwright request API (tied to this browser context) to avoid flakiness
|
||||
|
||||
@@ -9,8 +9,19 @@
|
||||
|
||||
/**
|
||||
* Base URL for the API server
|
||||
* Uses TEST_SERVER_PORT env var (default 3108) for test runs
|
||||
*/
|
||||
export const API_BASE_URL = 'http://localhost:3008';
|
||||
export const API_BASE_URL = process.env.TEST_SERVER_PORT
|
||||
? `http://localhost:${process.env.TEST_SERVER_PORT}`
|
||||
: 'http://localhost:3108';
|
||||
|
||||
/**
|
||||
* Base URL for the frontend web server
|
||||
* Uses TEST_PORT env var (default 3107) for test runs
|
||||
*/
|
||||
export const WEB_BASE_URL = process.env.TEST_PORT
|
||||
? `http://localhost:${process.env.TEST_PORT}`
|
||||
: 'http://localhost:3107';
|
||||
|
||||
/**
|
||||
* API endpoints for worktree operations
|
||||
|
||||
@@ -1,5 +1,12 @@
|
||||
import { Page, Locator } from '@playwright/test';
|
||||
|
||||
/**
|
||||
* Default timeout for element waiting operations in E2E tests.
|
||||
* Increased from 5000ms to 10000ms to accommodate CI environments
|
||||
* where dialog rendering may take longer due to React Query data fetching.
|
||||
*/
|
||||
export const DEFAULT_ELEMENT_TIMEOUT_MS = 10000;
|
||||
|
||||
/**
|
||||
* Wait for the page to load
|
||||
* Uses 'load' state instead of 'networkidle' because the app has persistent
|
||||
@@ -20,7 +27,7 @@ export async function waitForElement(
|
||||
): Promise<Locator> {
|
||||
const element = page.locator(`[data-testid="${testId}"]`);
|
||||
await element.waitFor({
|
||||
timeout: options?.timeout ?? 5000,
|
||||
timeout: options?.timeout ?? DEFAULT_ELEMENT_TIMEOUT_MS,
|
||||
state: options?.state ?? 'visible',
|
||||
});
|
||||
return element;
|
||||
@@ -36,7 +43,7 @@ export async function waitForElementHidden(
|
||||
): Promise<void> {
|
||||
const element = page.locator(`[data-testid="${testId}"]`);
|
||||
await element.waitFor({
|
||||
timeout: options?.timeout ?? 5000,
|
||||
timeout: options?.timeout ?? DEFAULT_ELEMENT_TIMEOUT_MS,
|
||||
state: 'hidden',
|
||||
});
|
||||
}
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import { Page, Locator } from '@playwright/test';
|
||||
import { DEFAULT_ELEMENT_TIMEOUT_MS } from '../core/waiting';
|
||||
|
||||
/**
|
||||
* Get a kanban card by feature ID
|
||||
@@ -96,9 +97,16 @@ export async function getDragHandleForFeature(page: Page, featureId: string): Pr
|
||||
* Click the add feature button
|
||||
*/
|
||||
export async function clickAddFeature(page: Page): Promise<void> {
|
||||
await page.click('[data-testid="add-feature-button"]');
|
||||
// There may be multiple add-feature buttons on the page (header, empty state).
|
||||
// Use .first() to target the first matching button in DOM order.
|
||||
const addButton = page.locator('[data-testid="add-feature-button"]').first();
|
||||
await addButton.waitFor({ state: 'visible', timeout: DEFAULT_ELEMENT_TIMEOUT_MS });
|
||||
await addButton.click({ timeout: 5000 });
|
||||
|
||||
// Wait for dialog to be visible
|
||||
await page.waitForSelector('[data-testid="add-feature-dialog"]', {
|
||||
timeout: 5000,
|
||||
state: 'visible',
|
||||
timeout: DEFAULT_ELEMENT_TIMEOUT_MS,
|
||||
});
|
||||
}
|
||||
|
||||
@@ -159,7 +167,7 @@ export async function confirmAddFeature(page: Page): Promise<void> {
|
||||
await page.click('[data-testid="confirm-add-feature"]');
|
||||
// Wait for dialog to close
|
||||
await page.waitForFunction(() => !document.querySelector('[data-testid="add-feature-dialog"]'), {
|
||||
timeout: 5000,
|
||||
timeout: DEFAULT_ELEMENT_TIMEOUT_MS,
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@@ -249,11 +249,11 @@ export default defineConfig(({ command }) => {
|
||||
},
|
||||
server: {
|
||||
host: process.env.HOST || '0.0.0.0',
|
||||
port: parseInt(process.env.TEST_PORT || process.env.AUTOMAKER_WEB_PORT || '3007', 10),
|
||||
port: parseInt(process.env.AUTOMAKER_WEB_PORT || '3007', 10),
|
||||
allowedHosts: true,
|
||||
proxy: {
|
||||
'/api': {
|
||||
target: 'http://localhost:' + (process.env.AUTOMAKER_SERVER_PORT ?? '5008'),
|
||||
target: 'http://localhost:' + (process.env.AUTOMAKER_SERVER_PORT ?? '3008'),
|
||||
changeOrigin: true,
|
||||
ws: true,
|
||||
},
|
||||
|
||||
@@ -109,7 +109,7 @@ services:
|
||||
environment:
|
||||
- NODE_ENV=development
|
||||
- VITE_SERVER_URL=http://localhost:3008
|
||||
- TEST_PORT=3007
|
||||
- TEST_PORT=3107
|
||||
- VITE_SKIP_ELECTRON=true
|
||||
- VITE_APP_MODE=3
|
||||
- HUSKY=0
|
||||
|
||||
@@ -202,6 +202,7 @@ export {
|
||||
getThinkingTokenBudget,
|
||||
isAdaptiveThinkingModel,
|
||||
getThinkingLevelsForModel,
|
||||
normalizeThinkingLevelForModel,
|
||||
getDefaultThinkingLevel,
|
||||
// Event hook constants
|
||||
EVENT_HOOK_TRIGGER_LABELS,
|
||||
|
||||
@@ -260,7 +260,13 @@ export interface ContentBlock {
|
||||
*/
|
||||
export interface ProviderMessage {
|
||||
type: 'assistant' | 'user' | 'error' | 'result';
|
||||
subtype?: 'success' | 'error' | 'error_max_turns' | 'error_max_structured_output_retries';
|
||||
subtype?:
|
||||
| 'success'
|
||||
| 'error'
|
||||
| 'error_max_turns'
|
||||
| 'error_max_structured_output_retries'
|
||||
| 'error_during_execution'
|
||||
| 'error_max_budget_usd';
|
||||
session_id?: string;
|
||||
message?: {
|
||||
role: 'user' | 'assistant';
|
||||
|
||||
@@ -349,6 +349,28 @@ export function getThinkingLevelsForModel(model: string): ThinkingLevel[] {
|
||||
return ['none', 'low', 'medium', 'high', 'ultrathink'];
|
||||
}
|
||||
|
||||
/**
 * Normalize a selected thinking level to a value supported by the target model.
 *
 * Resolution order:
 * 1. The requested level, when the model's level list contains it
 *    (a missing level is treated as 'none').
 * 2. 'none', when the model's level list contains it.
 * 3. The first level reported by getThinkingLevelsForModel.
 * 4. 'none' as a last resort if no levels are reported at all, so the
 *    function never returns undefined.
 *
 * @param model - Model identifier passed to getThinkingLevelsForModel.
 * @param thinkingLevel - Currently selected level; undefined is treated as 'none'.
 * @returns A level taken from the model's supported list, or 'none' when that list is empty.
 */
export function normalizeThinkingLevelForModel(
  model: string,
  thinkingLevel: ThinkingLevel | undefined
): ThinkingLevel {
  const availableLevels = getThinkingLevelsForModel(model);
  const currentLevel = thinkingLevel ?? 'none';

  // Keep the caller's choice whenever the target model supports it.
  if (availableLevels.includes(currentLevel)) {
    return currentLevel;
  }

  if (availableLevels.includes('none')) {
    return 'none';
  }

  // Indexing an empty array yields undefined; guard so the declared return
  // type holds even if a model reports no levels.
  return availableLevels[0] ?? 'none';
}
|
||||
|
||||
/**
|
||||
* Get the default thinking level for a given model.
|
||||
* Used when selecting a model via the primary button in the two-stage selector.
|
||||
|
||||
@@ -57,6 +57,8 @@ DEFAULT_SERVER_PORT=${AUTOMAKER_SERVER_PORT:-3008}
|
||||
PORT_SEARCH_MAX_ATTEMPTS=100
|
||||
WEB_PORT=$DEFAULT_WEB_PORT
|
||||
SERVER_PORT=$DEFAULT_SERVER_PORT
|
||||
TEST_WEB_PORT=${TEST_PORT:-3107}
|
||||
TEST_SERVER_PORT=${TEST_SERVER_PORT:-3108}
|
||||
|
||||
# Port validation function
|
||||
# Returns 0 if valid, 1 if invalid (with error message printed)
|
||||
@@ -1184,7 +1186,8 @@ case $MODE in
|
||||
if [ -f .env ]; then
|
||||
export $(grep -v '^#' .env | xargs)
|
||||
fi
|
||||
export TEST_PORT="$WEB_PORT"
|
||||
export TEST_PORT="$TEST_WEB_PORT"
|
||||
export TEST_SERVER_PORT="$TEST_SERVER_PORT"
|
||||
export VITE_SERVER_URL="http://${APP_HOST}:$SERVER_PORT"
|
||||
export PORT="$SERVER_PORT"
|
||||
export DATA_DIR="$SCRIPT_DIR/data"
|
||||
@@ -1275,7 +1278,8 @@ case $MODE in
|
||||
;;
|
||||
electron)
|
||||
# Set environment variables for Electron (it starts its own server)
|
||||
export TEST_PORT="$WEB_PORT"
|
||||
export TEST_PORT="$TEST_WEB_PORT"
|
||||
export TEST_SERVER_PORT="$TEST_SERVER_PORT"
|
||||
export PORT="$SERVER_PORT"
|
||||
export VITE_SERVER_URL="http://localhost:$SERVER_PORT"
|
||||
export CORS_ORIGIN="http://localhost:$WEB_PORT,http://127.0.0.1:$WEB_PORT"
|
||||
|
||||
Reference in New Issue
Block a user