Fix agent output validation to prevent false verified status (#807)

* Changes from fix/cursor-fix

* feat: Enhance provider error messages with diagnostic context, address test failure, fix port change, move playwright tests to different port

* Update apps/ui/src/components/views/board-view/dialogs/add-feature-dialog.tsx

Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>

* ci: Update test server port from 3008 to 3108 and add environment configuration

* fix: Correct typo in health endpoint URL and standardize port env vars

---------

Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>
This commit is contained in:
gsxdsm
2026-02-24 20:18:40 -08:00
committed by GitHub
parent 0330c70261
commit 51e9a23ba1
36 changed files with 1610 additions and 104 deletions

View File

@@ -1,7 +1,7 @@
import { defineConfig, devices } from '@playwright/test';
const port = process.env.TEST_PORT || 3007;
const serverPort = process.env.TEST_SERVER_PORT || 3008;
const port = process.env.TEST_PORT || 3107;
const serverPort = process.env.TEST_SERVER_PORT || 3108;
const reuseServer = process.env.TEST_REUSE_SERVER === 'true';
const useExternalBackend = !!process.env.VITE_SERVER_URL;
// Always use mock agent for tests (disables rate limiting, uses mock Claude responses)
@@ -70,6 +70,10 @@ export default defineConfig({
timeout: 120000,
env: {
...process.env,
// Must set AUTOMAKER_WEB_PORT to match the port Playwright waits for
AUTOMAKER_WEB_PORT: String(port),
// Must set AUTOMAKER_SERVER_PORT so Vite proxy forwards to the correct backend port
AUTOMAKER_SERVER_PORT: String(serverPort),
VITE_SKIP_SETUP: 'true',
// Always skip electron plugin during tests - prevents duplicate server spawning
VITE_SKIP_ELECTRON: 'true',

View File

@@ -8,10 +8,11 @@ import { promisify } from 'util';
const execAsync = promisify(exec);
const SERVER_PORT = process.env.TEST_SERVER_PORT || 3008;
const UI_PORT = process.env.TEST_PORT || 3007;
const SERVER_PORT = process.env.TEST_SERVER_PORT || 3108;
const UI_PORT = process.env.TEST_PORT || 3107;
const USE_EXTERNAL_SERVER = !!process.env.VITE_SERVER_URL;
console.log(`[KillTestServers] SERVER_PORT ${SERVER_PORT}`);
console.log(`[KillTestServers] UI_PORT ${UI_PORT}`);
async function killProcessOnPort(port) {
try {
const hasLsof = await execAsync('command -v lsof').then(

View File

@@ -45,7 +45,7 @@ import { toast } from 'sonner';
import type { PRReviewComment } from '@/lib/electron';
import type { Feature } from '@/store/app-store';
import type { PhaseModelEntry } from '@automaker/types';
import { supportsReasoningEffort, isAdaptiveThinkingModel } from '@automaker/types';
import { supportsReasoningEffort, normalizeThinkingLevelForModel } from '@automaker/types';
import { resolveModelString } from '@automaker/model-resolver';
import { PhaseModelSelector } from '@/components/views/settings-view/model-defaults';
@@ -590,20 +590,10 @@ export function PRCommentResolutionDialog({
const wasOpenRef = useRef(false);
const handleModelChange = useCallback((entry: PhaseModelEntry) => {
// Normalize thinking level when switching between adaptive and non-adaptive models
const isNewModelAdaptive =
typeof entry.model === 'string' && isAdaptiveThinkingModel(entry.model);
const currentLevel = entry.thinkingLevel || 'none';
const modelId = typeof entry.model === 'string' ? entry.model : '';
const normalizedThinkingLevel = normalizeThinkingLevelForModel(modelId, entry.thinkingLevel);
if (isNewModelAdaptive && currentLevel !== 'none' && currentLevel !== 'adaptive') {
// Switching TO an adaptive model with a manual level -> auto-switch to 'adaptive'
setModelEntry({ ...entry, thinkingLevel: 'adaptive' });
} else if (!isNewModelAdaptive && currentLevel === 'adaptive') {
// Switching FROM an adaptive model with adaptive -> auto-switch to 'high'
setModelEntry({ ...entry, thinkingLevel: 'high' });
} else {
setModelEntry(entry);
}
setModelEntry({ ...entry, thinkingLevel: normalizedThinkingLevel });
}, []);
// Fetch PR review comments

View File

@@ -26,11 +26,12 @@ import { useNavigate } from '@tanstack/react-router';
import { toast } from 'sonner';
import { cn } from '@/lib/utils';
import { modelSupportsThinking } from '@/lib/utils';
import { useAppStore, ThinkingLevel, FeatureImage, PlanningMode, Feature } from '@/store/app-store';
import { useAppStore } from '@/store/app-store';
import type { ThinkingLevel, PlanningMode, Feature, FeatureImage } from '@/store/types';
import type { ReasoningEffort, PhaseModelEntry, AgentModel } from '@automaker/types';
import {
supportsReasoningEffort,
isAdaptiveThinkingModel,
normalizeThinkingLevelForModel,
getThinkingLevelsForModel,
} from '@automaker/types';
import {
@@ -308,20 +309,10 @@ export function AddFeatureDialog({
}, [planningMode]);
const handleModelChange = (entry: PhaseModelEntry) => {
// Normalize thinking level when switching between adaptive and non-adaptive models
const isNewModelAdaptive =
typeof entry.model === 'string' && isAdaptiveThinkingModel(entry.model);
const currentLevel = entry.thinkingLevel || 'none';
const modelId = typeof entry.model === 'string' ? entry.model : '';
const normalizedThinkingLevel = normalizeThinkingLevelForModel(modelId, entry.thinkingLevel);
if (isNewModelAdaptive && currentLevel !== 'none' && currentLevel !== 'adaptive') {
// Switching TO Opus 4.6 with a manual level -> auto-switch to 'adaptive'
setModelEntry({ ...entry, thinkingLevel: 'adaptive' });
} else if (!isNewModelAdaptive && currentLevel === 'adaptive') {
// Switching FROM Opus 4.6 with adaptive -> auto-switch to 'high'
setModelEntry({ ...entry, thinkingLevel: 'high' });
} else {
setModelEntry(entry);
}
setModelEntry({ ...entry, thinkingLevel: normalizedThinkingLevel });
};
const buildFeatureData = (): FeatureData | null => {

View File

@@ -1017,6 +1017,7 @@ export function PhaseModelSelector({
{/* Secondary zone: expand reasoning effort popover */}
<Popover
open={isExpanded}
modal={false}
onOpenChange={(isOpen) => {
if (!isOpen) {
setExpandedCodexModel(null);
@@ -1409,7 +1410,9 @@ export function PhaseModelSelector({
return (
<button
key={level}
onClick={() => {
onClick={(e) => {
e.stopPropagation();
e.preventDefault();
onChange({
providerId: provider.id,
model: model.id,
@@ -1497,6 +1500,7 @@ export function PhaseModelSelector({
{/* Secondary zone: expand thinking level popover */}
<Popover
open={isExpanded}
modal={false}
onOpenChange={(isOpen) => {
if (!isOpen) {
setExpandedProviderModel(null);
@@ -1549,7 +1553,9 @@ export function PhaseModelSelector({
return (
<button
key={level}
onClick={() => {
onClick={(e) => {
e.stopPropagation();
e.preventDefault();
onChange({
providerId: provider.id,
model: model.id,
@@ -1752,7 +1758,9 @@ export function PhaseModelSelector({
return (
<button
key={level}
onClick={() => {
onClick={(e) => {
e.stopPropagation();
e.preventDefault();
onChange({
model: model.id as ModelAlias,
thinkingLevel: level,
@@ -1856,6 +1864,7 @@ export function PhaseModelSelector({
{/* Secondary zone: expand thinking level popover */}
<Popover
open={isExpanded}
modal={false}
onOpenChange={(isOpen) => {
if (!isOpen) {
setExpandedClaudeModel(null);
@@ -1906,7 +1915,9 @@ export function PhaseModelSelector({
return (
<button
key={level}
onClick={() => {
onClick={(e) => {
e.stopPropagation();
e.preventDefault();
onChange({
model: model.id as ModelAlias,
thinkingLevel: level,
@@ -2054,6 +2065,7 @@ export function PhaseModelSelector({
>
<Popover
open={isExpanded}
modal={false}
onOpenChange={(isOpen) => {
if (!isOpen) {
setExpandedGroup(null);

View File

@@ -20,11 +20,11 @@ export const DEFAULT_HEIGHT = 950;
// ============================================
// Default ports (can be overridden via env) - will be dynamically assigned if these are in use
// When launched via root init.mjs we pass:
// - PORT (backend)
// - TEST_PORT (vite dev server / static)
// - SERVER_PORT (backend API server)
// - PORT (Vite dev server / static file server)
// Guard against NaN from non-numeric environment variables
const parsedServerPort = Number.parseInt(process.env.PORT ?? '', 10);
const parsedStaticPort = Number.parseInt(process.env.TEST_PORT ?? '', 10);
const parsedServerPort = Number.parseInt(process.env.SERVER_PORT ?? '', 10);
const parsedStaticPort = Number.parseInt(process.env.PORT ?? '', 10);
export const DEFAULT_SERVER_PORT = Number.isFinite(parsedServerPort) ? parsedServerPort : 3008;
export const DEFAULT_STATIC_PORT = Number.isFinite(parsedStaticPort) ? parsedStaticPort : 3007;

View File

@@ -33,11 +33,11 @@
--input: oklch(0.98 0 0);
--ring: oklch(0.3 0 0);
--chart-1: oklch(0.3 0 0);
--chart-2: oklch(0.5 0 0);
--chart-3: oklch(0.4 0 0);
--chart-4: oklch(0.6 0 0);
--chart-5: oklch(0.35 0 0);
--chart-1: oklch(0.5 0.14 25); /* Warm red - strings, regex */
--chart-2: oklch(0.5 0.13 250); /* Blue - properties, variables */
--chart-3: oklch(0.55 0.13 145); /* Green - numbers */
--chart-4: oklch(0.45 0.14 300); /* Purple - keywords, booleans, tags */
--chart-5: oklch(0.5 0.12 180); /* Teal - types, classes */
--sidebar: oklch(0.98 0 0);
--sidebar-foreground: oklch(0.15 0 0);

View File

@@ -297,10 +297,10 @@ await expect(page.locator('[data-testid="my-element"]')).toBeVisible();
### Port conflicts
If you see "Port 3008 is already in use", kill the process:
If you see "Port 3108 is already in use", kill the process:
```bash
lsof -ti:3008 | xargs kill -9
lsof -ti:3108 | xargs kill -9
```
## Available Test Utilities

View File

@@ -21,6 +21,7 @@ import {
getKanbanColumn,
authenticateForTests,
handleLoginScreenIfPresent,
API_BASE_URL,
} from '../utils';
const TEST_TEMP_DIR = createTempDirPath('manual-review-test');
@@ -155,7 +156,6 @@ test.describe('Feature Manual Review Flow', () => {
priority: 2,
};
const API_BASE_URL = process.env.SERVER_URL || 'http://localhost:3008';
const createResponse = await page.request.post(`${API_BASE_URL}/api/features/create`, {
data: { projectPath, feature },
headers: { 'Content-Type': 'application/json' },

View File

@@ -0,0 +1,110 @@
import { test, expect } from '@playwright/test';
import * as fs from 'fs';
import * as path from 'path';
import {
createTempDirPath,
cleanupTempDir,
setupRealProject,
waitForNetworkIdle,
clickAddFeature,
fillAddFeatureDialog,
confirmAddFeature,
authenticateForTests,
handleLoginScreenIfPresent,
} from '../utils';
const TEST_TEMP_DIR = createTempDirPath('opus-thinking-level-none');
/**
 * E2E coverage for the thinking-level selector on Claude Opus.
 *
 * Creates a throwaway project on disk, adds a feature through the UI with the
 * thinking level explicitly set to "None", and then checks the feature.json the
 * app writes to confirm that 'none' was persisted.
 */
test.describe('Opus thinking level', () => {
  let projectPath: string;
  const projectName = `test-project-${Date.now()}`;

  // Build a minimal project skeleton (package.json + .automaker layout) on disk.
  test.beforeAll(async () => {
    projectPath = path.join(TEST_TEMP_DIR, projectName);
    // `recursive: true` creates missing parents (including TEST_TEMP_DIR) and is a
    // no-op when the directory already exists, so no existsSync guard is needed.
    fs.mkdirSync(projectPath, { recursive: true });

    fs.writeFileSync(
      path.join(projectPath, 'package.json'),
      JSON.stringify({ name: projectName, version: '1.0.0' }, null, 2)
    );

    const automakerDir = path.join(projectPath, '.automaker');
    fs.mkdirSync(path.join(automakerDir, 'features'), { recursive: true });
    fs.mkdirSync(path.join(automakerDir, 'context'), { recursive: true });

    fs.writeFileSync(
      path.join(automakerDir, 'categories.json'),
      JSON.stringify({ categories: [] }, null, 2)
    );

    fs.writeFileSync(
      path.join(automakerDir, 'app_spec.txt'),
      `# ${projectName}\n\nA test project for Opus thinking level e2e coverage.`
    );
  });

  test.afterAll(async () => {
    cleanupTempDir(TEST_TEMP_DIR);
  });

  test('persists thinkingLevel none when selected for Claude Opus', async ({ page }) => {
    const featureDescription = `Opus none thinking ${Date.now()}`;

    await setupRealProject(page, projectPath, projectName, { setAsCurrent: true });
    await authenticateForTests(page);
    await page.goto('/board');
    await page.waitForLoadState('load');
    await handleLoginScreenIfPresent(page);
    await waitForNetworkIdle(page);

    await clickAddFeature(page);
    await fillAddFeatureDialog(page, featureDescription);

    // Open the model selector and narrow the list down to Opus.
    await page.locator('[data-testid="model-selector"]').click();
    await page.locator('[cmdk-input]').fill('opus');

    const opusItem = page.locator('[cmdk-item]').filter({ hasText: 'Claude Opus' }).first();
    await expect(opusItem).toBeVisible({ timeout: 10000 });
    await opusItem.locator('button[title="Adjust thinking level"]').click();

    // Wait for the thinking level popover to appear.
    // The nested popover contains "Thinking Level" text and a "None" option;
    // the pause gives the popover's open animation time to complete.
    const POPOVER_OPEN_DELAY_MS = 500;
    await page.waitForTimeout(POPOVER_OPEN_DELAY_MS);

    // Find and click the None button.
    // The button's accessible name includes its description: "None No extended thinking".
    const noneOption = page.getByRole('button', { name: /None.*No extended thinking/i }).first();
    await expect(noneOption).toBeVisible({ timeout: 5000 });
    await noneOption.click();

    // Give the React state update a moment to reach the badge. The assertion below
    // is negative, so without this pause it could pass before a wrong value renders.
    const STATE_UPDATE_DELAY_MS = 300;
    await page.waitForTimeout(STATE_UPDATE_DELAY_MS);

    // When "None" is selected, the badge should NOT show "Adaptive".
    await expect(page.locator('[data-testid="model-selector"]')).not.toContainText('Adaptive');

    await confirmAddFeature(page);

    const featuresDir = path.join(projectPath, '.automaker', 'features');
    await expect.poll(() => fs.readdirSync(featuresDir).length).toBe(1);

    const featureDir = fs.readdirSync(featuresDir)[0];
    const featureJsonPath = path.join(featuresDir, featureDir, 'feature.json');

    // The feature directory can exist before feature.json has been written into it,
    // so wait for the file itself before reading to avoid an ENOENT race.
    await expect.poll(() => fs.existsSync(featureJsonPath)).toBe(true);

    const featureJson = JSON.parse(fs.readFileSync(featureJsonPath, 'utf-8')) as {
      description: string;
      thinkingLevel: string;
    };

    expect(featureJson.description).toBe(featureDescription);
    expect(featureJson.thinkingLevel).toBe('none');
  });
});

View File

@@ -18,6 +18,7 @@ import {
authenticateForTests,
handleLoginScreenIfPresent,
} from '../utils';
import { DEFAULT_ELEMENT_TIMEOUT_MS } from '../utils/core/waiting';
const TEST_TEMP_DIR = createTempDirPath('planning-mode-verification-test');
@@ -69,7 +70,9 @@ test.describe('Planning Mode Fix Verification (GitHub #671)', () => {
await handleLoginScreenIfPresent(page);
await waitForNetworkIdle(page);
await expect(page.locator('[data-testid="board-view"]')).toBeVisible({ timeout: 10000 });
await expect(page.locator('[data-testid="board-view"]')).toBeVisible({
timeout: DEFAULT_ELEMENT_TIMEOUT_MS,
});
await expect(page.locator('[data-testid="kanban-column-backlog"]')).toBeVisible({
timeout: 5000,
});
@@ -77,9 +80,9 @@ test.describe('Planning Mode Fix Verification (GitHub #671)', () => {
// Open the add feature dialog
await clickAddFeature(page);
// Wait for dialog to be visible
// Wait for dialog to be visible (clickAddFeature already waits, but this adds an extra check)
await expect(page.locator('[data-testid="add-feature-dialog"]')).toBeVisible({
timeout: 5000,
timeout: DEFAULT_ELEMENT_TIMEOUT_MS,
});
// Find the planning mode select trigger

View File

@@ -22,6 +22,7 @@ import {
getKanbanColumn,
authenticateForTests,
handleLoginScreenIfPresent,
API_BASE_URL,
} from '../utils';
const TEST_TEMP_DIR = createTempDirPath('running-task-display-test');
@@ -142,8 +143,6 @@ test.describe('Running Task Card Display', () => {
priority: 2,
};
const API_BASE_URL = process.env.SERVER_URL || 'http://localhost:3008';
// Create both features via HTTP API
const createInProgress = await page.request.post(`${API_BASE_URL}/api/features/create`, {
data: { projectPath, feature: inProgressFeature },

View File

@@ -4,7 +4,7 @@
*/
import { Page, APIResponse } from '@playwright/test';
import { API_BASE_URL, API_ENDPOINTS } from '../core/constants';
import { API_BASE_URL, API_ENDPOINTS, WEB_BASE_URL } from '../core/constants';
// ============================================================================
// Types
@@ -300,7 +300,7 @@ export async function authenticateWithApiKey(page: Page, apiKey: string): Promis
// Ensure we're on a page (needed for cookies to work)
const currentUrl = page.url();
if (!currentUrl || currentUrl === 'about:blank') {
await page.goto('http://localhost:3007', { waitUntil: 'domcontentloaded' });
await page.goto(WEB_BASE_URL, { waitUntil: 'domcontentloaded' });
}
// Use Playwright request API (tied to this browser context) to avoid flakiness

View File

@@ -9,8 +9,19 @@
/**
* Base URL for the API server
* Uses TEST_SERVER_PORT env var (default 3108) for test runs
*/
export const API_BASE_URL = 'http://localhost:3008';
export const API_BASE_URL = process.env.TEST_SERVER_PORT
? `http://localhost:${process.env.TEST_SERVER_PORT}`
: 'http://localhost:3108';
/**
 * Origin of the frontend web server that E2E tests navigate to.
 * The port comes from the TEST_PORT environment variable; when that is unset
 * or empty, port 3107 is used.
 */
export const WEB_BASE_URL = `http://localhost:${process.env.TEST_PORT || '3107'}`;
/**
* API endpoints for worktree operations

View File

@@ -1,5 +1,12 @@
import { Page, Locator } from '@playwright/test';
/**
 * Default timeout, in milliseconds, for element waiting operations in E2E tests.
 * waitForElement and waitForElementHidden fall back to this value when the
 * caller does not pass an explicit `options.timeout`.
 * Increased from 5000ms to 10000ms to accommodate CI environments
 * where dialog rendering may take longer due to React Query data fetching.
 */
export const DEFAULT_ELEMENT_TIMEOUT_MS = 10000;
/**
* Wait for the page to load
* Uses 'load' state instead of 'networkidle' because the app has persistent
@@ -20,7 +27,7 @@ export async function waitForElement(
): Promise<Locator> {
const element = page.locator(`[data-testid="${testId}"]`);
await element.waitFor({
timeout: options?.timeout ?? 5000,
timeout: options?.timeout ?? DEFAULT_ELEMENT_TIMEOUT_MS,
state: options?.state ?? 'visible',
});
return element;
@@ -36,7 +43,7 @@ export async function waitForElementHidden(
): Promise<void> {
const element = page.locator(`[data-testid="${testId}"]`);
await element.waitFor({
timeout: options?.timeout ?? 5000,
timeout: options?.timeout ?? DEFAULT_ELEMENT_TIMEOUT_MS,
state: 'hidden',
});
}

View File

@@ -1,4 +1,5 @@
import { Page, Locator } from '@playwright/test';
import { DEFAULT_ELEMENT_TIMEOUT_MS } from '../core/waiting';
/**
* Get a kanban card by feature ID
@@ -96,9 +97,16 @@ export async function getDragHandleForFeature(page: Page, featureId: string): Pr
* Click the add feature button
*/
export async function clickAddFeature(page: Page): Promise<void> {
await page.click('[data-testid="add-feature-button"]');
// There may be multiple add-feature buttons on the page (header, empty state).
// Use .first() to click the first visible one.
const addButton = page.locator('[data-testid="add-feature-button"]').first();
await addButton.waitFor({ state: 'visible', timeout: DEFAULT_ELEMENT_TIMEOUT_MS });
await addButton.click({ timeout: 5000 });
// Wait for dialog to be visible
await page.waitForSelector('[data-testid="add-feature-dialog"]', {
timeout: 5000,
state: 'visible',
timeout: DEFAULT_ELEMENT_TIMEOUT_MS,
});
}
@@ -159,7 +167,7 @@ export async function confirmAddFeature(page: Page): Promise<void> {
await page.click('[data-testid="confirm-add-feature"]');
// Wait for dialog to close
await page.waitForFunction(() => !document.querySelector('[data-testid="add-feature-dialog"]'), {
timeout: 5000,
timeout: DEFAULT_ELEMENT_TIMEOUT_MS,
});
}

View File

@@ -249,11 +249,11 @@ export default defineConfig(({ command }) => {
},
server: {
host: process.env.HOST || '0.0.0.0',
port: parseInt(process.env.TEST_PORT || process.env.AUTOMAKER_WEB_PORT || '3007', 10),
port: parseInt(process.env.AUTOMAKER_WEB_PORT || '3007', 10),
allowedHosts: true,
proxy: {
'/api': {
target: 'http://localhost:' + (process.env.AUTOMAKER_SERVER_PORT ?? '5008'),
target: 'http://localhost:' + (process.env.AUTOMAKER_SERVER_PORT ?? '3008'),
changeOrigin: true,
ws: true,
},