Fix agent output validation to prevent false verified status (#807)

* Changes from fix/cursor-fix

* feat: Enhance provider error messages with diagnostic context, address test failure, fix port change, move playwright tests to different port

* Update apps/ui/src/components/views/board-view/dialogs/add-feature-dialog.tsx

Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>

* ci: Update test server port from 3008 to 3108 and add environment configuration

* fix: Correct typo in health endpoint URL and standardize port env vars

---------

Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>
This commit is contained in:
gsxdsm
2026-02-24 20:18:40 -08:00
committed by GitHub
parent 0330c70261
commit 51e9a23ba1
36 changed files with 1610 additions and 104 deletions

View File

@@ -297,10 +297,10 @@ await expect(page.locator('[data-testid="my-element"]')).toBeVisible();
### Port conflicts
If you see "Port 3008 is already in use", kill the process:
If you see "Port 3108 is already in use", kill the process:
```bash
lsof -ti:3008 | xargs kill -9
lsof -ti:3108 | xargs kill -9
```
## Available Test Utilities

View File

@@ -21,6 +21,7 @@ import {
getKanbanColumn,
authenticateForTests,
handleLoginScreenIfPresent,
API_BASE_URL,
} from '../utils';
const TEST_TEMP_DIR = createTempDirPath('manual-review-test');
@@ -155,7 +156,6 @@ test.describe('Feature Manual Review Flow', () => {
priority: 2,
};
const API_BASE_URL = process.env.SERVER_URL || 'http://localhost:3008';
const createResponse = await page.request.post(`${API_BASE_URL}/api/features/create`, {
data: { projectPath, feature },
headers: { 'Content-Type': 'application/json' },

View File

@@ -0,0 +1,110 @@
import { test, expect } from '@playwright/test';
import * as fs from 'fs';
import * as path from 'path';
import {
createTempDirPath,
cleanupTempDir,
setupRealProject,
waitForNetworkIdle,
clickAddFeature,
fillAddFeatureDialog,
confirmAddFeature,
authenticateForTests,
handleLoginScreenIfPresent,
} from '../utils';
const TEST_TEMP_DIR = createTempDirPath('opus-thinking-level-none');
/**
 * E2E coverage for selecting the "None" thinking level on a Claude Opus model
 * in the add-feature dialog and checking that the choice is written to the
 * created feature's `feature.json`.
 */
test.describe('Opus thinking level', () => {
  let projectPath: string;
  const projectName = `test-project-${Date.now()}`;

  // Lay down a minimal project on disk: package.json plus the .automaker
  // scaffolding (features/, context/, categories.json, app_spec.txt).
  test.beforeAll(async () => {
    projectPath = path.join(TEST_TEMP_DIR, projectName);
    const automakerDir = path.join(projectPath, '.automaker');

    // `recursive: true` creates every missing parent (TEST_TEMP_DIR, the
    // project directory, .automaker) and is a no-op for existing directories,
    // so no separate existence check is needed.
    fs.mkdirSync(path.join(automakerDir, 'features'), { recursive: true });
    fs.mkdirSync(path.join(automakerDir, 'context'), { recursive: true });

    fs.writeFileSync(
      path.join(projectPath, 'package.json'),
      JSON.stringify({ name: projectName, version: '1.0.0' }, null, 2)
    );
    fs.writeFileSync(
      path.join(automakerDir, 'categories.json'),
      JSON.stringify({ categories: [] }, null, 2)
    );
    fs.writeFileSync(
      path.join(automakerDir, 'app_spec.txt'),
      `# ${projectName}\n\nA test project for Opus thinking level e2e coverage.`
    );
  });

  test.afterAll(async () => {
    cleanupTempDir(TEST_TEMP_DIR);
  });

  test('persists thinkingLevel none when selected for Claude Opus', async ({ page }) => {
    const featureDescription = `Opus none thinking ${Date.now()}`;

    await setupRealProject(page, projectPath, projectName, { setAsCurrent: true });
    await authenticateForTests(page);
    await page.goto('/board');
    await page.waitForLoadState('load');
    await handleLoginScreenIfPresent(page);
    await waitForNetworkIdle(page);

    await clickAddFeature(page);
    await fillAddFeatureDialog(page, featureDescription);

    // Open the model selector and narrow the list down to the Opus entry.
    await page.locator('[data-testid="model-selector"]').click();
    await page.locator('[cmdk-input]').fill('opus');
    const opusItem = page.locator('[cmdk-item]').filter({ hasText: 'Claude Opus' }).first();
    await expect(opusItem).toBeVisible({ timeout: 10000 });
    await opusItem.locator('button[title="Adjust thinking level"]').click();

    // Wait for the thinking level popover to appear.
    // The nested popover contains "Thinking Level" text and "None" option.
    // Radix UI popovers need a brief delay for the animation to complete.
    const POPOVER_OPEN_DELAY_MS = 500;
    await page.waitForTimeout(POPOVER_OPEN_DELAY_MS);

    // Find and click the None button.
    // The button's accessible name includes description: "None No extended thinking"
    const noneOption = page.getByRole('button', { name: /None.*No extended thinking/i }).first();
    await expect(noneOption).toBeVisible({ timeout: 5000 });
    await noneOption.click();

    // When "None" is selected, the badge should NOT show "Adaptive".
    // This assertion retries until it passes or times out, so it already
    // covers the time the badge needs to update; no fixed sleep is required.
    await expect(page.locator('[data-testid="model-selector"]')).not.toContainText('Adaptive');

    await confirmAddFeature(page);

    const featuresDir = path.join(projectPath, '.automaker', 'features');
    type PersistedFeature = { description: string; thinkingLevel: string };

    // Returns the parsed feature.json once exactly one feature directory
    // exists and its file is readable JSON; otherwise null so the poll retries.
    const readPersistedFeature = (): PersistedFeature | null => {
      const [onlyEntry, ...extraEntries] = fs.readdirSync(featuresDir);
      if (onlyEntry === undefined || extraEntries.length > 0) {
        return null;
      }
      try {
        const raw = fs.readFileSync(path.join(featuresDir, onlyEntry, 'feature.json'), 'utf-8');
        return JSON.parse(raw) as PersistedFeature;
      } catch {
        // The directory can exist before feature.json is fully written.
        return null;
      }
    };

    await expect.poll(() => readPersistedFeature() !== null).toBe(true);

    const featureJson = readPersistedFeature();
    expect(featureJson?.description).toBe(featureDescription);
    expect(featureJson?.thinkingLevel).toBe('none');
  });
});

View File

@@ -18,6 +18,7 @@ import {
authenticateForTests,
handleLoginScreenIfPresent,
} from '../utils';
import { DEFAULT_ELEMENT_TIMEOUT_MS } from '../utils/core/waiting';
const TEST_TEMP_DIR = createTempDirPath('planning-mode-verification-test');
@@ -69,7 +70,9 @@ test.describe('Planning Mode Fix Verification (GitHub #671)', () => {
await handleLoginScreenIfPresent(page);
await waitForNetworkIdle(page);
await expect(page.locator('[data-testid="board-view"]')).toBeVisible({ timeout: 10000 });
await expect(page.locator('[data-testid="board-view"]')).toBeVisible({
timeout: DEFAULT_ELEMENT_TIMEOUT_MS,
});
await expect(page.locator('[data-testid="kanban-column-backlog"]')).toBeVisible({
timeout: 5000,
});
@@ -77,9 +80,9 @@ test.describe('Planning Mode Fix Verification (GitHub #671)', () => {
// Open the add feature dialog
await clickAddFeature(page);
// Wait for dialog to be visible
// Wait for dialog to be visible (clickAddFeature already waits, but this adds an extra check)
await expect(page.locator('[data-testid="add-feature-dialog"]')).toBeVisible({
timeout: 5000,
timeout: DEFAULT_ELEMENT_TIMEOUT_MS,
});
// Find the planning mode select trigger

View File

@@ -22,6 +22,7 @@ import {
getKanbanColumn,
authenticateForTests,
handleLoginScreenIfPresent,
API_BASE_URL,
} from '../utils';
const TEST_TEMP_DIR = createTempDirPath('running-task-display-test');
@@ -142,8 +143,6 @@ test.describe('Running Task Card Display', () => {
priority: 2,
};
const API_BASE_URL = process.env.SERVER_URL || 'http://localhost:3008';
// Create both features via HTTP API
const createInProgress = await page.request.post(`${API_BASE_URL}/api/features/create`, {
data: { projectPath, feature: inProgressFeature },

View File

@@ -4,7 +4,7 @@
*/
import { Page, APIResponse } from '@playwright/test';
import { API_BASE_URL, API_ENDPOINTS } from '../core/constants';
import { API_BASE_URL, API_ENDPOINTS, WEB_BASE_URL } from '../core/constants';
// ============================================================================
// Types
@@ -300,7 +300,7 @@ export async function authenticateWithApiKey(page: Page, apiKey: string): Promis
// Ensure we're on a page (needed for cookies to work)
const currentUrl = page.url();
if (!currentUrl || currentUrl === 'about:blank') {
await page.goto('http://localhost:3007', { waitUntil: 'domcontentloaded' });
await page.goto(WEB_BASE_URL, { waitUntil: 'domcontentloaded' });
}
// Use Playwright request API (tied to this browser context) to avoid flakiness

View File

@@ -9,8 +9,19 @@
/**
 * Base URL for the API server.
 * The port is read from the TEST_SERVER_PORT env var; when that is unset or
 * empty the test-run default of 3108 is used.
 */
export const API_BASE_URL = `http://localhost:${process.env.TEST_SERVER_PORT || '3108'}`;
/**
 * Base URL for the frontend web server.
 * The port is read from the TEST_PORT env var; when that is unset or empty
 * the test-run default of 3107 is used.
 */
export const WEB_BASE_URL = `http://localhost:${process.env.TEST_PORT || '3107'}`;
/**
* API endpoints for worktree operations

View File

@@ -1,5 +1,12 @@
import { Page, Locator } from '@playwright/test';
/**
 * Fallback timeout, in milliseconds, for element waits in E2E tests when the
 * caller does not pass one. Raised from 5000ms to 10000ms because dialog
 * rendering can be slower on CI while React Query is still fetching data.
 */
export const DEFAULT_ELEMENT_TIMEOUT_MS = 10_000;
/**
* Wait for the page to load
* Uses 'load' state instead of 'networkidle' because the app has persistent
@@ -20,7 +27,7 @@ export async function waitForElement(
): Promise<Locator> {
const element = page.locator(`[data-testid="${testId}"]`);
await element.waitFor({
timeout: options?.timeout ?? 5000,
timeout: options?.timeout ?? DEFAULT_ELEMENT_TIMEOUT_MS,
state: options?.state ?? 'visible',
});
return element;
@@ -36,7 +43,7 @@ export async function waitForElementHidden(
): Promise<void> {
const element = page.locator(`[data-testid="${testId}"]`);
await element.waitFor({
timeout: options?.timeout ?? 5000,
timeout: options?.timeout ?? DEFAULT_ELEMENT_TIMEOUT_MS,
state: 'hidden',
});
}

View File

@@ -1,4 +1,5 @@
import { Page, Locator } from '@playwright/test';
import { DEFAULT_ELEMENT_TIMEOUT_MS } from '../core/waiting';
/**
* Get a kanban card by feature ID
@@ -96,9 +97,16 @@ export async function getDragHandleForFeature(page: Page, featureId: string): Pr
* Click the add feature button
*/
export async function clickAddFeature(page: Page): Promise<void> {
  // The selector can match more than one button (header, empty state).
  // `.first()` targets the first match in DOM order; the wait below then
  // requires that button to be visible before it is clicked exactly once.
  const addButton = page.locator('[data-testid="add-feature-button"]').first();
  await addButton.waitFor({ state: 'visible', timeout: DEFAULT_ELEMENT_TIMEOUT_MS });
  await addButton.click({ timeout: 5000 });

  // Resolve only after the dialog is visible so callers can use it right away.
  await page.waitForSelector('[data-testid="add-feature-dialog"]', {
    state: 'visible',
    timeout: DEFAULT_ELEMENT_TIMEOUT_MS,
  });
}
@@ -159,7 +167,7 @@ export async function confirmAddFeature(page: Page): Promise<void> {
await page.click('[data-testid="confirm-add-feature"]');
// Wait for dialog to close
await page.waitForFunction(() => !document.querySelector('[data-testid="add-feature-dialog"]'), {
timeout: 5000,
timeout: DEFAULT_ELEMENT_TIMEOUT_MS,
});
}