mirror of
https://github.com/AutoMaker-Org/automaker.git
synced 2026-02-01 08:13:37 +00:00
* memory * feat: add smart memory selection with task context - Add taskContext parameter to loadContextFiles for intelligent file selection - Memory files are scored based on tag matching with task keywords - Category name matching (e.g., "terminals" matches terminals.md) with 4x weight - Usage statistics influence scoring (files that helped before rank higher) - Limit to top 5 files + always include gotchas.md - Auto-mode passes feature title/description as context - Chat sessions pass user message as context This prevents loading 40+ memory files and killing context limits. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> * refactor: enhance auto-mode service and context loader - Improved context loading by adding task context for better memory selection. - Updated JSON parsing logic to handle various formats and ensure robust error handling. - Introduced file locking mechanisms to prevent race conditions during memory file updates. - Enhanced metadata handling in memory files, including validation and sanitization. - Refactored scoring logic for context files to improve selection accuracy based on task relevance. These changes optimize memory file management and enhance the overall performance of the auto-mode service. * refactor: enhance learning extraction and formatting in auto-mode service - Improved the learning extraction process by refining the user prompt to focus on meaningful insights and structured JSON output. - Updated the LearningEntry interface to include additional context fields for better documentation of decisions and patterns. - Enhanced the formatLearning function to adopt an Architecture Decision Record (ADR) style, providing richer context for recorded learnings. - Added detailed logging for better traceability during the learning extraction and appending processes. These changes aim to improve the quality and clarity of learnings captured during the auto-mode service's operation. * feat: integrate stripProviderPrefix utility for model ID handling - Added stripProviderPrefix utility to various routes to ensure providers receive bare model IDs. - Updated model references in executeQuery calls across multiple files, enhancing consistency in model ID handling. - Introduced memoryExtractionModel in settings for improved learning extraction tasks. These changes streamline the model ID processing and enhance the overall functionality of the provider interactions. * feat: enhance error handling and server offline management in board actions - Improved error handling in the handleRunFeature and handleStartImplementation functions to throw errors for better caller management. - Integrated connection error detection and server offline handling, redirecting users to the login page when the server is unreachable. - Updated follow-up feature logic to include rollback mechanisms and improved user feedback for error scenarios. These changes enhance the robustness of the board actions by ensuring proper error management and user experience during server connectivity issues. --------- Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com> Co-authored-by: webdevcody <webdevcody@gmail.com>
483 lines
17 KiB
TypeScript
483 lines
17 KiB
TypeScript
/**
|
|
* POST /context/describe-image endpoint - Generate description for an image
|
|
*
|
|
* Uses AI to analyze an image and generate a concise description
|
|
* suitable for context file metadata. Model is configurable via
|
|
* phaseModels.imageDescriptionModel in settings (defaults to Haiku).
|
|
*
|
|
* IMPORTANT:
|
|
* The agent runner (chat/auto-mode) sends images as multi-part content blocks (base64 image blocks),
|
|
* not by asking Claude to use the Read tool to open files. This endpoint now mirrors that approach
|
|
* so it doesn't depend on Claude's filesystem tool access or working directory restrictions.
|
|
*/
|
|
|
|
import type { Request, Response } from 'express';
|
|
import { query } from '@anthropic-ai/claude-agent-sdk';
|
|
import { createLogger, readImageAsBase64 } from '@automaker/utils';
|
|
import { DEFAULT_PHASE_MODELS, isCursorModel, stripProviderPrefix } from '@automaker/types';
|
|
import { resolvePhaseModel } from '@automaker/model-resolver';
|
|
import { createCustomOptions } from '../../../lib/sdk-options.js';
|
|
import { ProviderFactory } from '../../../providers/provider-factory.js';
|
|
import * as secureFs from '../../../lib/secure-fs.js';
|
|
import * as path from 'path';
|
|
import type { SettingsService } from '../../../services/settings-service.js';
|
|
import { getAutoLoadClaudeMdSetting } from '../../../lib/settings-helpers.js';
|
|
|
|
const logger = createLogger('DescribeImage');
|
|
|
|
/**
|
|
* Allowlist of safe headers to log
|
|
* All other headers are excluded to prevent leaking sensitive values
|
|
*/
|
|
const SAFE_HEADERS_ALLOWLIST = new Set([
|
|
'content-type',
|
|
'accept',
|
|
'user-agent',
|
|
'host',
|
|
'referer',
|
|
'content-length',
|
|
'origin',
|
|
'x-request-id',
|
|
]);
|
|
|
|
/**
|
|
* Filter request headers to only include safe, non-sensitive values
|
|
*/
|
|
function filterSafeHeaders(headers: Record<string, unknown>): Record<string, unknown> {
|
|
const filtered: Record<string, unknown> = {};
|
|
for (const [key, value] of Object.entries(headers)) {
|
|
if (SAFE_HEADERS_ALLOWLIST.has(key.toLowerCase())) {
|
|
filtered[key] = value;
|
|
}
|
|
}
|
|
return filtered;
|
|
}
|
|
|
|
/**
|
|
* Find the actual file path, handling Unicode character variations.
|
|
* macOS screenshots use U+202F (NARROW NO-BREAK SPACE) before AM/PM,
|
|
* but this may be transmitted as a regular space through the API.
|
|
*/
|
|
function findActualFilePath(requestedPath: string): string | null {
|
|
// First, try the exact path
|
|
if (secureFs.existsSync(requestedPath)) {
|
|
return requestedPath;
|
|
}
|
|
|
|
// Try with Unicode normalization
|
|
const normalizedPath = requestedPath.normalize('NFC');
|
|
if (secureFs.existsSync(normalizedPath)) {
|
|
return normalizedPath;
|
|
}
|
|
|
|
// If not found, try to find the file in the directory by matching the basename
|
|
// This handles cases where the space character differs (U+0020 vs U+202F vs U+00A0)
|
|
const dir = path.dirname(requestedPath);
|
|
const baseName = path.basename(requestedPath);
|
|
|
|
if (!secureFs.existsSync(dir)) {
|
|
return null;
|
|
}
|
|
|
|
try {
|
|
const files = secureFs.readdirSync(dir);
|
|
|
|
// Normalize the requested basename for comparison
|
|
// Replace various space-like characters with regular space for comparison
|
|
const normalizeSpaces = (s: string): string => s.replace(/[\u00A0\u202F\u2009\u200A]/g, ' ');
|
|
|
|
const normalizedBaseName = normalizeSpaces(baseName);
|
|
|
|
for (const file of files) {
|
|
if (normalizeSpaces(file) === normalizedBaseName) {
|
|
logger.info(`Found matching file with different space encoding: ${file}`);
|
|
return path.join(dir, file);
|
|
}
|
|
}
|
|
} catch (err) {
|
|
logger.error(`Error reading directory ${dir}: ${err}`);
|
|
}
|
|
|
|
return null;
|
|
}
|
|
|
|
/**
|
|
* Request body for the describe-image endpoint
|
|
*/
|
|
interface DescribeImageRequestBody {
|
|
/** Path to the image file */
|
|
imagePath: string;
|
|
}
|
|
|
|
/**
|
|
* Success response from the describe-image endpoint
|
|
*/
|
|
interface DescribeImageSuccessResponse {
|
|
success: true;
|
|
description: string;
|
|
}
|
|
|
|
/**
|
|
* Error response from the describe-image endpoint
|
|
*/
|
|
interface DescribeImageErrorResponse {
|
|
success: false;
|
|
error: string;
|
|
requestId?: string;
|
|
}
|
|
|
|
/**
|
|
* Map SDK/CLI errors to a stable status + user-facing message.
|
|
*/
|
|
function mapDescribeImageError(rawMessage: string | undefined): {
|
|
statusCode: number;
|
|
userMessage: string;
|
|
} {
|
|
const baseResponse = {
|
|
statusCode: 500,
|
|
userMessage: 'Failed to generate an image description. Please try again.',
|
|
};
|
|
|
|
if (!rawMessage) return baseResponse;
|
|
|
|
if (rawMessage.includes('Claude Code process exited')) {
|
|
return {
|
|
statusCode: 503,
|
|
userMessage:
|
|
'Claude exited unexpectedly while describing the image. Try again. If it keeps happening, re-run `claude login` or update your API key in Setup so Claude can restart cleanly.',
|
|
};
|
|
}
|
|
|
|
if (
|
|
rawMessage.includes('Failed to spawn Claude Code process') ||
|
|
rawMessage.includes('Claude Code executable not found') ||
|
|
rawMessage.includes('Claude Code native binary not found')
|
|
) {
|
|
return {
|
|
statusCode: 503,
|
|
userMessage:
|
|
'Claude CLI could not be launched. Make sure the Claude CLI is installed and available in PATH, then try again.',
|
|
};
|
|
}
|
|
|
|
if (rawMessage.toLowerCase().includes('rate limit') || rawMessage.includes('429')) {
|
|
return {
|
|
statusCode: 429,
|
|
userMessage: 'Rate limited while describing the image. Please wait a moment and try again.',
|
|
};
|
|
}
|
|
|
|
if (rawMessage.toLowerCase().includes('payload too large') || rawMessage.includes('413')) {
|
|
return {
|
|
statusCode: 413,
|
|
userMessage:
|
|
'The image is too large to send for description. Please resize/compress it and try again.',
|
|
};
|
|
}
|
|
|
|
return baseResponse;
|
|
}
|
|
|
|
/**
|
|
* Extract text content from Claude SDK response messages and log high-signal stream events.
|
|
*/
|
|
async function extractTextFromStream(
|
|
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
stream: AsyncIterable<any>,
|
|
requestId: string
|
|
): Promise<string> {
|
|
let responseText = '';
|
|
let messageCount = 0;
|
|
|
|
logger.info(`[${requestId}] [Stream] Begin reading SDK stream...`);
|
|
|
|
for await (const msg of stream) {
|
|
messageCount++;
|
|
const msgType = msg?.type;
|
|
const msgSubtype = msg?.subtype;
|
|
|
|
// Keep this concise but informative. Full error object is logged in catch blocks.
|
|
logger.info(
|
|
`[${requestId}] [Stream] #${messageCount} type=${String(msgType)} subtype=${String(msgSubtype ?? '')}`
|
|
);
|
|
|
|
if (msgType === 'assistant' && msg.message?.content) {
|
|
const blocks = msg.message.content as Array<{ type: string; text?: string }>;
|
|
logger.info(`[${requestId}] [Stream] assistant blocks=${blocks.length}`);
|
|
for (const block of blocks) {
|
|
if (block.type === 'text' && block.text) {
|
|
responseText += block.text;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (msgType === 'result' && msgSubtype === 'success') {
|
|
if (typeof msg.result === 'string' && msg.result.length > 0) {
|
|
responseText = msg.result;
|
|
}
|
|
}
|
|
}
|
|
|
|
logger.info(
|
|
`[${requestId}] [Stream] End of stream. messages=${messageCount} textLength=${responseText.length}`
|
|
);
|
|
|
|
return responseText;
|
|
}
|
|
|
|
/**
|
|
* Create the describe-image request handler
|
|
*
|
|
* Uses Claude SDK query with multi-part content blocks to include the image (base64),
|
|
* matching the agent runner behavior.
|
|
*
|
|
* @param settingsService - Optional settings service for loading autoLoadClaudeMd setting
|
|
* @returns Express request handler for image description
|
|
*/
|
|
export function createDescribeImageHandler(
|
|
settingsService?: SettingsService
|
|
): (req: Request, res: Response) => Promise<void> {
|
|
return async (req: Request, res: Response): Promise<void> => {
|
|
const requestId = `describe-image-${Date.now()}-${Math.random().toString(36).slice(2, 9)}`;
|
|
const startedAt = Date.now();
|
|
|
|
// Request envelope logs (high value when correlating failures)
|
|
// Only log safe headers to prevent leaking sensitive values (auth tokens, cookies, etc.)
|
|
logger.info(`[${requestId}] ===== POST /api/context/describe-image =====`);
|
|
logger.info(`[${requestId}] headers=${JSON.stringify(filterSafeHeaders(req.headers))}`);
|
|
logger.info(`[${requestId}] body=${JSON.stringify(req.body)}`);
|
|
|
|
try {
|
|
const { imagePath } = req.body as DescribeImageRequestBody;
|
|
|
|
// Validate required fields
|
|
if (!imagePath || typeof imagePath !== 'string') {
|
|
const response: DescribeImageErrorResponse = {
|
|
success: false,
|
|
error: 'imagePath is required and must be a string',
|
|
requestId,
|
|
};
|
|
res.status(400).json(response);
|
|
return;
|
|
}
|
|
|
|
logger.info(`[${requestId}] imagePath="${imagePath}" type=${typeof imagePath}`);
|
|
|
|
// Find the actual file path (handles Unicode space character variations)
|
|
const actualPath = findActualFilePath(imagePath);
|
|
if (!actualPath) {
|
|
logger.error(`[${requestId}] File not found: ${imagePath}`);
|
|
// Log hex representation of the path for debugging
|
|
const hexPath = Buffer.from(imagePath).toString('hex');
|
|
logger.error(`[${requestId}] imagePath hex: ${hexPath}`);
|
|
const response: DescribeImageErrorResponse = {
|
|
success: false,
|
|
error: `File not found: ${imagePath}`,
|
|
requestId,
|
|
};
|
|
res.status(404).json(response);
|
|
return;
|
|
}
|
|
|
|
if (actualPath !== imagePath) {
|
|
logger.info(`[${requestId}] Using actual path: ${actualPath}`);
|
|
}
|
|
|
|
// Log path + stats (this is often where issues start: missing file, perms, size)
|
|
let stat: ReturnType<typeof secureFs.statSync> | null = null;
|
|
try {
|
|
stat = secureFs.statSync(actualPath);
|
|
logger.info(
|
|
`[${requestId}] fileStats size=${stat.size} bytes mtime=${stat.mtime.toISOString()}`
|
|
);
|
|
} catch (statErr) {
|
|
logger.warn(
|
|
`[${requestId}] Unable to stat image file (continuing to read base64): ${String(statErr)}`
|
|
);
|
|
}
|
|
|
|
// Read image and convert to base64 (same as agent runner)
|
|
logger.info(`[${requestId}] Reading image into base64...`);
|
|
const imageReadStart = Date.now();
|
|
const imageData = await readImageAsBase64(actualPath);
|
|
const imageReadMs = Date.now() - imageReadStart;
|
|
|
|
const base64Length = imageData.base64.length;
|
|
const estimatedBytes = Math.ceil((base64Length * 3) / 4);
|
|
logger.info(`[${requestId}] imageReadMs=${imageReadMs}`);
|
|
logger.info(
|
|
`[${requestId}] image meta filename=${imageData.filename} mime=${imageData.mimeType} base64Len=${base64Length} estBytes=${estimatedBytes}`
|
|
);
|
|
|
|
// Build multi-part prompt with image block (no Read tool required)
|
|
const instructionText =
|
|
`Describe this image in 1-2 sentences suitable for use as context in an AI coding assistant. ` +
|
|
`Focus on what the image shows and its purpose (e.g., "UI mockup showing login form with email/password fields", ` +
|
|
`"Architecture diagram of microservices", "Screenshot of error message in terminal").\n\n` +
|
|
`Respond with ONLY the description text, no additional formatting, preamble, or explanation.`;
|
|
|
|
const promptContent = [
|
|
{ type: 'text' as const, text: instructionText },
|
|
{
|
|
type: 'image' as const,
|
|
source: {
|
|
type: 'base64' as const,
|
|
media_type: imageData.mimeType,
|
|
data: imageData.base64,
|
|
},
|
|
},
|
|
];
|
|
|
|
logger.info(`[${requestId}] Built multi-part prompt blocks=${promptContent.length}`);
|
|
|
|
const cwd = path.dirname(actualPath);
|
|
logger.info(`[${requestId}] Using cwd=${cwd}`);
|
|
|
|
// Load autoLoadClaudeMd setting
|
|
const autoLoadClaudeMd = await getAutoLoadClaudeMdSetting(
|
|
cwd,
|
|
settingsService,
|
|
'[DescribeImage]'
|
|
);
|
|
|
|
// Get model from phase settings
|
|
const settings = await settingsService?.getGlobalSettings();
|
|
const phaseModelEntry =
|
|
settings?.phaseModels?.imageDescriptionModel || DEFAULT_PHASE_MODELS.imageDescriptionModel;
|
|
const { model, thinkingLevel } = resolvePhaseModel(phaseModelEntry);
|
|
|
|
logger.info(`[${requestId}] Using model: ${model}`);
|
|
|
|
let description: string;
|
|
|
|
// Route to appropriate provider based on model type
|
|
if (isCursorModel(model)) {
|
|
// Use Cursor provider for Cursor models
|
|
// Note: Cursor may have limited support for image content blocks
|
|
logger.info(`[${requestId}] Using Cursor provider for model: ${model}`);
|
|
|
|
const provider = ProviderFactory.getProviderForModel(model);
|
|
// Strip provider prefix - providers expect bare model IDs
|
|
const bareModel = stripProviderPrefix(model);
|
|
|
|
// Build prompt with image reference for Cursor
|
|
// Note: Cursor CLI may not support base64 image blocks directly,
|
|
// so we include the image path as context
|
|
const cursorPrompt = `${instructionText}\n\nImage file: ${actualPath}\nMIME type: ${imageData.mimeType}`;
|
|
|
|
let responseText = '';
|
|
const queryStart = Date.now();
|
|
for await (const msg of provider.executeQuery({
|
|
prompt: cursorPrompt,
|
|
model: bareModel,
|
|
cwd,
|
|
maxTurns: 1,
|
|
allowedTools: ['Read'], // Allow Read tool so Cursor can read the image if needed
|
|
readOnly: true, // Image description only reads, doesn't write
|
|
})) {
|
|
if (msg.type === 'assistant' && msg.message?.content) {
|
|
for (const block of msg.message.content) {
|
|
if (block.type === 'text' && block.text) {
|
|
responseText += block.text;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
logger.info(`[${requestId}] Cursor query completed in ${Date.now() - queryStart}ms`);
|
|
description = responseText;
|
|
} else {
|
|
// Use Claude SDK for Claude models (supports image content blocks)
|
|
logger.info(`[${requestId}] Using Claude SDK for model: ${model}`);
|
|
|
|
// Use the same centralized option builder used across the server (validates cwd)
|
|
const sdkOptions = createCustomOptions({
|
|
cwd,
|
|
model,
|
|
maxTurns: 1,
|
|
allowedTools: [],
|
|
autoLoadClaudeMd,
|
|
thinkingLevel, // Pass thinking level for extended thinking
|
|
});
|
|
|
|
logger.info(
|
|
`[${requestId}] SDK options model=${sdkOptions.model} maxTurns=${sdkOptions.maxTurns} allowedTools=${JSON.stringify(
|
|
sdkOptions.allowedTools
|
|
)}`
|
|
);
|
|
|
|
const promptGenerator = (async function* () {
|
|
yield {
|
|
type: 'user' as const,
|
|
session_id: '',
|
|
message: { role: 'user' as const, content: promptContent },
|
|
parent_tool_use_id: null,
|
|
};
|
|
})();
|
|
|
|
logger.info(`[${requestId}] Calling query()...`);
|
|
const queryStart = Date.now();
|
|
const stream = query({ prompt: promptGenerator, options: sdkOptions });
|
|
logger.info(`[${requestId}] query() returned stream in ${Date.now() - queryStart}ms`);
|
|
|
|
// Extract the description from the response
|
|
const extractStart = Date.now();
|
|
description = await extractTextFromStream(stream, requestId);
|
|
logger.info(`[${requestId}] extractMs=${Date.now() - extractStart}`);
|
|
}
|
|
|
|
if (!description || description.trim().length === 0) {
|
|
logger.warn(`[${requestId}] Received empty response from Claude`);
|
|
const response: DescribeImageErrorResponse = {
|
|
success: false,
|
|
error: 'Failed to generate description - empty response',
|
|
requestId,
|
|
};
|
|
res.status(500).json(response);
|
|
return;
|
|
}
|
|
|
|
const totalMs = Date.now() - startedAt;
|
|
logger.info(`[${requestId}] Success descriptionLen=${description.length} totalMs=${totalMs}`);
|
|
|
|
const response: DescribeImageSuccessResponse = {
|
|
success: true,
|
|
description: description.trim(),
|
|
};
|
|
res.json(response);
|
|
} catch (error) {
|
|
const totalMs = Date.now() - startedAt;
|
|
const err = error as unknown;
|
|
const errMessage = err instanceof Error ? err.message : String(err);
|
|
const errName = err instanceof Error ? err.name : 'UnknownError';
|
|
const errStack = err instanceof Error ? err.stack : undefined;
|
|
|
|
logger.error(`[${requestId}] FAILED totalMs=${totalMs}`);
|
|
logger.error(`[${requestId}] errorName=${errName}`);
|
|
logger.error(`[${requestId}] errorMessage=${errMessage}`);
|
|
if (errStack) logger.error(`[${requestId}] errorStack=${errStack}`);
|
|
|
|
// Dump all enumerable + non-enumerable props (this is where stderr/stdout/exitCode often live)
|
|
try {
|
|
const props = err && typeof err === 'object' ? Object.getOwnPropertyNames(err) : [];
|
|
logger.error(`[${requestId}] errorProps=${JSON.stringify(props)}`);
|
|
if (err && typeof err === 'object') {
|
|
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
const anyErr = err as any;
|
|
const details = JSON.stringify(anyErr, props as unknown as string[]);
|
|
logger.error(`[${requestId}] errorDetails=${details}`);
|
|
}
|
|
} catch (stringifyErr) {
|
|
logger.error(`[${requestId}] Failed to serialize error object: ${String(stringifyErr)}`);
|
|
}
|
|
|
|
const { statusCode, userMessage } = mapDescribeImageError(errMessage);
|
|
const response: DescribeImageErrorResponse = {
|
|
success: false,
|
|
error: `${userMessage} (requestId: ${requestId})`,
|
|
requestId,
|
|
};
|
|
res.status(statusCode).json(response);
|
|
}
|
|
};
|
|
}
|