mirror of
https://github.com/AutoMaker-Org/automaker.git
synced 2026-01-30 06:12:03 +00:00
feat: enhance file description endpoint with security and error handling improvements
- Implemented path validation against ALLOWED_ROOT_DIRECTORY to prevent arbitrary file reads and prompt injection attacks.
- Added error handling for file reading, including specific responses for forbidden paths and file-not-found scenarios.
- Updated the description generation logic to truncate large files and provide structured prompts for analysis.
- Enhanced logging for better traceability of file access and errors.

These changes aim to improve the security and reliability of the file description functionality.
This commit is contained in:
@@ -3,12 +3,20 @@
|
||||
*
|
||||
* Uses Claude Haiku to analyze a text file and generate a concise description
|
||||
* suitable for context file metadata.
|
||||
*
|
||||
* SECURITY: This endpoint validates file paths against ALLOWED_ROOT_DIRECTORY
|
||||
* and reads file content directly (not via Claude's Read tool) to prevent
|
||||
* arbitrary file reads and prompt injection attacks.
|
||||
*/
|
||||
|
||||
import type { Request, Response } from 'express';
|
||||
import { query } from '@anthropic-ai/claude-agent-sdk';
|
||||
import { createLogger } from '@automaker/utils';
|
||||
import { CLAUDE_MODEL_MAP } from '@automaker/types';
|
||||
import { PathNotAllowedError } from '@automaker/platform';
|
||||
import { createCustomOptions } from '../../../lib/sdk-options.js';
|
||||
import * as secureFs from '../../../lib/secure-fs.js';
|
||||
import * as path from 'path';
|
||||
|
||||
const logger = createLogger('DescribeFile');
|
||||
|
||||
@@ -40,20 +48,15 @@ interface DescribeFileErrorResponse {
|
||||
* Extract text content from Claude SDK response messages
|
||||
*/
|
||||
async function extractTextFromStream(
|
||||
stream: AsyncIterable<{
|
||||
type: string;
|
||||
subtype?: string;
|
||||
result?: string;
|
||||
message?: {
|
||||
content?: Array<{ type: string; text?: string }>;
|
||||
};
|
||||
}>
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
stream: AsyncIterable<any>
|
||||
): Promise<string> {
|
||||
let responseText = '';
|
||||
|
||||
for await (const msg of stream) {
|
||||
if (msg.type === 'assistant' && msg.message?.content) {
|
||||
for (const block of msg.message.content) {
|
||||
const blocks = msg.message.content as Array<{ type: string; text?: string }>;
|
||||
for (const block of blocks) {
|
||||
if (block.type === 'text' && block.text) {
|
||||
responseText += block.text;
|
||||
}
|
||||
@@ -88,24 +91,101 @@ export function createDescribeFileHandler(): (req: Request, res: Response) => Pr
|
||||
|
||||
logger.info(`[DescribeFile] Starting description generation for: ${filePath}`);
|
||||
|
||||
// Build prompt that explicitly asks to read and describe the file
|
||||
const prompt = `Read the file at "${filePath}" and describe what it contains.
|
||||
// Resolve the path for logging and cwd derivation
|
||||
const resolvedPath = secureFs.resolvePath(filePath);
|
||||
|
||||
After reading the file, provide a 1-2 sentence description suitable for use as context in an AI coding assistant. Focus on what the file contains, its purpose, and why an AI agent might want to use this context in the future (e.g., "API documentation for the authentication endpoints", "Configuration file for database connections", "Coding style guidelines for the project").
|
||||
// Read file content using secureFs (validates path against ALLOWED_ROOT_DIRECTORY)
|
||||
// This prevents arbitrary file reads (e.g., /etc/passwd, ~/.ssh/id_rsa)
|
||||
// and prompt injection attacks where malicious filePath values could inject instructions
|
||||
let fileContent: string;
|
||||
try {
|
||||
const content = await secureFs.readFile(resolvedPath, 'utf-8');
|
||||
fileContent = typeof content === 'string' ? content : content.toString('utf-8');
|
||||
} catch (readError) {
|
||||
// Path not allowed - return 403 Forbidden
|
||||
if (readError instanceof PathNotAllowedError) {
|
||||
logger.warn(`[DescribeFile] Path not allowed: ${filePath}`);
|
||||
const response: DescribeFileErrorResponse = {
|
||||
success: false,
|
||||
error: 'File path is not within the allowed directory',
|
||||
};
|
||||
res.status(403).json(response);
|
||||
return;
|
||||
}
|
||||
|
||||
Respond with ONLY the description text, no additional formatting, preamble, or explanation.`;
|
||||
// File not found
|
||||
if (
|
||||
readError !== null &&
|
||||
typeof readError === 'object' &&
|
||||
'code' in readError &&
|
||||
readError.code === 'ENOENT'
|
||||
) {
|
||||
logger.warn(`[DescribeFile] File not found: ${resolvedPath}`);
|
||||
const response: DescribeFileErrorResponse = {
|
||||
success: false,
|
||||
error: `File not found: ${filePath}`,
|
||||
};
|
||||
res.status(404).json(response);
|
||||
return;
|
||||
}
|
||||
|
||||
// Use Claude SDK query function - needs 3+ turns for: tool call, tool result, response
|
||||
const stream = query({
|
||||
prompt,
|
||||
options: {
|
||||
model: CLAUDE_MODEL_MAP.haiku,
|
||||
maxTurns: 3,
|
||||
allowedTools: ['Read'],
|
||||
permissionMode: 'acceptEdits',
|
||||
},
|
||||
const errorMessage = readError instanceof Error ? readError.message : 'Unknown error';
|
||||
logger.error(`[DescribeFile] Failed to read file: ${errorMessage}`);
|
||||
const response: DescribeFileErrorResponse = {
|
||||
success: false,
|
||||
error: `Failed to read file: ${errorMessage}`,
|
||||
};
|
||||
res.status(500).json(response);
|
||||
return;
|
||||
}
|
||||
|
||||
// Truncate very large files to avoid token limits
|
||||
const MAX_CONTENT_LENGTH = 50000;
|
||||
const truncated = fileContent.length > MAX_CONTENT_LENGTH;
|
||||
const contentToAnalyze = truncated
|
||||
? fileContent.substring(0, MAX_CONTENT_LENGTH)
|
||||
: fileContent;
|
||||
|
||||
// Get the filename for context
|
||||
const fileName = path.basename(resolvedPath);
|
||||
|
||||
// Build prompt with file content passed as structured data
|
||||
// The file content is included directly, not via tool invocation
|
||||
const instructionText = `Analyze the following file and provide a 1-2 sentence description suitable for use as context in an AI coding assistant. Focus on what the file contains, its purpose, and why an AI agent might want to use this context in the future (e.g., "API documentation for the authentication endpoints", "Configuration file for database connections", "Coding style guidelines for the project").
|
||||
|
||||
Respond with ONLY the description text, no additional formatting, preamble, or explanation.
|
||||
|
||||
File: ${fileName}${truncated ? ' (truncated)' : ''}`;
|
||||
|
||||
const promptContent = [
|
||||
{ type: 'text' as const, text: instructionText },
|
||||
{ type: 'text' as const, text: `\n\n--- FILE CONTENT ---\n${contentToAnalyze}` },
|
||||
];
|
||||
|
||||
// Use the file's directory as the working directory
|
||||
const cwd = path.dirname(resolvedPath);
|
||||
|
||||
// Use centralized SDK options with proper cwd validation
|
||||
// No tools needed since we're passing file content directly
|
||||
const sdkOptions = createCustomOptions({
|
||||
cwd,
|
||||
model: CLAUDE_MODEL_MAP.haiku,
|
||||
maxTurns: 1,
|
||||
allowedTools: [],
|
||||
sandbox: { enabled: true, autoAllowBashIfSandboxed: true },
|
||||
});
|
||||
|
||||
const promptGenerator = (async function* () {
|
||||
yield {
|
||||
type: 'user' as const,
|
||||
session_id: '',
|
||||
message: { role: 'user' as const, content: promptContent },
|
||||
parent_tool_use_id: null,
|
||||
};
|
||||
})();
|
||||
|
||||
const stream = query({ prompt: promptGenerator, options: sdkOptions });
|
||||
|
||||
// Extract the description from the response
|
||||
const description = await extractTextFromStream(stream);
|
||||
|
||||
|
||||
@@ -20,6 +20,34 @@ import * as path from 'path';
|
||||
|
||||
const logger = createLogger('DescribeImage');
|
||||
|
||||
/**
 * Names of request headers that are considered safe to write to the logs.
 * Any header whose name is not listed here is left out of log output so that
 * sensitive values are not leaked.
 */
const SAFE_HEADERS_ALLOWLIST = new Set<string>([
  'content-type', 'accept', 'user-agent', 'host',
  'referer', 'content-length', 'origin', 'x-request-id',
]);
|
||||
|
||||
/**
 * Return a new object holding only the entries of `headers` whose name,
 * compared case-insensitively, appears in SAFE_HEADERS_ALLOWLIST.
 *
 * The original key spelling and the original value are kept as-is; the input
 * object is not modified.
 */
function filterSafeHeaders(headers: Record<string, unknown>): Record<string, unknown> {
  const safe: Record<string, unknown> = {};
  Object.keys(headers).forEach((name) => {
    // Skip anything that is not explicitly allowlisted.
    if (!SAFE_HEADERS_ALLOWLIST.has(name.toLowerCase())) {
      return;
    }
    safe[name] = headers[name];
  });
  return safe;
}
|
||||
|
||||
/**
|
||||
* Find the actual file path, handling Unicode character variations.
|
||||
* macOS screenshots use U+202F (NARROW NO-BREAK SPACE) before AM/PM,
|
||||
@@ -206,8 +234,9 @@ export function createDescribeImageHandler(): (req: Request, res: Response) => P
|
||||
const startedAt = Date.now();
|
||||
|
||||
// Request envelope logs (high value when correlating failures)
|
||||
// Only log safe headers to prevent leaking sensitive values (auth tokens, cookies, etc.)
|
||||
logger.info(`[${requestId}] ===== POST /api/context/describe-image =====`);
|
||||
logger.info(`[${requestId}] headers=${JSON.stringify(req.headers)}`);
|
||||
logger.info(`[${requestId}] headers=${JSON.stringify(filterSafeHeaders(req.headers))}`);
|
||||
logger.info(`[${requestId}] body=${JSON.stringify(req.body)}`);
|
||||
|
||||
try {
|
||||
|
||||
@@ -33,6 +33,7 @@ import {
|
||||
isImageFile,
|
||||
fileToText,
|
||||
getTextFileMimeType,
|
||||
formatFileSize,
|
||||
DEFAULT_MAX_FILE_SIZE,
|
||||
DEFAULT_MAX_FILES,
|
||||
} from '@/lib/image-utils';
|
||||
@@ -942,12 +943,3 @@ export function AgentView() {
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Format a byte count as a short human-readable string such as "1.5 KB".
 *
 * The value is scaled by powers of 1024 and rounded to at most one decimal
 * place. Units stop at GB, so larger values are expressed in GB
 * (e.g. 5 TiB -> "5120 GB") rather than indexing past the unit table.
 *
 * @param bytes - Size in bytes.
 * @returns The formatted size; "0 B" for zero, negative, or non-finite input.
 */
function formatFileSize(bytes: number): string {
  // Math.log is undefined for <= 0 and meaningless for NaN/Infinity.
  if (!Number.isFinite(bytes) || bytes <= 0) return '0 B';
  const k = 1024;
  const sizes = ['B', 'KB', 'MB', 'GB'];
  const rawIndex = Math.floor(Math.log(bytes) / Math.log(k));
  // Clamp so fractional byte counts (< 1) and values >= 1024^4 still map to a real unit.
  const i = Math.min(Math.max(rawIndex, 0), sizes.length - 1);
  return parseFloat((bytes / Math.pow(k, i)).toFixed(1)) + ' ' + sizes[i];
}
|
||||
|
||||
@@ -298,6 +298,14 @@ export function ContextView() {
|
||||
|
||||
// Reload files to update UI with new description
|
||||
await loadContextFiles();
|
||||
|
||||
// Also update selectedFile if it's the one that just got described
|
||||
setSelectedFile((current) => {
|
||||
if (current?.name === fileName) {
|
||||
return { ...current, description };
|
||||
}
|
||||
return current;
|
||||
});
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('Failed to generate description:', error);
|
||||
@@ -747,37 +755,33 @@ export function ContextView() {
|
||||
return (
|
||||
<div
|
||||
key={file.path}
|
||||
onClick={() => handleSelectFile(file)}
|
||||
className={cn(
|
||||
'group w-full flex items-center gap-2 px-3 py-2 rounded-lg transition-colors',
|
||||
'group w-full flex items-center gap-2 px-3 py-2 rounded-lg transition-colors cursor-pointer',
|
||||
selectedFile?.path === file.path
|
||||
? 'bg-primary/20 text-foreground border border-primary/30'
|
||||
: 'text-muted-foreground hover:bg-accent hover:text-foreground'
|
||||
)}
|
||||
data-testid={`context-file-${file.name}`}
|
||||
>
|
||||
<button
|
||||
onClick={() => handleSelectFile(file)}
|
||||
className="flex-1 flex items-center gap-2 text-left min-w-0"
|
||||
data-testid={`context-file-${file.name}`}
|
||||
>
|
||||
{file.type === 'image' ? (
|
||||
<ImageIcon className="w-4 h-4 flex-shrink-0" />
|
||||
) : (
|
||||
<FileText className="w-4 h-4 flex-shrink-0" />
|
||||
)}
|
||||
<div className="min-w-0 flex-1">
|
||||
<span className="truncate text-sm block">{file.name}</span>
|
||||
{isGenerating ? (
|
||||
<span className="flex items-center gap-1 text-xs text-muted-foreground">
|
||||
<Loader2 className="w-3 h-3 animate-spin" />
|
||||
Generating description...
|
||||
</span>
|
||||
) : file.description ? (
|
||||
<span className="truncate text-xs text-muted-foreground block">
|
||||
{file.description}
|
||||
</span>
|
||||
) : null}
|
||||
</div>
|
||||
</button>
|
||||
{file.type === 'image' ? (
|
||||
<ImageIcon className="w-4 h-4 flex-shrink-0" />
|
||||
) : (
|
||||
<FileText className="w-4 h-4 flex-shrink-0" />
|
||||
)}
|
||||
<div className="min-w-0 flex-1">
|
||||
<span className="truncate text-sm block">{file.name}</span>
|
||||
{isGenerating ? (
|
||||
<span className="flex items-center gap-1 text-xs text-muted-foreground">
|
||||
<Loader2 className="w-3 h-3 animate-spin" />
|
||||
Generating description...
|
||||
</span>
|
||||
) : file.description ? (
|
||||
<span className="truncate text-xs text-muted-foreground block">
|
||||
{file.description}
|
||||
</span>
|
||||
) : null}
|
||||
</div>
|
||||
<DropdownMenu>
|
||||
<DropdownMenuTrigger asChild>
|
||||
<button
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
*/
|
||||
export const CLAUDE_MODEL_MAP: Record<string, string> = {
|
||||
haiku: 'claude-haiku-4-5-20251001',
|
||||
sonnet: 'claude-sonnet-4-20250514',
|
||||
sonnet: 'claude-sonnet-4-5-20250929',
|
||||
opus: 'claude-opus-4-5-20251101',
|
||||
} as const;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user