From b628aa705162695867f1df0574b2124fc2ad2d77 Mon Sep 17 00:00:00 2001
From: Auto
Date: Fri, 2 Jan 2026 10:12:04 +0200
Subject: [PATCH] feat: Add image upload support for Spec Creation chat
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add the ability to attach images (JPEG, PNG) in the Spec Creation chat
interface for Claude to analyze during app specification creation.

Frontend changes:
- Add ImageAttachment interface to types.ts with id, filename, mimeType,
  base64Data, previewUrl, and size fields
- Update ChatMessage interface with optional attachments field
- Update useSpecChat hook to accept and send attachments via WebSocket
- Add file input, drag-drop support, and preview thumbnails to
  SpecCreationChat component with validation (5 MB max, JPEG/PNG only)
- Update ChatMessage component to render image attachments with
  click-to-enlarge functionality

Backend changes:
- Add ImageAttachment Pydantic schema with base64 validation
- Update spec_creation.py WebSocket handler to parse and validate image
  attachments from client messages
- Update spec_chat_session.py to format multimodal content blocks for
  Claude API using async generator pattern

Features:
- Drag-and-drop or click paperclip button to attach images
- Preview thumbnails with remove button before sending
- File type validation (image/jpeg, image/png)
- File size validation (5 MB maximum)
- Images display in chat history
- Click images to view full size
- Cross-platform compatible (Windows, macOS, Linux)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5
---
 server/routers/spec_creation.py | 26 ++++-
 server/schemas.py | 35 ++++++-
 server/services/spec_chat_session.py | 66 +++++++++++-
 ui/src/components/ChatMessage.tsx | 83 ++++++++++-----
 ui/src/components/SpecCreationChat.tsx | 140 +++++++++++++++++++++++--
 ui/src/hooks/useSpecChat.ts | 26 ++++-
 ui/src/lib/types.ts | 11 ++
 7 files changed, 335 insertions(+), 52
deletions(-) diff --git a/server/routers/spec_creation.py b/server/routers/spec_creation.py index 5600a2f..2ce66be 100644 --- a/server/routers/spec_creation.py +++ b/server/routers/spec_creation.py @@ -13,8 +13,9 @@ from pathlib import Path from typing import Any, Optional from fastapi import APIRouter, WebSocket, WebSocketDisconnect, HTTPException -from pydantic import BaseModel +from pydantic import BaseModel, ValidationError +from ..schemas import ImageAttachment from ..services.spec_chat_session import ( SpecChatSession, get_session, @@ -191,7 +192,24 @@ async def spec_chat_websocket(websocket: WebSocket, project_name: str): continue user_content = message.get("content", "").strip() - if not user_content: + + # Parse attachments if present + attachments: list[ImageAttachment] = [] + raw_attachments = message.get("attachments", []) + if raw_attachments: + try: + for raw_att in raw_attachments: + attachments.append(ImageAttachment(**raw_att)) + except (ValidationError, Exception) as e: + logger.warning(f"Invalid attachment data: {e}") + await websocket.send_json({ + "type": "error", + "content": f"Invalid attachment: {str(e)}" + }) + continue + + # Allow empty content if attachments are present + if not user_content and not attachments: await websocket.send_json({ "type": "error", "content": "Empty message" @@ -202,8 +220,8 @@ async def spec_chat_websocket(websocket: WebSocket, project_name: str): spec_complete_received = False spec_path = None - # Stream Claude's response - async for chunk in session.send_message(user_content): + # Stream Claude's response (with attachments if present) + async for chunk in session.send_message(user_content, attachments if attachments else None): # Track spec_complete but don't send complete yet if chunk.get("type") == "spec_complete": spec_complete_received = True diff --git a/server/schemas.py b/server/schemas.py index ab78080..723d460 100644 --- a/server/schemas.py +++ b/server/schemas.py @@ -5,9 +5,10 @@ Pydantic Schemas 
Request/Response models for the API endpoints. """ +import base64 from datetime import datetime from typing import Literal -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, field_validator # ============================================================================ @@ -163,6 +164,38 @@ class WSAgentStatusMessage(BaseModel): status: str +# ============================================================================ +# Spec Chat Schemas +# ============================================================================ + +# Maximum image file size: 5 MB +MAX_IMAGE_SIZE = 5 * 1024 * 1024 + + +class ImageAttachment(BaseModel): + """Image attachment from client for spec creation chat.""" + filename: str = Field(..., min_length=1, max_length=255) + mimeType: Literal['image/jpeg', 'image/png'] + base64Data: str + + @field_validator('base64Data') + @classmethod + def validate_base64_and_size(cls, v: str) -> str: + """Validate that base64 data is valid and within size limit.""" + try: + decoded = base64.b64decode(v) + if len(decoded) > MAX_IMAGE_SIZE: + raise ValueError( + f'Image size ({len(decoded) / (1024 * 1024):.1f} MB) exceeds ' + f'maximum of {MAX_IMAGE_SIZE // (1024 * 1024)} MB' + ) + return v + except Exception as e: + if 'Image size' in str(e): + raise + raise ValueError(f'Invalid base64 data: {e}') + + # ============================================================================ # Filesystem Schemas # ============================================================================ diff --git a/server/services/spec_chat_session.py b/server/services/spec_chat_session.py index a1a55db..1d59532 100644 --- a/server/services/spec_chat_session.py +++ b/server/services/spec_chat_session.py @@ -17,8 +17,28 @@ from typing import AsyncGenerator, Optional from claude_agent_sdk import ClaudeAgentOptions, ClaudeSDKClient +from ..schemas import ImageAttachment + logger = logging.getLogger(__name__) + +async def _make_multimodal_message(content_blocks: 
list[dict]) -> AsyncGenerator[dict, None]: + """ + Create an async generator that yields a properly formatted multimodal message. + + The Claude Agent SDK's query() method accepts either: + - A string (simple text) + - An AsyncIterable[dict] (for custom message formats) + + This function wraps content blocks in the expected message format. + """ + yield { + "type": "user", + "message": {"role": "user", "content": content_blocks}, + "parent_tool_use_id": None, + "session_id": "default", + } + # Root directory of the project ROOT_DIR = Path(__file__).parent.parent.parent @@ -166,12 +186,17 @@ class SpecChatSession: "content": f"Failed to start conversation: {str(e)}" } - async def send_message(self, user_message: str) -> AsyncGenerator[dict, None]: + async def send_message( + self, + user_message: str, + attachments: list[ImageAttachment] | None = None + ) -> AsyncGenerator[dict, None]: """ Send user message and stream Claude's response. Args: user_message: The user's response + attachments: Optional list of image attachments Yields: Message chunks of various types: @@ -191,11 +216,12 @@ class SpecChatSession: self.messages.append({ "role": "user", "content": user_message, + "has_attachments": bool(attachments), "timestamp": datetime.now().isoformat() }) try: - async for chunk in self._query_claude(user_message): + async for chunk in self._query_claude(user_message, attachments): yield chunk # Signal that the response is complete (for UI to hide loading indicator) yield {"type": "response_done"} @@ -206,11 +232,16 @@ class SpecChatSession: "content": f"Error: {str(e)}" } - async def _query_claude(self, message: str) -> AsyncGenerator[dict, None]: + async def _query_claude( + self, + message: str, + attachments: list[ImageAttachment] | None = None + ) -> AsyncGenerator[dict, None]: """ Internal method to query Claude and stream responses. Handles tool calls (Write) and text responses. + Supports multimodal content with image attachments. 
IMPORTANT: Spec creation requires BOTH files to be written: 1. app_spec.txt - the main specification @@ -221,8 +252,33 @@ class SpecChatSession: if not self.client: return - # Send the message to Claude using the SDK's query method - await self.client.query(message) + # Build the message content + if attachments and len(attachments) > 0: + # Multimodal message: build content blocks array + content_blocks = [] + + # Add text block if there's text + if message: + content_blocks.append({"type": "text", "text": message}) + + # Add image blocks + for att in attachments: + content_blocks.append({ + "type": "image", + "source": { + "type": "base64", + "media_type": att.mimeType, + "data": att.base64Data, + } + }) + + # Send multimodal content to Claude using async generator format + # The SDK's query() accepts AsyncIterable[dict] for custom message formats + await self.client.query(_make_multimodal_message(content_blocks)) + logger.info(f"Sent multimodal message with {len(attachments)} image(s)") + else: + # Text-only message: use string format + await self.client.query(message) current_text = "" diff --git a/ui/src/components/ChatMessage.tsx b/ui/src/components/ChatMessage.tsx index 66341ad..3eb9600 100644 --- a/ui/src/components/ChatMessage.tsx +++ b/ui/src/components/ChatMessage.tsx @@ -13,7 +13,7 @@ interface ChatMessageProps { } export function ChatMessage({ message }: ChatMessageProps) { - const { role, content, timestamp, isStreaming } = message + const { role, content, attachments, timestamp, isStreaming } = message // Format timestamp const timeString = timestamp.toLocaleTimeString([], { @@ -103,38 +103,63 @@ export function ChatMessage({ message }: ChatMessageProps) { `} > {/* Parse content for basic markdown-like formatting */} -
- {content.split('\n').map((line, i) => { - // Bold text - const boldRegex = /\*\*(.*?)\*\*/g - const parts = [] - let lastIndex = 0 - let match + {content && ( +
+ {content.split('\n').map((line, i) => { + // Bold text + const boldRegex = /\*\*(.*?)\*\*/g + const parts = [] + let lastIndex = 0 + let match - while ((match = boldRegex.exec(line)) !== null) { - if (match.index > lastIndex) { - parts.push(line.slice(lastIndex, match.index)) + while ((match = boldRegex.exec(line)) !== null) { + if (match.index > lastIndex) { + parts.push(line.slice(lastIndex, match.index)) + } + parts.push( + + {match[1]} + + ) + lastIndex = match.index + match[0].length } - parts.push( - - {match[1]} - + + if (lastIndex < line.length) { + parts.push(line.slice(lastIndex)) + } + + return ( + + {parts.length > 0 ? parts : line} + {i < content.split('\n').length - 1 && '\n'} + ) - lastIndex = match.index + match[0].length - } + })} +
+ )} - if (lastIndex < line.length) { - parts.push(line.slice(lastIndex)) - } - - return ( - - {parts.length > 0 ? parts : line} - {i < content.split('\n').length - 1 && '\n'} - - ) - })} -
+ {/* Display image attachments */} + {attachments && attachments.length > 0 && ( +
+ {attachments.map((attachment) => ( +
+ {attachment.filename} window.open(attachment.previewUrl, '_blank')} + title={`${attachment.filename} (click to enlarge)`} + /> + + {attachment.filename} + +
+ ))} +
+ )} {/* Streaming indicator */} {isStreaming && ( diff --git a/ui/src/components/SpecCreationChat.tsx b/ui/src/components/SpecCreationChat.tsx index e63e298..578cd55 100644 --- a/ui/src/components/SpecCreationChat.tsx +++ b/ui/src/components/SpecCreationChat.tsx @@ -5,12 +5,17 @@ * Handles the 7-phase conversation flow for creating app specifications. */ -import { useEffect, useRef, useState } from 'react' -import { Send, X, CheckCircle2, AlertCircle, Wifi, WifiOff, RotateCcw, Loader2, ArrowRight, Zap } from 'lucide-react' +import { useCallback, useEffect, useRef, useState } from 'react' +import { Send, X, CheckCircle2, AlertCircle, Wifi, WifiOff, RotateCcw, Loader2, ArrowRight, Zap, Paperclip } from 'lucide-react' import { useSpecChat } from '../hooks/useSpecChat' import { ChatMessage } from './ChatMessage' import { QuestionOptions } from './QuestionOptions' import { TypingIndicator } from './TypingIndicator' +import type { ImageAttachment } from '../lib/types' + +// Image upload validation constants +const MAX_FILE_SIZE = 5 * 1024 * 1024 // 5 MB +const ALLOWED_TYPES = ['image/jpeg', 'image/png'] type InitializerStatus = 'idle' | 'starting' | 'error' @@ -34,8 +39,10 @@ export function SpecCreationChat({ const [input, setInput] = useState('') const [error, setError] = useState(null) const [yoloEnabled, setYoloEnabled] = useState(false) + const [pendingAttachments, setPendingAttachments] = useState([]) const messagesEndRef = useRef(null) const inputRef = useRef(null) + const fileInputRef = useRef(null) const { messages, @@ -76,10 +83,12 @@ export function SpecCreationChat({ const handleSendMessage = () => { const trimmed = input.trim() - if (!trimmed || isLoading) return + // Allow sending if there's text OR attachments + if ((!trimmed && pendingAttachments.length === 0) || isLoading) return - sendMessage(trimmed) + sendMessage(trimmed, pendingAttachments.length > 0 ? 
pendingAttachments : undefined) setInput('') + setPendingAttachments([]) // Clear attachments after sending } const handleKeyDown = (e: React.KeyboardEvent) => { @@ -93,6 +102,61 @@ export function SpecCreationChat({ sendAnswer(answers) } + // File handling for image attachments + const handleFileSelect = useCallback((files: FileList | null) => { + if (!files) return + + Array.from(files).forEach((file) => { + // Validate file type + if (!ALLOWED_TYPES.includes(file.type)) { + setError(`Invalid file type: ${file.name}. Only JPEG and PNG are supported.`) + return + } + + // Validate file size + if (file.size > MAX_FILE_SIZE) { + setError(`File too large: ${file.name}. Maximum size is 5 MB.`) + return + } + + // Read and convert to base64 + const reader = new FileReader() + reader.onload = (e) => { + const dataUrl = e.target?.result as string + // dataUrl is "data:image/png;base64,XXXXXX" + const base64Data = dataUrl.split(',')[1] + + const attachment: ImageAttachment = { + id: `${Date.now()}-${Math.random().toString(36).substring(2, 9)}`, + filename: file.name, + mimeType: file.type as 'image/jpeg' | 'image/png', + base64Data, + previewUrl: dataUrl, + size: file.size, + } + + setPendingAttachments((prev) => [...prev, attachment]) + } + reader.readAsDataURL(file) + }) + }, []) + + const handleRemoveAttachment = useCallback((id: string) => { + setPendingAttachments((prev) => prev.filter((a) => a.id !== id)) + }, []) + + const handleDrop = useCallback( + (e: React.DragEvent) => { + e.preventDefault() + handleFileSelect(e.dataTransfer.files) + }, + [handleFileSelect] + ) + + const handleDragOver = useCallback((e: React.DragEvent) => { + e.preventDefault() + }, []) + // Connection status indicator const ConnectionIndicator = () => { switch (connectionStatus) { @@ -216,8 +280,62 @@ export function SpecCreationChat({ {/* Input area */} {!isComplete && ( -
+
+ {/* Attachment previews */} + {pendingAttachments.length > 0 && ( +
+ {pendingAttachments.map((attachment) => ( +
+ {attachment.filename} + + + {attachment.filename.length > 10 + ? `${attachment.filename.substring(0, 7)}...` + : attachment.filename} + +
+ ))} +
+ )} +
+ {/* Hidden file input */} + handleFileSelect(e.target.files)} + className="hidden" + /> + + {/* Attach button */} + + 0 + ? 'Add a message with your image(s)...' + : 'Type your response...' } className="neo-input flex-1" disabled={(isLoading && !currentQuestions) || connectionStatus !== 'connected'} />
)} diff --git a/ui/src/hooks/useSpecChat.ts b/ui/src/hooks/useSpecChat.ts index 4442466..727e77a 100644 --- a/ui/src/hooks/useSpecChat.ts +++ b/ui/src/hooks/useSpecChat.ts @@ -3,7 +3,7 @@ */ import { useState, useCallback, useRef, useEffect } from 'react' -import type { ChatMessage, SpecChatServerMessage, SpecQuestion } from '../lib/types' +import type { ChatMessage, ImageAttachment, SpecChatServerMessage, SpecQuestion } from '../lib/types' type ConnectionStatus = 'disconnected' | 'connecting' | 'connected' | 'error' @@ -21,7 +21,7 @@ interface UseSpecChatReturn { currentQuestions: SpecQuestion[] | null currentToolId: string | null start: () => void - sendMessage: (content: string) => void + sendMessage: (content: string, attachments?: ImageAttachment[]) => void sendAnswer: (answers: Record) => void disconnect: () => void } @@ -303,19 +303,20 @@ export function useSpecChat({ setTimeout(checkAndSend, 100) }, [connect]) - const sendMessage = useCallback((content: string) => { + const sendMessage = useCallback((content: string, attachments?: ImageAttachment[]) => { if (!wsRef.current || wsRef.current.readyState !== WebSocket.OPEN) { onError?.('Not connected') return } - // Add user message to chat + // Add user message to chat (with attachments for display) setMessages((prev) => [ ...prev, { id: generateId(), role: 'user', content, + attachments, timestamp: new Date(), }, ]) @@ -325,8 +326,23 @@ export function useSpecChat({ setCurrentToolId(null) setIsLoading(true) + // Build message payload + const payload: { type: string; content: string; attachments?: Array<{ filename: string; mimeType: string; base64Data: string }> } = { + type: 'message', + content, + } + + // Add attachments if present (send base64 data, not preview URL) + if (attachments && attachments.length > 0) { + payload.attachments = attachments.map((a) => ({ + filename: a.filename, + mimeType: a.mimeType, + base64Data: a.base64Data, + })) + } + // Send to server - wsRef.current.send(JSON.stringify({ 
type: 'message', content })) + wsRef.current.send(JSON.stringify(payload)) }, [onError]) const sendAnswer = useCallback((answers: Record) => { diff --git a/ui/src/lib/types.ts b/ui/src/lib/types.ts index a40b9cb..f1b818b 100644 --- a/ui/src/lib/types.ts +++ b/ui/src/lib/types.ts @@ -209,11 +209,22 @@ export type SpecChatServerMessage = | SpecChatPongMessage | SpecChatResponseDoneMessage +// Image attachment for chat messages +export interface ImageAttachment { + id: string + filename: string + mimeType: 'image/jpeg' | 'image/png' + base64Data: string // Raw base64 (without data: prefix) + previewUrl: string // data: URL for display + size: number // File size in bytes +} + // UI chat message for display export interface ChatMessage { id: string role: 'user' | 'assistant' | 'system' content: string + attachments?: ImageAttachment[] timestamp: Date questions?: SpecQuestion[] isStreaming?: boolean