feat: Add image upload support for Spec Creation chat

Add the ability to attach images (JPEG, PNG) in the Spec Creation chat
interface for Claude to analyze during app specification creation.

Frontend changes:
- Add ImageAttachment interface to types.ts with id, filename, mimeType,
  base64Data, previewUrl, and size fields
- Update ChatMessage interface with optional attachments field
- Update useSpecChat hook to accept and send attachments via WebSocket
- Add file input, drag-drop support, and preview thumbnails to
  SpecCreationChat component with validation (5 MB max, JPEG/PNG only)
- Update ChatMessage component to render image attachments with
  click-to-enlarge functionality

Backend changes:
- Add ImageAttachment Pydantic schema with base64 and size (5 MB) validation
- Update spec_creation.py WebSocket handler to parse and validate
  image attachments from client messages
- Update spec_chat_session.py to format multimodal content blocks
  for Claude API using async generator pattern

Features:
- Drag-and-drop or click paperclip button to attach images
- Preview thumbnails with remove button before sending
- File type validation (image/jpeg, image/png)
- File size validation (5 MB maximum)
- Images display in chat history
- Click images to view full size
- Cross-platform compatible (Windows, macOS, Linux)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Auto
2026-01-02 10:12:04 +02:00
parent 05607b310a
commit b628aa7051
7 changed files with 335 additions and 52 deletions

View File

@@ -13,8 +13,9 @@ from pathlib import Path
from typing import Any, Optional
from fastapi import APIRouter, WebSocket, WebSocketDisconnect, HTTPException
from pydantic import BaseModel
from pydantic import BaseModel, ValidationError
from ..schemas import ImageAttachment
from ..services.spec_chat_session import (
SpecChatSession,
get_session,
@@ -191,7 +192,24 @@ async def spec_chat_websocket(websocket: WebSocket, project_name: str):
continue
user_content = message.get("content", "").strip()
if not user_content:
# Parse attachments if present
attachments: list[ImageAttachment] = []
raw_attachments = message.get("attachments", [])
if raw_attachments:
try:
for raw_att in raw_attachments:
attachments.append(ImageAttachment(**raw_att))
except (ValidationError, Exception) as e:
logger.warning(f"Invalid attachment data: {e}")
await websocket.send_json({
"type": "error",
"content": f"Invalid attachment: {str(e)}"
})
continue
# Allow empty content if attachments are present
if not user_content and not attachments:
await websocket.send_json({
"type": "error",
"content": "Empty message"
@@ -202,8 +220,8 @@ async def spec_chat_websocket(websocket: WebSocket, project_name: str):
spec_complete_received = False
spec_path = None
# Stream Claude's response
async for chunk in session.send_message(user_content):
# Stream Claude's response (with attachments if present)
async for chunk in session.send_message(user_content, attachments if attachments else None):
# Track spec_complete but don't send complete yet
if chunk.get("type") == "spec_complete":
spec_complete_received = True

View File

@@ -5,9 +5,10 @@ Pydantic Schemas
Request/Response models for the API endpoints.
"""
import base64
from datetime import datetime
from typing import Literal
from pydantic import BaseModel, Field
from pydantic import BaseModel, Field, field_validator
# ============================================================================
@@ -163,6 +164,38 @@ class WSAgentStatusMessage(BaseModel):
status: str
# ============================================================================
# Spec Chat Schemas
# ============================================================================
# Maximum image file size: 5 MB
MAX_IMAGE_SIZE = 5 * 1024 * 1024
class ImageAttachment(BaseModel):
    """Image attachment from client for spec creation chat.

    Construction fails validation when ``base64Data`` is not strictly valid
    base64 or when the decoded payload is larger than ``MAX_IMAGE_SIZE``.
    """
    # File name as supplied by the client; 1-255 characters.
    filename: str = Field(..., min_length=1, max_length=255)
    # Only JPEG and PNG payloads are accepted.
    mimeType: Literal['image/jpeg', 'image/png']
    # Base64-encoded image bytes; checked by validate_base64_and_size.
    base64Data: str

    @field_validator('base64Data')
    @classmethod
    def validate_base64_and_size(cls, v: str) -> str:
        """Validate that base64 data is well-formed and within the size limit.

        Args:
            v: The base64 string received for ``base64Data``.

        Returns:
            The input string unchanged when it passes both checks.

        Raises:
            ValueError: If ``v`` is not valid base64, or if the decoded
                payload exceeds ``MAX_IMAGE_SIZE`` bytes.
        """
        try:
            # validate=True makes the decoder reject characters outside the
            # base64 alphabet; the default silently discards them, which
            # would let corrupted payloads through.
            decoded = base64.b64decode(v, validate=True)
        except ValueError as e:
            # binascii.Error (bad padding / bad characters) subclasses
            # ValueError, as does the error raised for non-ASCII input.
            raise ValueError(f'Invalid base64 data: {e}') from e

        # Size check lives outside the try block so its error is never
        # mistaken for a decoding failure.
        if len(decoded) > MAX_IMAGE_SIZE:
            raise ValueError(
                f'Image size ({len(decoded) / (1024 * 1024):.1f} MB) exceeds '
                f'maximum of {MAX_IMAGE_SIZE // (1024 * 1024)} MB'
            )
        return v
# ============================================================================
# Filesystem Schemas
# ============================================================================

View File

@@ -17,8 +17,28 @@ from typing import AsyncGenerator, Optional
from claude_agent_sdk import ClaudeAgentOptions, ClaudeSDKClient
from ..schemas import ImageAttachment
logger = logging.getLogger(__name__)
async def _make_multimodal_message(content_blocks: list[dict]) -> AsyncGenerator[dict, None]:
"""
Create an async generator that yields a properly formatted multimodal message.
The Claude Agent SDK's query() method accepts either:
- A string (simple text)
- An AsyncIterable[dict] (for custom message formats)
This function wraps content blocks in the expected message format.
"""
yield {
"type": "user",
"message": {"role": "user", "content": content_blocks},
"parent_tool_use_id": None,
"session_id": "default",
}
# Root directory of the project
ROOT_DIR = Path(__file__).parent.parent.parent
@@ -166,12 +186,17 @@ class SpecChatSession:
"content": f"Failed to start conversation: {str(e)}"
}
async def send_message(self, user_message: str) -> AsyncGenerator[dict, None]:
async def send_message(
self,
user_message: str,
attachments: list[ImageAttachment] | None = None
) -> AsyncGenerator[dict, None]:
"""
Send user message and stream Claude's response.
Args:
user_message: The user's response
attachments: Optional list of image attachments
Yields:
Message chunks of various types:
@@ -191,11 +216,12 @@ class SpecChatSession:
self.messages.append({
"role": "user",
"content": user_message,
"has_attachments": bool(attachments),
"timestamp": datetime.now().isoformat()
})
try:
async for chunk in self._query_claude(user_message):
async for chunk in self._query_claude(user_message, attachments):
yield chunk
# Signal that the response is complete (for UI to hide loading indicator)
yield {"type": "response_done"}
@@ -206,11 +232,16 @@ class SpecChatSession:
"content": f"Error: {str(e)}"
}
async def _query_claude(self, message: str) -> AsyncGenerator[dict, None]:
async def _query_claude(
self,
message: str,
attachments: list[ImageAttachment] | None = None
) -> AsyncGenerator[dict, None]:
"""
Internal method to query Claude and stream responses.
Handles tool calls (Write) and text responses.
Supports multimodal content with image attachments.
IMPORTANT: Spec creation requires BOTH files to be written:
1. app_spec.txt - the main specification
@@ -221,8 +252,33 @@ class SpecChatSession:
if not self.client:
return
# Send the message to Claude using the SDK's query method
await self.client.query(message)
# Build the message content
if attachments and len(attachments) > 0:
# Multimodal message: build content blocks array
content_blocks = []
# Add text block if there's text
if message:
content_blocks.append({"type": "text", "text": message})
# Add image blocks
for att in attachments:
content_blocks.append({
"type": "image",
"source": {
"type": "base64",
"media_type": att.mimeType,
"data": att.base64Data,
}
})
# Send multimodal content to Claude using async generator format
# The SDK's query() accepts AsyncIterable[dict] for custom message formats
await self.client.query(_make_multimodal_message(content_blocks))
logger.info(f"Sent multimodal message with {len(attachments)} image(s)")
else:
# Text-only message: use string format
await self.client.query(message)
current_text = ""

View File

@@ -13,7 +13,7 @@ interface ChatMessageProps {
}
export function ChatMessage({ message }: ChatMessageProps) {
const { role, content, timestamp, isStreaming } = message
const { role, content, attachments, timestamp, isStreaming } = message
// Format timestamp
const timeString = timestamp.toLocaleTimeString([], {
@@ -103,38 +103,63 @@ export function ChatMessage({ message }: ChatMessageProps) {
`}
>
{/* Parse content for basic markdown-like formatting */}
<div className="whitespace-pre-wrap text-sm leading-relaxed text-[#1a1a1a]">
{content.split('\n').map((line, i) => {
// Bold text
const boldRegex = /\*\*(.*?)\*\*/g
const parts = []
let lastIndex = 0
let match
{content && (
<div className="whitespace-pre-wrap text-sm leading-relaxed text-[#1a1a1a]">
{content.split('\n').map((line, i) => {
// Bold text
const boldRegex = /\*\*(.*?)\*\*/g
const parts = []
let lastIndex = 0
let match
while ((match = boldRegex.exec(line)) !== null) {
if (match.index > lastIndex) {
parts.push(line.slice(lastIndex, match.index))
while ((match = boldRegex.exec(line)) !== null) {
if (match.index > lastIndex) {
parts.push(line.slice(lastIndex, match.index))
}
parts.push(
<strong key={`bold-${i}-${match.index}`} className="font-bold">
{match[1]}
</strong>
)
lastIndex = match.index + match[0].length
}
parts.push(
<strong key={`bold-${i}-${match.index}`} className="font-bold">
{match[1]}
</strong>
if (lastIndex < line.length) {
parts.push(line.slice(lastIndex))
}
return (
<span key={i}>
{parts.length > 0 ? parts : line}
{i < content.split('\n').length - 1 && '\n'}
</span>
)
lastIndex = match.index + match[0].length
}
})}
</div>
)}
if (lastIndex < line.length) {
parts.push(line.slice(lastIndex))
}
return (
<span key={i}>
{parts.length > 0 ? parts : line}
{i < content.split('\n').length - 1 && '\n'}
</span>
)
})}
</div>
{/* Display image attachments */}
{attachments && attachments.length > 0 && (
<div className={`flex flex-wrap gap-2 ${content ? 'mt-3' : ''}`}>
{attachments.map((attachment) => (
<div
key={attachment.id}
className="border-2 border-[var(--color-neo-border)] p-1 bg-white shadow-[2px_2px_0px_rgba(0,0,0,1)]"
>
<img
src={attachment.previewUrl}
alt={attachment.filename}
className="max-w-48 max-h-48 object-contain cursor-pointer hover:opacity-90 transition-opacity"
onClick={() => window.open(attachment.previewUrl, '_blank')}
title={`${attachment.filename} (click to enlarge)`}
/>
<span className="text-xs text-[var(--color-neo-text-secondary)] block mt-1 text-center">
{attachment.filename}
</span>
</div>
))}
</div>
)}
{/* Streaming indicator */}
{isStreaming && (

View File

@@ -5,12 +5,17 @@
* Handles the 7-phase conversation flow for creating app specifications.
*/
import { useEffect, useRef, useState } from 'react'
import { Send, X, CheckCircle2, AlertCircle, Wifi, WifiOff, RotateCcw, Loader2, ArrowRight, Zap } from 'lucide-react'
import { useCallback, useEffect, useRef, useState } from 'react'
import { Send, X, CheckCircle2, AlertCircle, Wifi, WifiOff, RotateCcw, Loader2, ArrowRight, Zap, Paperclip } from 'lucide-react'
import { useSpecChat } from '../hooks/useSpecChat'
import { ChatMessage } from './ChatMessage'
import { QuestionOptions } from './QuestionOptions'
import { TypingIndicator } from './TypingIndicator'
import type { ImageAttachment } from '../lib/types'
// Image upload validation constants
const MAX_FILE_SIZE = 5 * 1024 * 1024 // 5 MB
const ALLOWED_TYPES = ['image/jpeg', 'image/png']
type InitializerStatus = 'idle' | 'starting' | 'error'
@@ -34,8 +39,10 @@ export function SpecCreationChat({
const [input, setInput] = useState('')
const [error, setError] = useState<string | null>(null)
const [yoloEnabled, setYoloEnabled] = useState(false)
const [pendingAttachments, setPendingAttachments] = useState<ImageAttachment[]>([])
const messagesEndRef = useRef<HTMLDivElement>(null)
const inputRef = useRef<HTMLInputElement>(null)
const fileInputRef = useRef<HTMLInputElement>(null)
const {
messages,
@@ -76,10 +83,12 @@ export function SpecCreationChat({
const handleSendMessage = () => {
const trimmed = input.trim()
if (!trimmed || isLoading) return
// Allow sending if there's text OR attachments
if ((!trimmed && pendingAttachments.length === 0) || isLoading) return
sendMessage(trimmed)
sendMessage(trimmed, pendingAttachments.length > 0 ? pendingAttachments : undefined)
setInput('')
setPendingAttachments([]) // Clear attachments after sending
}
const handleKeyDown = (e: React.KeyboardEvent) => {
@@ -93,6 +102,61 @@ export function SpecCreationChat({
sendAnswer(answers)
}
// File handling for image attachments
/**
 * Validate the selected files and queue each accepted image as a pending
 * attachment.
 *
 * Files with a type outside ALLOWED_TYPES or larger than MAX_FILE_SIZE are
 * skipped and reported through setError; the remaining files in the same
 * selection are still processed. Reading is asynchronous, so attachments are
 * appended as each FileReader finishes.
 *
 * @param files - The FileList from a file input or drop event, or null.
 */
const handleFileSelect = useCallback((files: FileList | null) => {
  if (!files) return

  for (const file of Array.from(files)) {
    // Validate file type
    if (!ALLOWED_TYPES.includes(file.type)) {
      setError(`Invalid file type: ${file.name}. Only JPEG and PNG are supported.`)
      continue
    }

    // Validate file size
    if (file.size > MAX_FILE_SIZE) {
      setError(`File too large: ${file.name}. Maximum size is 5 MB.`)
      continue
    }

    // Read and convert to base64
    const reader = new FileReader()

    reader.onload = () => {
      const dataUrl = reader.result
      // readAsDataURL produces a string; narrow instead of asserting so an
      // unexpected result is reported rather than stored as a bad attachment.
      if (typeof dataUrl !== 'string') {
        setError(`Failed to read file: ${file.name}.`)
        return
      }

      // dataUrl is "data:image/png;base64,XXXXXX"; keep only the payload
      const commaIndex = dataUrl.indexOf(',')
      if (commaIndex === -1) {
        setError(`Failed to read file: ${file.name}.`)
        return
      }
      const base64Data = dataUrl.slice(commaIndex + 1)

      const attachment: ImageAttachment = {
        id: `${Date.now()}-${Math.random().toString(36).substring(2, 9)}`,
        filename: file.name,
        // Safe: file.type was checked against ALLOWED_TYPES above
        mimeType: file.type as 'image/jpeg' | 'image/png',
        base64Data,
        previewUrl: dataUrl,
        size: file.size,
      }

      setPendingAttachments((prev) => [...prev, attachment])
    }

    // Surface read failures instead of silently dropping the file
    reader.onerror = () => {
      setError(`Failed to read file: ${file.name}.`)
    }

    reader.readAsDataURL(file)
  }
}, [])
// Remove the pending attachment with the given id; the others keep their order.
const handleRemoveAttachment = useCallback((id: string) => {
  setPendingAttachments((current) => {
    const remaining = current.filter((item) => item.id !== id)
    return remaining
  })
}, [])
// Drop handler: cancel the browser's default drop handling and pass the
// dropped files through the same validation path as the file picker.
const handleDrop = useCallback(
  (event: React.DragEvent) => {
    event.preventDefault()
    const droppedFiles = event.dataTransfer.files
    handleFileSelect(droppedFiles)
  },
  [handleFileSelect]
)
// Drag-over handler: cancelling the default action marks the element as a
// valid drop target so the drop event can fire.
const handleDragOver = useCallback((event: React.DragEvent) => {
  event.preventDefault()
}, [])
// Connection status indicator
const ConnectionIndicator = () => {
switch (connectionStatus) {
@@ -216,8 +280,62 @@ export function SpecCreationChat({
{/* Input area */}
{!isComplete && (
<div className="p-4 border-t-3 border-[var(--color-neo-border)] bg-white">
<div
className="p-4 border-t-3 border-[var(--color-neo-border)] bg-white"
onDrop={handleDrop}
onDragOver={handleDragOver}
>
{/* Attachment previews */}
{pendingAttachments.length > 0 && (
<div className="flex flex-wrap gap-2 mb-3">
{pendingAttachments.map((attachment) => (
<div
key={attachment.id}
className="relative group border-2 border-[var(--color-neo-border)] p-1 bg-white shadow-[2px_2px_0px_rgba(0,0,0,1)]"
>
<img
src={attachment.previewUrl}
alt={attachment.filename}
className="w-16 h-16 object-cover"
/>
<button
onClick={() => handleRemoveAttachment(attachment.id)}
className="absolute -top-2 -right-2 bg-[var(--color-neo-danger)] text-white rounded-full p-0.5 border-2 border-[var(--color-neo-border)] hover:scale-110 transition-transform"
title="Remove attachment"
>
<X size={12} />
</button>
<span className="text-xs truncate block max-w-16 mt-1 text-center">
{attachment.filename.length > 10
? `${attachment.filename.substring(0, 7)}...`
: attachment.filename}
</span>
</div>
))}
</div>
)}
<div className="flex gap-3">
{/* Hidden file input */}
<input
ref={fileInputRef}
type="file"
accept="image/jpeg,image/png"
multiple
onChange={(e) => handleFileSelect(e.target.files)}
className="hidden"
/>
{/* Attach button */}
<button
onClick={() => fileInputRef.current?.click()}
disabled={connectionStatus !== 'connected'}
className="neo-btn neo-btn-ghost p-3"
title="Attach image (JPEG, PNG - max 5MB)"
>
<Paperclip size={18} />
</button>
<input
ref={inputRef}
type="text"
@@ -227,14 +345,20 @@ export function SpecCreationChat({
placeholder={
currentQuestions
? 'Or type a custom response...'
: 'Type your response...'
: pendingAttachments.length > 0
? 'Add a message with your image(s)...'
: 'Type your response...'
}
className="neo-input flex-1"
disabled={(isLoading && !currentQuestions) || connectionStatus !== 'connected'}
/>
<button
onClick={handleSendMessage}
disabled={!input.trim() || (isLoading && !currentQuestions) || connectionStatus !== 'connected'}
disabled={
(!input.trim() && pendingAttachments.length === 0) ||
(isLoading && !currentQuestions) ||
connectionStatus !== 'connected'
}
className="neo-btn neo-btn-primary px-6"
>
<Send size={18} />
@@ -243,7 +367,7 @@ export function SpecCreationChat({
{/* Help text */}
<p className="text-xs text-[var(--color-neo-text-secondary)] mt-2">
Press Enter to send. Claude will guide you through creating your app specification.
Press Enter to send. Drag & drop or click <Paperclip size={12} className="inline" /> to attach images (JPEG/PNG, max 5MB).
</p>
</div>
)}

View File

@@ -3,7 +3,7 @@
*/
import { useState, useCallback, useRef, useEffect } from 'react'
import type { ChatMessage, SpecChatServerMessage, SpecQuestion } from '../lib/types'
import type { ChatMessage, ImageAttachment, SpecChatServerMessage, SpecQuestion } from '../lib/types'
type ConnectionStatus = 'disconnected' | 'connecting' | 'connected' | 'error'
@@ -21,7 +21,7 @@ interface UseSpecChatReturn {
currentQuestions: SpecQuestion[] | null
currentToolId: string | null
start: () => void
sendMessage: (content: string) => void
sendMessage: (content: string, attachments?: ImageAttachment[]) => void
sendAnswer: (answers: Record<string, string | string[]>) => void
disconnect: () => void
}
@@ -303,19 +303,20 @@ export function useSpecChat({
setTimeout(checkAndSend, 100)
}, [connect])
const sendMessage = useCallback((content: string) => {
const sendMessage = useCallback((content: string, attachments?: ImageAttachment[]) => {
if (!wsRef.current || wsRef.current.readyState !== WebSocket.OPEN) {
onError?.('Not connected')
return
}
// Add user message to chat
// Add user message to chat (with attachments for display)
setMessages((prev) => [
...prev,
{
id: generateId(),
role: 'user',
content,
attachments,
timestamp: new Date(),
},
])
@@ -325,8 +326,23 @@ export function useSpecChat({
setCurrentToolId(null)
setIsLoading(true)
// Build message payload
const payload: { type: string; content: string; attachments?: Array<{ filename: string; mimeType: string; base64Data: string }> } = {
type: 'message',
content,
}
// Add attachments if present (send base64 data, not preview URL)
if (attachments && attachments.length > 0) {
payload.attachments = attachments.map((a) => ({
filename: a.filename,
mimeType: a.mimeType,
base64Data: a.base64Data,
}))
}
// Send to server
wsRef.current.send(JSON.stringify({ type: 'message', content }))
wsRef.current.send(JSON.stringify(payload))
}, [onError])
const sendAnswer = useCallback((answers: Record<string, string | string[]>) => {

View File

@@ -209,11 +209,22 @@ export type SpecChatServerMessage =
| SpecChatPongMessage
| SpecChatResponseDoneMessage
/**
 * Image attachment for chat messages.
 *
 * Carries both the raw base64 payload (the part sent to the server) and a
 * data: URL used for rendering previews in the UI.
 */
export interface ImageAttachment {
/** Client-generated identifier; used as the list key when attachments are rendered. */
id: string
/** Original file name as reported by the browser. */
filename: string
/** MIME type; restricted to the two supported image formats. */
mimeType: 'image/jpeg' | 'image/png'
base64Data: string // Raw base64 (without data: prefix)
previewUrl: string // data: URL for display
size: number // File size in bytes
}
// UI chat message for display
export interface ChatMessage {
id: string
role: 'user' | 'assistant' | 'system'
content: string
attachments?: ImageAttachment[]
timestamp: Date
questions?: SpecQuestion[]
isStreaming?: boolean