diff --git a/server/routers/spec_creation.py b/server/routers/spec_creation.py
index 5600a2f..2ce66be 100644
--- a/server/routers/spec_creation.py
+++ b/server/routers/spec_creation.py
@@ -13,8 +13,9 @@ from pathlib import Path
from typing import Any, Optional
from fastapi import APIRouter, WebSocket, WebSocketDisconnect, HTTPException
-from pydantic import BaseModel
+from pydantic import BaseModel, ValidationError
+from ..schemas import ImageAttachment
from ..services.spec_chat_session import (
SpecChatSession,
get_session,
@@ -191,7 +192,24 @@ async def spec_chat_websocket(websocket: WebSocket, project_name: str):
continue
user_content = message.get("content", "").strip()
- if not user_content:
+
+ # Parse attachments if present
+ attachments: list[ImageAttachment] = []
+ raw_attachments = message.get("attachments", [])
+ if raw_attachments:
+ try:
+ for raw_att in raw_attachments:
+ attachments.append(ImageAttachment(**raw_att))
+ except (ValidationError, Exception) as e:
+ logger.warning(f"Invalid attachment data: {e}")
+ await websocket.send_json({
+ "type": "error",
+ "content": f"Invalid attachment: {str(e)}"
+ })
+ continue
+
+ # Allow empty content if attachments are present
+ if not user_content and not attachments:
await websocket.send_json({
"type": "error",
"content": "Empty message"
@@ -202,8 +220,8 @@ async def spec_chat_websocket(websocket: WebSocket, project_name: str):
spec_complete_received = False
spec_path = None
- # Stream Claude's response
- async for chunk in session.send_message(user_content):
+ # Stream Claude's response (with attachments if present)
+ async for chunk in session.send_message(user_content, attachments if attachments else None):
# Track spec_complete but don't send complete yet
if chunk.get("type") == "spec_complete":
spec_complete_received = True
diff --git a/server/schemas.py b/server/schemas.py
index ab78080..723d460 100644
--- a/server/schemas.py
+++ b/server/schemas.py
@@ -5,9 +5,10 @@ Pydantic Schemas
Request/Response models for the API endpoints.
"""
+import base64
from datetime import datetime
from typing import Literal
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, field_validator
# ============================================================================
@@ -163,6 +164,38 @@ class WSAgentStatusMessage(BaseModel):
status: str
+# ============================================================================
+# Spec Chat Schemas
+# ============================================================================
+
+# Upper bound, in bytes, on a decoded image attachment: 5 MB
+MAX_IMAGE_SIZE = 5 * 1024 ** 2
+
+
+class ImageAttachment(BaseModel):
+    """Image attachment from client for spec creation chat.
+
+    Field names are camelCase because they mirror the keys of the JSON
+    payload the model is constructed from.
+    """
+    # File name of the image; must be 1-255 characters long.
+    filename: str = Field(..., min_length=1, max_length=255)
+    # Only JPEG and PNG images are accepted.
+    mimeType: Literal['image/jpeg', 'image/png']
+    # Base64-encoded image bytes; checked by validate_base64_and_size below.
+    base64Data: str
+
+    @field_validator('base64Data')
+    @classmethod
+    def validate_base64_and_size(cls, v: str) -> str:
+        """Validate that base64 data is well-formed and within the size limit.
+
+        Args:
+            v: The base64-encoded image payload.
+
+        Returns:
+            The payload unchanged (it is validated, not transformed).
+
+        Raises:
+            ValueError: If ``v`` is not strictly valid base64, or if the
+                decoded image is larger than ``MAX_IMAGE_SIZE`` bytes.
+        """
+        try:
+            # validate=True rejects characters outside the base64 alphabet.
+            # The default silently discards them, which would let malformed
+            # payloads pass this check.
+            decoded = base64.b64decode(v, validate=True)
+        except ValueError as e:
+            # binascii.Error subclasses ValueError; non-ASCII input raises a
+            # plain ValueError. Both are reported the same way.
+            raise ValueError(f'Invalid base64 data: {e}') from e
+
+        # Size check lives outside the try so its error is never mistaken
+        # for (or re-wrapped as) a decoding failure.
+        if len(decoded) > MAX_IMAGE_SIZE:
+            raise ValueError(
+                f'Image size ({len(decoded) / (1024 * 1024):.1f} MB) exceeds '
+                f'maximum of {MAX_IMAGE_SIZE // (1024 * 1024)} MB'
+            )
+        return v
+
+
# ============================================================================
# Filesystem Schemas
# ============================================================================
diff --git a/server/services/spec_chat_session.py b/server/services/spec_chat_session.py
index a1a55db..1d59532 100644
--- a/server/services/spec_chat_session.py
+++ b/server/services/spec_chat_session.py
@@ -17,8 +17,28 @@ from typing import AsyncGenerator, Optional
from claude_agent_sdk import ClaudeAgentOptions, ClaudeSDKClient
+from ..schemas import ImageAttachment
+
logger = logging.getLogger(__name__)
+
+async def _make_multimodal_message(content_blocks: list[dict]) -> AsyncGenerator[dict, None]:
+    """
+    Yield exactly one user-message envelope wrapping ``content_blocks``.
+
+    The Claude Agent SDK's query() method accepts either:
+    - A string (simple text)
+    - An AsyncIterable[dict] (for custom message formats)
+
+    This one-item async generator supplies the second form, so a list of
+    content blocks can be sent as a single user turn.
+    """
+    user_turn = {"role": "user", "content": content_blocks}
+    envelope = {
+        "type": "user",
+        "message": user_turn,
+        "parent_tool_use_id": None,
+        "session_id": "default",
+    }
+    yield envelope
+
# Root directory of the project
ROOT_DIR = Path(__file__).parent.parent.parent
@@ -166,12 +186,17 @@ class SpecChatSession:
"content": f"Failed to start conversation: {str(e)}"
}
- async def send_message(self, user_message: str) -> AsyncGenerator[dict, None]:
+ async def send_message(
+ self,
+ user_message: str,
+ attachments: list[ImageAttachment] | None = None
+ ) -> AsyncGenerator[dict, None]:
"""
Send user message and stream Claude's response.
Args:
user_message: The user's response
+ attachments: Optional list of image attachments
Yields:
Message chunks of various types:
@@ -191,11 +216,12 @@ class SpecChatSession:
self.messages.append({
"role": "user",
"content": user_message,
+ "has_attachments": bool(attachments),
"timestamp": datetime.now().isoformat()
})
try:
- async for chunk in self._query_claude(user_message):
+ async for chunk in self._query_claude(user_message, attachments):
yield chunk
# Signal that the response is complete (for UI to hide loading indicator)
yield {"type": "response_done"}
@@ -206,11 +232,16 @@ class SpecChatSession:
"content": f"Error: {str(e)}"
}
- async def _query_claude(self, message: str) -> AsyncGenerator[dict, None]:
+ async def _query_claude(
+ self,
+ message: str,
+ attachments: list[ImageAttachment] | None = None
+ ) -> AsyncGenerator[dict, None]:
"""
Internal method to query Claude and stream responses.
Handles tool calls (Write) and text responses.
+ Supports multimodal content with image attachments.
IMPORTANT: Spec creation requires BOTH files to be written:
1. app_spec.txt - the main specification
@@ -221,8 +252,33 @@ class SpecChatSession:
if not self.client:
return
- # Send the message to Claude using the SDK's query method
- await self.client.query(message)
+ # Build the message content
+ if attachments and len(attachments) > 0:
+ # Multimodal message: build content blocks array
+ content_blocks = []
+
+ # Add text block if there's text
+ if message:
+ content_blocks.append({"type": "text", "text": message})
+
+ # Add image blocks
+ for att in attachments:
+ content_blocks.append({
+ "type": "image",
+ "source": {
+ "type": "base64",
+ "media_type": att.mimeType,
+ "data": att.base64Data,
+ }
+ })
+
+ # Send multimodal content to Claude using async generator format
+ # The SDK's query() accepts AsyncIterable[dict] for custom message formats
+ await self.client.query(_make_multimodal_message(content_blocks))
+ logger.info(f"Sent multimodal message with {len(attachments)} image(s)")
+ else:
+ # Text-only message: use string format
+ await self.client.query(message)
current_text = ""
diff --git a/ui/src/components/ChatMessage.tsx b/ui/src/components/ChatMessage.tsx
index 66341ad..3eb9600 100644
--- a/ui/src/components/ChatMessage.tsx
+++ b/ui/src/components/ChatMessage.tsx
@@ -13,7 +13,7 @@ interface ChatMessageProps {
}
export function ChatMessage({ message }: ChatMessageProps) {
- const { role, content, timestamp, isStreaming } = message
+ const { role, content, attachments, timestamp, isStreaming } = message
// Format timestamp
const timeString = timestamp.toLocaleTimeString([], {
@@ -103,38 +103,63 @@ export function ChatMessage({ message }: ChatMessageProps) {
`}
>
{/* Parse content for basic markdown-like formatting */}
-
- {content.split('\n').map((line, i) => {
- // Bold text
- const boldRegex = /\*\*(.*?)\*\*/g
- const parts = []
- let lastIndex = 0
- let match
+ {content && (
+
+ {content.split('\n').map((line, i) => {
+ // Bold text
+ const boldRegex = /\*\*(.*?)\*\*/g
+ const parts = []
+ let lastIndex = 0
+ let match
- while ((match = boldRegex.exec(line)) !== null) {
- if (match.index > lastIndex) {
- parts.push(line.slice(lastIndex, match.index))
+ while ((match = boldRegex.exec(line)) !== null) {
+ if (match.index > lastIndex) {
+ parts.push(line.slice(lastIndex, match.index))
+ }
+ parts.push(
+
+ {match[1]}
+
+ )
+ lastIndex = match.index + match[0].length
}
- parts.push(
-
- {match[1]}
-
+
+ if (lastIndex < line.length) {
+ parts.push(line.slice(lastIndex))
+ }
+
+ return (
+
+ {parts.length > 0 ? parts : line}
+ {i < content.split('\n').length - 1 && '\n'}
+
)
- lastIndex = match.index + match[0].length
- }
+ })}
+
+ )}
- if (lastIndex < line.length) {
- parts.push(line.slice(lastIndex))
- }
-
- return (
-
- {parts.length > 0 ? parts : line}
- {i < content.split('\n').length - 1 && '\n'}
-
- )
- })}
-
+ {/* Display image attachments */}
+ {attachments && attachments.length > 0 && (
+
+ {attachments.map((attachment) => (
+
+

window.open(attachment.previewUrl, '_blank')}
+ title={`${attachment.filename} (click to enlarge)`}
+ />
+
+ {attachment.filename}
+
+
+ ))}
+
+ )}
{/* Streaming indicator */}
{isStreaming && (
diff --git a/ui/src/components/SpecCreationChat.tsx b/ui/src/components/SpecCreationChat.tsx
index e63e298..578cd55 100644
--- a/ui/src/components/SpecCreationChat.tsx
+++ b/ui/src/components/SpecCreationChat.tsx
@@ -5,12 +5,17 @@
* Handles the 7-phase conversation flow for creating app specifications.
*/
-import { useEffect, useRef, useState } from 'react'
-import { Send, X, CheckCircle2, AlertCircle, Wifi, WifiOff, RotateCcw, Loader2, ArrowRight, Zap } from 'lucide-react'
+import { useCallback, useEffect, useRef, useState } from 'react'
+import { Send, X, CheckCircle2, AlertCircle, Wifi, WifiOff, RotateCcw, Loader2, ArrowRight, Zap, Paperclip } from 'lucide-react'
import { useSpecChat } from '../hooks/useSpecChat'
import { ChatMessage } from './ChatMessage'
import { QuestionOptions } from './QuestionOptions'
import { TypingIndicator } from './TypingIndicator'
+import type { ImageAttachment } from '../lib/types'
+
+// Image upload validation constants
+const MAX_FILE_SIZE = 5 * 1024 * 1024 // 5 MB
+const ALLOWED_TYPES = ['image/jpeg', 'image/png']
type InitializerStatus = 'idle' | 'starting' | 'error'
@@ -34,8 +39,10 @@ export function SpecCreationChat({
const [input, setInput] = useState('')
const [error, setError] = useState(null)
const [yoloEnabled, setYoloEnabled] = useState(false)
+ const [pendingAttachments, setPendingAttachments] = useState([])
const messagesEndRef = useRef(null)
const inputRef = useRef(null)
+ const fileInputRef = useRef(null)
const {
messages,
@@ -76,10 +83,12 @@ export function SpecCreationChat({
const handleSendMessage = () => {
const trimmed = input.trim()
- if (!trimmed || isLoading) return
+ // Allow sending if there's text OR attachments
+ if ((!trimmed && pendingAttachments.length === 0) || isLoading) return
- sendMessage(trimmed)
+ sendMessage(trimmed, pendingAttachments.length > 0 ? pendingAttachments : undefined)
setInput('')
+ setPendingAttachments([]) // Clear attachments after sending
}
const handleKeyDown = (e: React.KeyboardEvent) => {
@@ -93,6 +102,61 @@ export function SpecCreationChat({
sendAnswer(answers)
}
+ // File handling for image attachments
+ const handleFileSelect = useCallback((files: FileList | null) => {
+ if (!files) return
+
+ Array.from(files).forEach((file) => {
+ // Validate file type
+ if (!ALLOWED_TYPES.includes(file.type)) {
+ setError(`Invalid file type: ${file.name}. Only JPEG and PNG are supported.`)
+ return
+ }
+
+ // Validate file size
+ if (file.size > MAX_FILE_SIZE) {
+ setError(`File too large: ${file.name}. Maximum size is 5 MB.`)
+ return
+ }
+
+ // Read and convert to base64
+ const reader = new FileReader()
+ reader.onload = (e) => {
+ const dataUrl = e.target?.result as string
+ // dataUrl looks like "data:image/png;base64,iVBORw0..."; keep only the part after the comma
+ const base64Data = dataUrl.split(',')[1]
+
+ const attachment: ImageAttachment = {
+ id: `${Date.now()}-${Math.random().toString(36).substring(2, 9)}`,
+ filename: file.name,
+ mimeType: file.type as 'image/jpeg' | 'image/png',
+ base64Data,
+ previewUrl: dataUrl,
+ size: file.size,
+ }
+
+ setPendingAttachments((prev) => [...prev, attachment])
+ }
+ reader.readAsDataURL(file)
+ })
+ }, [])
+
+ const handleRemoveAttachment = useCallback((id: string) => {
+ setPendingAttachments((prev) => prev.filter((a) => a.id !== id))
+ }, [])
+
+ const handleDrop = useCallback(
+ (e: React.DragEvent) => {
+ e.preventDefault()
+ handleFileSelect(e.dataTransfer.files)
+ },
+ [handleFileSelect]
+ )
+
+ const handleDragOver = useCallback((e: React.DragEvent) => {
+ e.preventDefault()
+ }, [])
+
// Connection status indicator
const ConnectionIndicator = () => {
switch (connectionStatus) {
@@ -216,8 +280,62 @@ export function SpecCreationChat({
{/* Input area */}
{!isComplete && (
-
+
+ {/* Attachment previews */}
+ {pendingAttachments.length > 0 && (
+
+ {pendingAttachments.map((attachment) => (
+
+

+
+
+ {attachment.filename.length > 10
+ ? `${attachment.filename.substring(0, 7)}...`
+ : attachment.filename}
+
+
+ ))}
+
+ )}
+
)}
diff --git a/ui/src/hooks/useSpecChat.ts b/ui/src/hooks/useSpecChat.ts
index 4442466..727e77a 100644
--- a/ui/src/hooks/useSpecChat.ts
+++ b/ui/src/hooks/useSpecChat.ts
@@ -3,7 +3,7 @@
*/
import { useState, useCallback, useRef, useEffect } from 'react'
-import type { ChatMessage, SpecChatServerMessage, SpecQuestion } from '../lib/types'
+import type { ChatMessage, ImageAttachment, SpecChatServerMessage, SpecQuestion } from '../lib/types'
type ConnectionStatus = 'disconnected' | 'connecting' | 'connected' | 'error'
@@ -21,7 +21,7 @@ interface UseSpecChatReturn {
currentQuestions: SpecQuestion[] | null
currentToolId: string | null
start: () => void
- sendMessage: (content: string) => void
+ sendMessage: (content: string, attachments?: ImageAttachment[]) => void
sendAnswer: (answers: Record
) => void
disconnect: () => void
}
@@ -303,19 +303,20 @@ export function useSpecChat({
setTimeout(checkAndSend, 100)
}, [connect])
- const sendMessage = useCallback((content: string) => {
+ const sendMessage = useCallback((content: string, attachments?: ImageAttachment[]) => {
if (!wsRef.current || wsRef.current.readyState !== WebSocket.OPEN) {
onError?.('Not connected')
return
}
- // Add user message to chat
+ // Add user message to chat (with attachments for display)
setMessages((prev) => [
...prev,
{
id: generateId(),
role: 'user',
content,
+ attachments,
timestamp: new Date(),
},
])
@@ -325,8 +326,23 @@ export function useSpecChat({
setCurrentToolId(null)
setIsLoading(true)
+ // Build message payload
+ const payload: { type: string; content: string; attachments?: Array<{ filename: string; mimeType: string; base64Data: string }> } = {
+ type: 'message',
+ content,
+ }
+
+ // Add attachments if present (send base64 data, not preview URL)
+ if (attachments && attachments.length > 0) {
+ payload.attachments = attachments.map((a) => ({
+ filename: a.filename,
+ mimeType: a.mimeType,
+ base64Data: a.base64Data,
+ }))
+ }
+
// Send to server
- wsRef.current.send(JSON.stringify({ type: 'message', content }))
+ wsRef.current.send(JSON.stringify(payload))
}, [onError])
const sendAnswer = useCallback((answers: Record) => {
diff --git a/ui/src/lib/types.ts b/ui/src/lib/types.ts
index a40b9cb..f1b818b 100644
--- a/ui/src/lib/types.ts
+++ b/ui/src/lib/types.ts
@@ -209,11 +209,22 @@ export type SpecChatServerMessage =
| SpecChatPongMessage
| SpecChatResponseDoneMessage
+// Image attachment for chat messages.
+// Holds both the raw base64 payload and a data: URL of the same image,
+// so one object can serve as upload data and as an <img> preview source.
+export interface ImageAttachment {
+ id: string // Identifier used to tell attachments apart (e.g. when removing one)
+ filename: string // Name of the attached file
+ mimeType: 'image/jpeg' | 'image/png' // Only JPEG and PNG are representable
+ base64Data: string // Raw base64 (without data: prefix)
+ previewUrl: string // data: URL for display
+ size: number // File size in bytes
+}
+
// UI chat message for display
export interface ChatMessage {
id: string
role: 'user' | 'assistant' | 'system'
content: string
+ attachments?: ImageAttachment[]
timestamp: Date
questions?: SpecQuestion[]
isStreaming?: boolean