feat: Add image upload support for Spec Creation chat

Add the ability to attach images (JPEG, PNG) in the Spec Creation chat interface for Claude to analyze during app specification creation.

Frontend changes:
- Add ImageAttachment interface to types.ts with id, filename, mimeType, base64Data, previewUrl, and size fields
- Update ChatMessage interface with optional attachments field
- Update useSpecChat hook to accept and send attachments via WebSocket
- Add file input, drag-drop support, and preview thumbnails to SpecCreationChat component with validation (5 MB max, JPEG/PNG only)
- Update ChatMessage component to render image attachments with click-to-enlarge functionality

Backend changes:
- Add ImageAttachment Pydantic schema with base64 validation
- Update spec_creation.py WebSocket handler to parse and validate image attachments from client messages
- Update spec_chat_session.py to format multimodal content blocks for Claude API using async generator pattern

Features:
- Drag-and-drop or click paperclip button to attach images
- Preview thumbnails with remove button before sending
- File type validation (image/jpeg, image/png)
- File size validation (5 MB maximum)
- Images display in chat history
- Click images to view full size
- Cross-platform compatible (Windows, macOS, Linux)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-03-17 02:43:09 +00:00 · 2026-01-02 10:12:04 +02:00
parent 05607b310a
commit b628aa7051
7 changed files with 335 additions and 52 deletions
--- a/server/services/spec_chat_session.py
+++ b/server/services/spec_chat_session.py
@@ -17,8 +17,28 @@ from typing import AsyncGenerator, Optional

 from claude_agent_sdk import ClaudeAgentOptions, ClaudeSDKClient

+from ..schemas import ImageAttachment
+
 logger = logging.getLogger(__name__)

+
+async def _make_multimodal_message(content_blocks: list[dict]) -> AsyncGenerator[dict, None]:
+    """
+    Yield a single user message that wraps ``content_blocks`` for ``client.query()``.
+
+    The Claude Agent SDK's query() method accepts either:
+    - A string (simple text)
+    - An AsyncIterable[dict] (for custom message formats)
+
+    Exactly one dict is yielded; ``content_blocks`` becomes the message's "content".
+    """
+    yield {
+        "type": "user",
+        "message": {"role": "user", "content": content_blocks},
+        "parent_tool_use_id": None,
+        "session_id": "default",
+    }
+
 # Root directory of the project
 ROOT_DIR = Path(__file__).parent.parent.parent

@@ -166,12 +186,17 @@ class SpecChatSession:
                "content": f"Failed to start conversation: {str(e)}"
            }

-    async def send_message(self, user_message: str) -> AsyncGenerator[dict, None]:
+    async def send_message(
+        self,
+        user_message: str,
+        attachments: list[ImageAttachment] | None = None
+    ) -> AsyncGenerator[dict, None]:
        """
        Send user message and stream Claude's response.

        Args:
            user_message: The user's response
+            attachments: Optional list of image attachments

        Yields:
            Message chunks of various types:
@@ -191,11 +216,12 @@ class SpecChatSession:
        self.messages.append({
            "role": "user",
            "content": user_message,
+            "has_attachments": bool(attachments),
            "timestamp": datetime.now().isoformat()
        })

        try:
-            async for chunk in self._query_claude(user_message):
+            async for chunk in self._query_claude(user_message, attachments):
                yield chunk
            # Signal that the response is complete (for UI to hide loading indicator)
            yield {"type": "response_done"}
@@ -206,11 +232,16 @@ class SpecChatSession:
                "content": f"Error: {str(e)}"
            }

-    async def _query_claude(self, message: str) -> AsyncGenerator[dict, None]:
+    async def _query_claude(
+        self,
+        message: str,
+        attachments: list[ImageAttachment] | None = None
+    ) -> AsyncGenerator[dict, None]:
        """
        Internal method to query Claude and stream responses.

        Handles tool calls (Write) and text responses.
+        Supports multimodal content with image attachments.

        IMPORTANT: Spec creation requires BOTH files to be written:
        1. app_spec.txt - the main specification
@@ -221,8 +252,33 @@ class SpecChatSession:
        if not self.client:
            return

-        # Send the message to Claude using the SDK's query method
-        await self.client.query(message)
+        # Build the message content
+        if attachments and len(attachments) > 0:
+            # Multimodal message: build content blocks array
+            content_blocks = []
+
+            # Add text block if there's text
+            if message:
+                content_blocks.append({"type": "text", "text": message})
+
+            # Add image blocks
+            for att in attachments:
+                content_blocks.append({
+                    "type": "image",
+                    "source": {
+                        "type": "base64",
+                        "media_type": att.mimeType,
+                        "data": att.base64Data,
+                    }
+                })
+
+            # Send multimodal content to Claude using async generator format
+            # The SDK's query() accepts AsyncIterable[dict] for custom message formats
+            await self.client.query(_make_multimodal_message(content_blocks))
+            logger.info(f"Sent multimodal message with {len(attachments)} image(s)")
+        else:
+            # Text-only message: use string format
+            await self.client.query(message)

        current_text = ""