From 7210c6f0661046b63d58c0dc265d2bf6a51a03d1 Mon Sep 17 00:00:00 2001 From: Auto Date: Wed, 25 Mar 2026 12:51:17 +0200 Subject: [PATCH] feat: add document file upload support for spec creation and project expansion Add support for uploading Markdown, Text, Word (.docx), CSV, Excel (.xlsx), PDF, and PowerPoint (.pptx) files in addition to existing JPEG/PNG image uploads in the spec creation and project expansion chat interfaces. Backend changes: - New server/utils/document_extraction.py: in-memory text extraction for all document formats using python-docx, openpyxl, PyPDF2, python-pptx (no disk persistence) - Rename ImageAttachment to FileAttachment across schemas, routers, and chat session services - Add build_attachment_content_blocks() helper in chat_constants.py to route images as image content blocks and documents as extracted text blocks - Separate size limits: 5MB for images, 20MB for documents - Handle extraction errors (corrupt files, encrypted PDFs) gracefully Frontend changes: - Widen accepted MIME types and file extensions in both chat components - Add resolveMimeType() fallback for browsers that don't set MIME on .md files - Document attachments display with FileText icon instead of image thumbnail - ChatMessage renders documents as compact pills with filename and size - Update help text from "attach images" to "attach files" Dependencies added: python-docx, openpyxl, PyPDF2, python-pptx Co-Authored-By: Claude Opus 4.6 (1M context) --- requirements-prod.txt | 4 + requirements.txt | 4 + server/routers/expand_project.py | 6 +- server/routers/spec_creation.py | 6 +- server/schemas.py | 59 +++++-- server/services/chat_constants.py | 36 ++++ server/services/expand_chat_session.py | 27 +-- server/services/spec_chat_session.py | 26 ++- server/utils/document_extraction.py | 221 ++++++++++++++++++++++++ ui/src/components/ChatMessage.tsx | 39 +++-- ui/src/components/ExpandProjectChat.tsx | 71 +++++--- ui/src/components/SpecCreationChat.tsx | 70 +++++--- 
ui/src/hooks/useExpandChat.ts | 6 +- ui/src/hooks/useSpecChat.ts | 6 +- ui/src/lib/types.ts | 55 +++++- 15 files changed, 513 insertions(+), 123 deletions(-) create mode 100644 server/utils/document_extraction.py diff --git a/requirements-prod.txt b/requirements-prod.txt index 1b2f0b2..12d5d32 100644 --- a/requirements-prod.txt +++ b/requirements-prod.txt @@ -12,3 +12,7 @@ aiofiles>=24.0.0 apscheduler>=3.10.0,<4.0.0 pywinpty>=2.0.0; sys_platform == "win32" pyyaml>=6.0.0 +python-docx>=1.1.0 +openpyxl>=3.1.0 +PyPDF2>=3.0.0 +python-pptx>=1.0.0 diff --git a/requirements.txt b/requirements.txt index f042b4d..23c9afd 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,6 +10,10 @@ aiofiles>=24.0.0 apscheduler>=3.10.0,<4.0.0 pywinpty>=2.0.0; sys_platform == "win32" pyyaml>=6.0.0 +python-docx>=1.1.0 +openpyxl>=3.1.0 +PyPDF2>=3.0.0 +python-pptx>=1.0.0 # Dev dependencies ruff>=0.8.0 diff --git a/server/routers/expand_project.py b/server/routers/expand_project.py index d680b95..af96161 100644 --- a/server/routers/expand_project.py +++ b/server/routers/expand_project.py @@ -13,7 +13,7 @@ from typing import Optional from fastapi import APIRouter, HTTPException, WebSocket, WebSocketDisconnect from pydantic import BaseModel, ValidationError -from ..schemas import ImageAttachment +from ..schemas import FileAttachment from ..services.expand_chat_session import ( ExpandChatSession, create_expand_session, @@ -181,12 +181,12 @@ async def expand_project_websocket(websocket: WebSocket, project_name: str): user_content = message.get("content", "").strip() # Parse attachments if present - attachments: list[ImageAttachment] = [] + attachments: list[FileAttachment] = [] raw_attachments = message.get("attachments", []) if raw_attachments: try: for raw_att in raw_attachments: - attachments.append(ImageAttachment(**raw_att)) + attachments.append(FileAttachment(**raw_att)) except (ValidationError, Exception) as e: logger.warning(f"Invalid attachment data: {e}") await 
websocket.send_json({ diff --git a/server/routers/spec_creation.py b/server/routers/spec_creation.py index 44b8d04..a333f35 100644 --- a/server/routers/spec_creation.py +++ b/server/routers/spec_creation.py @@ -12,7 +12,7 @@ from typing import Optional from fastapi import APIRouter, HTTPException, WebSocket, WebSocketDisconnect from pydantic import BaseModel, ValidationError -from ..schemas import ImageAttachment +from ..schemas import FileAttachment from ..services.spec_chat_session import ( SpecChatSession, create_session, @@ -242,12 +242,12 @@ async def spec_chat_websocket(websocket: WebSocket, project_name: str): user_content = message.get("content", "").strip() # Parse attachments if present - attachments: list[ImageAttachment] = [] + attachments: list[FileAttachment] = [] raw_attachments = message.get("attachments", []) if raw_attachments: try: for raw_att in raw_attachments: - attachments.append(ImageAttachment(**raw_att)) + attachments.append(FileAttachment(**raw_att)) except (ValidationError, Exception) as e: logger.warning(f"Invalid attachment data: {e}") await websocket.send_json({ diff --git a/server/schemas.py b/server/schemas.py index 72124a5..abe5bbc 100644 --- a/server/schemas.py +++ b/server/schemas.py @@ -11,7 +11,7 @@ from datetime import datetime from pathlib import Path from typing import Literal -from pydantic import BaseModel, Field, field_validator +from pydantic import BaseModel, Field, field_validator, model_validator # Import model constants from registry (single source of truth) _root = Path(__file__).parent.parent @@ -331,36 +331,61 @@ class WSAgentUpdateMessage(BaseModel): # ============================================================================ -# Spec Chat Schemas +# Chat Attachment Schemas # ============================================================================ -# Maximum image file size: 5 MB -MAX_IMAGE_SIZE = 5 * 1024 * 1024 +# Size limits +MAX_IMAGE_SIZE = 5 * 1024 * 1024 # 5 MB for images +MAX_DOCUMENT_SIZE = 20 * 
1024 * 1024 # 20 MB for documents + +_IMAGE_MIME_TYPES = {'image/jpeg', 'image/png'} -class ImageAttachment(BaseModel): - """Image attachment from client for spec creation chat.""" +class FileAttachment(BaseModel): + """File attachment from client for spec creation / expand project chat.""" filename: str = Field(..., min_length=1, max_length=255) - mimeType: Literal['image/jpeg', 'image/png'] + mimeType: Literal[ + 'image/jpeg', 'image/png', + 'text/plain', 'text/markdown', 'text/csv', + 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', + 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', + 'application/pdf', + 'application/vnd.openxmlformats-officedocument.presentationml.presentation', + ] base64Data: str @field_validator('base64Data') @classmethod - def validate_base64_and_size(cls, v: str) -> str: - """Validate that base64 data is valid and within size limit.""" + def validate_base64(cls, v: str) -> str: + """Validate that base64 data is decodable.""" try: - decoded = base64.b64decode(v) - if len(decoded) > MAX_IMAGE_SIZE: - raise ValueError( - f'Image size ({len(decoded) / (1024 * 1024):.1f} MB) exceeds ' - f'maximum of {MAX_IMAGE_SIZE // (1024 * 1024)} MB' - ) + base64.b64decode(v) return v except Exception as e: - if 'Image size' in str(e): - raise raise ValueError(f'Invalid base64 data: {e}') + @model_validator(mode='after') + def validate_size(self) -> 'FileAttachment': + """Validate file size based on MIME type.""" + try: + decoded = base64.b64decode(self.base64Data) + except Exception: + return self # Already caught by field validator + + if self.mimeType in _IMAGE_MIME_TYPES: + max_size = MAX_IMAGE_SIZE + label = "Image" + else: + max_size = MAX_DOCUMENT_SIZE + label = "Document" + + if len(decoded) > max_size: + raise ValueError( + f'{label} size ({len(decoded) / (1024 * 1024):.1f} MB) exceeds ' + f'maximum of {max_size // (1024 * 1024)} MB' + ) + return self + # 
============================================================================ # Filesystem Schemas diff --git a/server/services/chat_constants.py b/server/services/chat_constants.py index 16a41fd..2e832e2 100644 --- a/server/services/chat_constants.py +++ b/server/services/chat_constants.py @@ -35,6 +35,13 @@ if _root_str not in sys.path: from env_constants import API_ENV_VARS # noqa: E402, F401 from rate_limit_utils import is_rate_limit_error, parse_retry_after # noqa: E402, F401 +from ..schemas import FileAttachment +from ..utils.document_extraction import ( + extract_text_from_document, + is_document, + is_image, +) + logger = logging.getLogger(__name__) @@ -88,6 +95,35 @@ async def safe_receive_response(client: Any, log: logging.Logger) -> AsyncGenera raise +def build_attachment_content_blocks(attachments: list[FileAttachment]) -> list[dict]: + """Convert FileAttachment objects to Claude API content blocks. + + Images become image content blocks (passed directly to Claude's vision). + Documents are extracted to text and become text content blocks. + + Raises: + DocumentExtractionError: If a document cannot be read. + """ + blocks: list[dict] = [] + for att in attachments: + if is_image(att.mimeType): + blocks.append({ + "type": "image", + "source": { + "type": "base64", + "media_type": att.mimeType, + "data": att.base64Data, + } + }) + elif is_document(att.mimeType): + text = extract_text_from_document(att.base64Data, att.mimeType, att.filename) + blocks.append({ + "type": "text", + "text": f"[Content of uploaded file: {att.filename}]\n\n{text}", + }) + return blocks + + async def make_multimodal_message(content_blocks: list[dict]) -> AsyncGenerator[dict, None]: """Yield a single multimodal user message in Claude Agent SDK format. 
diff --git a/server/services/expand_chat_session.py b/server/services/expand_chat_session.py index 35a2f5c..00a0926 100644 --- a/server/services/expand_chat_session.py +++ b/server/services/expand_chat_session.py @@ -21,9 +21,11 @@ from typing import Any, AsyncGenerator, Optional from claude_agent_sdk import ClaudeAgentOptions, ClaudeSDKClient from dotenv import load_dotenv -from ..schemas import ImageAttachment +from ..schemas import FileAttachment +from ..utils.document_extraction import DocumentExtractionError from .chat_constants import ( ROOT_DIR, + build_attachment_content_blocks, check_rate_limit_error, make_multimodal_message, safe_receive_response, @@ -226,7 +228,7 @@ class ExpandChatSession: async def send_message( self, user_message: str, - attachments: list[ImageAttachment] | None = None + attachments: list[FileAttachment] | None = None ) -> AsyncGenerator[dict, None]: """ Send user message and stream Claude's response. @@ -273,7 +275,7 @@ class ExpandChatSession: async def _query_claude( self, message: str, - attachments: list[ImageAttachment] | None = None + attachments: list[FileAttachment] | None = None ) -> AsyncGenerator[dict, None]: """ Internal method to query Claude and stream responses. 
@@ -289,17 +291,16 @@ class ExpandChatSession: content_blocks: list[dict[str, Any]] = [] if message: content_blocks.append({"type": "text", "text": message}) - for att in attachments: - content_blocks.append({ - "type": "image", - "source": { - "type": "base64", - "media_type": att.mimeType, - "data": att.base64Data, - } - }) + + # Add attachment blocks (images as image blocks, documents as extracted text) + try: + content_blocks.extend(build_attachment_content_blocks(attachments)) + except DocumentExtractionError as e: + yield {"type": "error", "content": str(e)} + return + await self.client.query(make_multimodal_message(content_blocks)) - logger.info(f"Sent multimodal message with {len(attachments)} image(s)") + logger.info(f"Sent multimodal message with {len(attachments)} attachment(s)") else: await self.client.query(message) diff --git a/server/services/spec_chat_session.py b/server/services/spec_chat_session.py index a6b5598..9e853c3 100644 --- a/server/services/spec_chat_session.py +++ b/server/services/spec_chat_session.py @@ -18,9 +18,11 @@ from typing import Any, AsyncGenerator, Optional from claude_agent_sdk import ClaudeAgentOptions, ClaudeSDKClient from dotenv import load_dotenv -from ..schemas import ImageAttachment +from ..schemas import FileAttachment +from ..utils.document_extraction import DocumentExtractionError from .chat_constants import ( ROOT_DIR, + build_attachment_content_blocks, check_rate_limit_error, make_multimodal_message, safe_receive_response, @@ -201,7 +203,7 @@ class SpecChatSession: async def send_message( self, user_message: str, - attachments: list[ImageAttachment] | None = None + attachments: list[FileAttachment] | None = None ) -> AsyncGenerator[dict, None]: """ Send user message and stream Claude's response. 
@@ -247,7 +249,7 @@ class SpecChatSession: async def _query_claude( self, message: str, - attachments: list[ImageAttachment] | None = None + attachments: list[FileAttachment] | None = None ) -> AsyncGenerator[dict, None]: """ Internal method to query Claude and stream responses. @@ -273,21 +275,17 @@ class SpecChatSession: if message: content_blocks.append({"type": "text", "text": message}) - # Add image blocks - for att in attachments: - content_blocks.append({ - "type": "image", - "source": { - "type": "base64", - "media_type": att.mimeType, - "data": att.base64Data, - } - }) + # Add attachment blocks (images as image blocks, documents as extracted text) + try: + content_blocks.extend(build_attachment_content_blocks(attachments)) + except DocumentExtractionError as e: + yield {"type": "error", "content": str(e)} + return # Send multimodal content to Claude using async generator format # The SDK's query() accepts AsyncIterable[dict] for custom message formats await self.client.query(make_multimodal_message(content_blocks)) - logger.info(f"Sent multimodal message with {len(attachments)} image(s)") + logger.info(f"Sent multimodal message with {len(attachments)} attachment(s)") else: # Text-only message: use string format await self.client.query(message) diff --git a/server/utils/document_extraction.py b/server/utils/document_extraction.py new file mode 100644 index 0000000..b0c13d8 --- /dev/null +++ b/server/utils/document_extraction.py @@ -0,0 +1,221 @@ +""" +Document Extraction Utility +============================ + +Extracts text content from various document formats in memory (no disk I/O). +Supports: TXT, MD, CSV, DOCX, XLSX, PDF, PPTX. 
+""" + +import base64 +import csv +import io +import logging + +logger = logging.getLogger(__name__) + +# Maximum characters of extracted text to send to Claude +MAX_EXTRACTED_CHARS = 200_000 + +# Maximum rows per sheet for Excel files +MAX_EXCEL_ROWS_PER_SHEET = 10_000 +MAX_EXCEL_SHEETS = 50 + +# MIME type classification +DOCUMENT_MIME_TYPES: dict[str, str] = { + "text/plain": ".txt", + "text/markdown": ".md", + "text/csv": ".csv", + "application/vnd.openxmlformats-officedocument.wordprocessingml.document": ".docx", + "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": ".xlsx", + "application/pdf": ".pdf", + "application/vnd.openxmlformats-officedocument.presentationml.presentation": ".pptx", +} + +IMAGE_MIME_TYPES = {"image/jpeg", "image/png"} + +ALL_ALLOWED_MIME_TYPES = IMAGE_MIME_TYPES | set(DOCUMENT_MIME_TYPES.keys()) + + +def is_image(mime_type: str) -> bool: + """Check if the MIME type is a supported image format.""" + return mime_type in IMAGE_MIME_TYPES + + +def is_document(mime_type: str) -> bool: + """Check if the MIME type is a supported document format.""" + return mime_type in DOCUMENT_MIME_TYPES + + +class DocumentExtractionError(Exception): + """Raised when text extraction from a document fails.""" + + def __init__(self, filename: str, reason: str): + self.filename = filename + self.reason = reason + super().__init__(f"Failed to read {filename}: {reason}") + + +def _truncate(text: str) -> str: + """Truncate text if it exceeds the maximum character limit.""" + if len(text) > MAX_EXTRACTED_CHARS: + omitted = len(text) - MAX_EXTRACTED_CHARS + return text[:MAX_EXTRACTED_CHARS] + f"\n\n[... 
truncated, {omitted:,} characters omitted]"
+    return text
+
+
+def _extract_plain_text(data: bytes) -> str:
+    """Decode plain-text or Markdown bytes to ``str``.
+
+    UTF-8 is tried first. The ``utf-8-sig`` codec is used so that a leading
+    byte-order mark is dropped instead of ending up as a stray U+FEFF
+    character at the start of the extracted text; input without a BOM
+    decodes exactly as with plain ``utf-8``. Latin-1 is the fallback because
+    it maps every byte to a character and therefore cannot fail.
+    """
+    try:
+        return data.decode("utf-8-sig")
+    except UnicodeDecodeError:
+        return data.decode("latin-1")
+
+
+def _extract_csv(data: bytes) -> str:
+    """Render CSV bytes as one ``Row N: a, b, c`` line per record."""
+    # Reuse the text decoder so CSV gets the same BOM/encoding handling
+    # instead of carrying a second copy of the fallback logic.
+    text = _extract_plain_text(data)
+
+    reader = csv.reader(io.StringIO(text))
+    lines = []
+    for i, row in enumerate(reader):
+        lines.append(f"Row {i + 1}: {', '.join(row)}")
+    return "\n".join(lines)
+
+
+def _extract_docx(data: bytes) -> str:
+    """Extract the non-blank paragraphs of a Word document, blank-line separated.
+
+    NOTE: only ``doc.paragraphs`` is read here, so any text that is not
+    exposed through that list is not part of the result.
+    """
+    from docx import Document
+
+    doc = Document(io.BytesIO(data))
+    paragraphs = [p.text for p in doc.paragraphs if p.text.strip()]
+    return "\n\n".join(paragraphs)
+
+
+def _extract_xlsx(data: bytes) -> str:
+    """Extract cell values from an Excel workbook as tab-separated text.
+
+    Each sheet becomes a ``=== Sheet: name ===`` section. At most
+    MAX_EXCEL_SHEETS sheets and MAX_EXCEL_ROWS_PER_SHEET rows per sheet are
+    read; a bracketed marker line records anything that was skipped.
+    """
+    from openpyxl import load_workbook
+
+    wb = load_workbook(io.BytesIO(data), read_only=True, data_only=True)
+    sections = []
+
+    # A read-only workbook has to be closed explicitly. Doing it in
+    # ``finally`` guarantees that also when reading a sheet raises.
+    try:
+        for sheet_idx, sheet_name in enumerate(wb.sheetnames):
+            if sheet_idx >= MAX_EXCEL_SHEETS:
+                sections.append(f"\n[... {len(wb.sheetnames) - MAX_EXCEL_SHEETS} more sheets omitted]")
+                break
+
+            ws = wb[sheet_name]
+            rows_text = [f"=== Sheet: {sheet_name} ==="]
+            row_count = 0
+
+            for row in ws.iter_rows(values_only=True):
+                if row_count >= MAX_EXCEL_ROWS_PER_SHEET:
+                    rows_text.append(f"[... more rows omitted, limit {MAX_EXCEL_ROWS_PER_SHEET:,} rows/sheet]")
+                    break
+                # Empty cells become empty strings so columns stay aligned.
+                cells = [str(cell) if cell is not None else "" for cell in row]
+                rows_text.append("\t".join(cells))
+                row_count += 1
+
+            sections.append("\n".join(rows_text))
+    finally:
+        wb.close()
+
+    return "\n\n".join(sections)
+
+
+def _extract_pdf(data: bytes, filename: str) -> str:
+    """Extract text from a PDF, one ``--- Page N ---`` section per page with text.
+
+    Pages whose extracted text is empty or whitespace-only are skipped.
+
+    Raises:
+        DocumentExtractionError: If the PDF is password-protected.
+        PdfReadError: Any other read error from PyPDF2 is re-raised unchanged.
+    """
+    from PyPDF2 import PdfReader
+    from PyPDF2.errors import PdfReadError
+
+    try:
+        reader = PdfReader(io.BytesIO(data))
+    except PdfReadError as e:
+        message = str(e).lower()
+        if "encrypt" in message or "password" in message:
+            # Chain the cause so the underlying PyPDF2 error stays visible.
+            raise DocumentExtractionError(filename, "PDF is password-protected") from e
+        raise
+
+    if reader.is_encrypted:
+        raise DocumentExtractionError(filename, "PDF is password-protected")
+
+    pages = []
+    for i, page in enumerate(reader.pages):
+        text = page.extract_text()
+        if text and text.strip():
+            pages.append(f"--- Page {i + 1} ---\n{text}")
+
+    return "\n\n".join(pages)
+
+
+def _extract_pptx(data: bytes) -> str:
+    """Extract text from a PowerPoint file, one ``--- Slide N ---`` section per slide.
+
+    Only shapes that have a text frame contribute; slides without any
+    non-blank paragraph are left out of the result.
+    """
+    from pptx import Presentation
+
+    prs = Presentation(io.BytesIO(data))
+    slides_text = []
+
+    for i, slide in enumerate(prs.slides):
+        texts = []
+        for shape in slide.shapes:
+            if shape.has_text_frame:
+                for paragraph in shape.text_frame.paragraphs:
+                    text = paragraph.text.strip()
+                    if text:
+                        texts.append(text)
+        if texts:
+            slides_text.append(f"--- Slide {i + 1} ---\n" + "\n".join(texts))
+
+    return "\n\n".join(slides_text)
+
+
+def extract_text_from_document(base64_data: str, mime_type: str, filename: str) -> str:
+    """
+    Extract text content from a document file.
+
+    Args:
+        base64_data: Base64-encoded file content
+        mime_type: MIME type of the document
+        filename: Original filename (for error messages)
+
+    Returns:
+        Extracted text content, truncated if necessary, or a bracketed
+        placeholder message when the file yields no extractable text
+
+    Raises:
+        DocumentExtractionError: If the MIME type is unsupported, the base64
+            payload cannot be decoded, or extraction fails
+    """
+    if mime_type not in DOCUMENT_MIME_TYPES:
+        raise DocumentExtractionError(filename, f"unsupported document type: {mime_type}")
+
+    try:
+        data = base64.b64decode(base64_data)
+    except Exception as e:
+        # Chain the cause so the original decoding error is not lost.
+        raise DocumentExtractionError(filename, f"invalid base64 data: {e}") from e
+
+    try:
+        if mime_type in ("text/plain", "text/markdown"):
+            text = _extract_plain_text(data)
+        elif mime_type == "text/csv":
+            text = _extract_csv(data)
+        elif mime_type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
+            text = _extract_docx(data)
+        elif mime_type == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":
+            text = _extract_xlsx(data)
+        elif mime_type == "application/pdf":
+            text = _extract_pdf(data, filename)
+        elif mime_type == "application/vnd.openxmlformats-officedocument.presentationml.presentation":
+            text = _extract_pptx(data)
+        else:
+            raise DocumentExtractionError(filename, f"unsupported document type: {mime_type}")
+    except DocumentExtractionError:
+        # Already carries a user-facing reason; pass it through untouched.
+        raise
+    except Exception as e:
+        # Any parser failure is reported with one generic reason; the detail
+        # goes to the log and stays attached as the chained cause.
+        logger.warning(f"Document extraction failed for {filename}: {e}")
+        raise DocumentExtractionError(
+            filename, "file appears to be corrupt or in an unexpected format"
+        ) from e
+
+    if not text or not text.strip():
+        return f"[File {filename} is empty or contains no extractable text]"
+
+    return _truncate(text)
diff --git a/ui/src/components/ChatMessage.tsx b/ui/src/components/ChatMessage.tsx index fe87407..eff6f5a 100644 --- a/ui/src/components/ChatMessage.tsx +++ b/ui/src/components/ChatMessage.tsx @@ -6,10 +6,11 @@ */ import { memo } from 'react' -import { Bot, User, Info } from 'lucide-react' +import { Bot, User, Info, FileText } from 'lucide-react' import ReactMarkdown, { type Components 
} from 'react-markdown' import remarkGfm from 'remark-gfm' import type { ChatMessage as ChatMessageType } from '../lib/types' +import { isImageAttachment } from '../lib/types' import { Card } from '@/components/ui/card' interface ChatMessageProps { @@ -104,21 +105,35 @@ export const ChatMessage = memo(function ChatMessage({ message }: ChatMessagePro )} - {/* Display image attachments */} + {/* Display file attachments */} {attachments && attachments.length > 0 && (
{attachments.map((attachment) => (
- {attachment.filename} window.open(attachment.previewUrl, '_blank')} - title={`${attachment.filename} (click to enlarge)`} - /> - - {attachment.filename} - + {isImageAttachment(attachment) ? ( + <> + {attachment.filename} window.open(attachment.previewUrl, '_blank')} + title={`${attachment.filename} (click to enlarge)`} + /> + + {attachment.filename} + + + ) : ( +
+ + + {attachment.filename} + + + ({(attachment.size / 1024).toFixed(0)} KB) + +
+ )}
))}
diff --git a/ui/src/components/ExpandProjectChat.tsx b/ui/src/components/ExpandProjectChat.tsx index d1ccd21..fe53cd6 100644 --- a/ui/src/components/ExpandProjectChat.tsx +++ b/ui/src/components/ExpandProjectChat.tsx @@ -6,20 +6,22 @@ */ import { useCallback, useEffect, useRef, useState } from 'react' -import { Send, X, CheckCircle2, AlertCircle, Wifi, WifiOff, RotateCcw, Paperclip, Plus } from 'lucide-react' +import { Send, X, CheckCircle2, AlertCircle, Wifi, WifiOff, RotateCcw, Paperclip, Plus, FileText } from 'lucide-react' import { useExpandChat } from '../hooks/useExpandChat' import { ChatMessage } from './ChatMessage' import { TypingIndicator } from './TypingIndicator' -import type { ImageAttachment } from '../lib/types' +import type { FileAttachment } from '../lib/types' +import { ALL_ALLOWED_MIME_TYPES, IMAGE_MIME_TYPES, isImageAttachment, resolveMimeType } from '../lib/types' import { isSubmitEnter } from '../lib/keyboard' import { Button } from '@/components/ui/button' import { Input } from '@/components/ui/input' import { Card, CardContent } from '@/components/ui/card' import { Alert, AlertDescription } from '@/components/ui/alert' -// Image upload validation constants -const MAX_FILE_SIZE = 5 * 1024 * 1024 // 5 MB -const ALLOWED_TYPES = ['image/jpeg', 'image/png'] +// File upload validation constants +const MAX_IMAGE_SIZE = 5 * 1024 * 1024 // 5 MB for images +const MAX_DOCUMENT_SIZE = 20 * 1024 * 1024 // 20 MB for documents +const ALLOWED_EXTENSIONS = ['md', 'txt', 'csv', 'docx', 'xlsx', 'pdf', 'pptx', 'jpg', 'jpeg', 'png'] interface ExpandProjectChatProps { projectName: string @@ -34,7 +36,7 @@ export function ExpandProjectChat({ }: ExpandProjectChatProps) { const [input, setInput] = useState('') const [error, setError] = useState(null) - const [pendingAttachments, setPendingAttachments] = useState([]) + const [pendingAttachments, setPendingAttachments] = useState([]) const messagesEndRef = useRef(null) const inputRef = useRef(null) const fileInputRef 
= useRef(null) @@ -95,20 +97,33 @@ export function ExpandProjectChat({ } } - // File handling for image attachments + // File handling for attachments (images and documents) const handleFileSelect = useCallback((files: FileList | null) => { if (!files) return Array.from(files).forEach((file) => { - // Validate file type - if (!ALLOWED_TYPES.includes(file.type)) { - setError(`Invalid file type: ${file.name}. Only JPEG and PNG are supported.`) - return + // Resolve MIME type (browsers may not set it for .md files) + let mimeType = file.type + if (!mimeType || !ALL_ALLOWED_MIME_TYPES.includes(mimeType)) { + mimeType = resolveMimeType(file.name) } - // Validate file size - if (file.size > MAX_FILE_SIZE) { - setError(`File too large: ${file.name}. Maximum size is 5 MB.`) + // Validate file type + if (!ALL_ALLOWED_MIME_TYPES.includes(mimeType)) { + const ext = file.name.split('.').pop()?.toLowerCase() + if (!ext || !ALLOWED_EXTENSIONS.includes(ext)) { + setError(`Unsupported file type: ${file.name}. Supported: images (JPEG, PNG) and documents (MD, TXT, CSV, DOCX, XLSX, PDF, PPTX).`) + return + } + mimeType = resolveMimeType(file.name) + } + + // Validate size based on type + const isImage = (IMAGE_MIME_TYPES as readonly string[]).includes(mimeType) + const maxSize = isImage ? MAX_IMAGE_SIZE : MAX_DOCUMENT_SIZE + const maxLabel = isImage ? '5 MB' : '20 MB' + if (file.size > maxSize) { + setError(`File too large: ${file.name}. Maximum size is ${maxLabel}.`) return } @@ -118,12 +133,12 @@ export function ExpandProjectChat({ const dataUrl = e.target?.result as string const base64Data = dataUrl.split(',')[1] - const attachment: ImageAttachment = { + const attachment: FileAttachment = { id: `${Date.now()}-${Math.random().toString(36).substring(2, 9)}`, filename: file.name, - mimeType: file.type as 'image/jpeg' | 'image/png', + mimeType: mimeType as FileAttachment['mimeType'], base64Data, - previewUrl: dataUrl, + previewUrl: isImage ? 
dataUrl : '', size: file.size, } @@ -291,11 +306,17 @@ export function ExpandProjectChat({ key={attachment.id} className="relative group border-2 border-border p-1 bg-card rounded shadow-sm" > - {attachment.filename} + {isImageAttachment(attachment) ? ( + {attachment.filename} + ) : ( +
+ +
+ )} @@ -364,7 +385,7 @@ export function ExpandProjectChat({ {/* Help text */}

- Press Enter to send. Drag & drop or click to attach images. + Press Enter to send. Drag & drop or click to attach files.

)} diff --git a/ui/src/components/SpecCreationChat.tsx b/ui/src/components/SpecCreationChat.tsx index 184b26e..02cf7c1 100644 --- a/ui/src/components/SpecCreationChat.tsx +++ b/ui/src/components/SpecCreationChat.tsx @@ -11,16 +11,18 @@ import { useSpecChat } from '../hooks/useSpecChat' import { ChatMessage } from './ChatMessage' import { QuestionOptions } from './QuestionOptions' import { TypingIndicator } from './TypingIndicator' -import type { ImageAttachment } from '../lib/types' +import type { FileAttachment } from '../lib/types' +import { ALL_ALLOWED_MIME_TYPES, IMAGE_MIME_TYPES, isImageAttachment, resolveMimeType } from '../lib/types' import { isSubmitEnter } from '../lib/keyboard' import { Button } from '@/components/ui/button' import { Textarea } from '@/components/ui/textarea' import { Card, CardContent } from '@/components/ui/card' import { Alert, AlertDescription } from '@/components/ui/alert' -// Image upload validation constants -const MAX_FILE_SIZE = 5 * 1024 * 1024 // 5 MB -const ALLOWED_TYPES = ['image/jpeg', 'image/png'] +// File upload validation constants +const MAX_IMAGE_SIZE = 5 * 1024 * 1024 // 5 MB for images +const MAX_DOCUMENT_SIZE = 20 * 1024 * 1024 // 20 MB for documents +const ALLOWED_EXTENSIONS = ['md', 'txt', 'csv', 'docx', 'xlsx', 'pdf', 'pptx', 'jpg', 'jpeg', 'png'] // Sample prompt for quick testing const SAMPLE_PROMPT = `Let's call it Simple Todo. This is a really simple web app that I can use to track my to-do items using a Kanban board. I should be able to add to-dos and then drag and drop them through the Kanban board. 
The different columns in the Kanban board are: @@ -64,7 +66,7 @@ export function SpecCreationChat({ const [input, setInput] = useState('') const [error, setError] = useState(null) const [yoloEnabled, setYoloEnabled] = useState(false) - const [pendingAttachments, setPendingAttachments] = useState([]) + const [pendingAttachments, setPendingAttachments] = useState([]) const messagesEndRef = useRef(null) const inputRef = useRef(null) const fileInputRef = useRef(null) @@ -138,20 +140,33 @@ export function SpecCreationChat({ sendAnswer(answers) } - // File handling for image attachments + // File handling for attachments (images and documents) const handleFileSelect = useCallback((files: FileList | null) => { if (!files) return Array.from(files).forEach((file) => { - // Validate file type - if (!ALLOWED_TYPES.includes(file.type)) { - setError(`Invalid file type: ${file.name}. Only JPEG and PNG are supported.`) - return + // Resolve MIME type (browsers may not set it for .md files) + let mimeType = file.type + if (!mimeType || !ALL_ALLOWED_MIME_TYPES.includes(mimeType)) { + mimeType = resolveMimeType(file.name) } - // Validate file size - if (file.size > MAX_FILE_SIZE) { - setError(`File too large: ${file.name}. Maximum size is 5 MB.`) + // Validate file type + if (!ALL_ALLOWED_MIME_TYPES.includes(mimeType)) { + const ext = file.name.split('.').pop()?.toLowerCase() + if (!ext || !ALLOWED_EXTENSIONS.includes(ext)) { + setError(`Unsupported file type: ${file.name}. Supported: images (JPEG, PNG) and documents (MD, TXT, CSV, DOCX, XLSX, PDF, PPTX).`) + return + } + mimeType = resolveMimeType(file.name) + } + + // Validate size based on type + const isImage = (IMAGE_MIME_TYPES as readonly string[]).includes(mimeType) + const maxSize = isImage ? MAX_IMAGE_SIZE : MAX_DOCUMENT_SIZE + const maxLabel = isImage ? '5 MB' : '20 MB' + if (file.size > maxSize) { + setError(`File too large: ${file.name}. 
Maximum size is ${maxLabel}.`) return } @@ -159,15 +174,14 @@ export function SpecCreationChat({ const reader = new FileReader() reader.onload = (e) => { const dataUrl = e.target?.result as string - // dataUrl is "data:image/png;base64,XXXXXX" const base64Data = dataUrl.split(',')[1] - const attachment: ImageAttachment = { + const attachment: FileAttachment = { id: `${Date.now()}-${Math.random().toString(36).substring(2, 9)}`, filename: file.name, - mimeType: file.type as 'image/jpeg' | 'image/png', + mimeType: mimeType as FileAttachment['mimeType'], base64Data, - previewUrl: dataUrl, + previewUrl: isImage ? dataUrl : '', size: file.size, } @@ -364,11 +378,17 @@ export function SpecCreationChat({ key={attachment.id} className="relative group border-2 border-border p-1 bg-card rounded shadow-sm" > - {attachment.filename} + {isImageAttachment(attachment) ? ( + {attachment.filename} + ) : ( +
+ +
+ )} @@ -444,7 +464,7 @@ export function SpecCreationChat({ {/* Help text */}

- Press Enter to send, Shift+Enter for new line. Drag & drop or click to attach images (JPEG/PNG, max 5MB). + Press Enter to send, Shift+Enter for new line. Drag & drop or click to attach files.

)} diff --git a/ui/src/hooks/useExpandChat.ts b/ui/src/hooks/useExpandChat.ts index be632a5..9af1d6b 100644 --- a/ui/src/hooks/useExpandChat.ts +++ b/ui/src/hooks/useExpandChat.ts @@ -3,7 +3,7 @@ */ import { useState, useCallback, useRef, useEffect } from 'react' -import type { ChatMessage, ImageAttachment, ExpandChatServerMessage } from '../lib/types' +import type { ChatMessage, FileAttachment, ExpandChatServerMessage } from '../lib/types' type ConnectionStatus = 'disconnected' | 'connecting' | 'connected' | 'error' @@ -27,7 +27,7 @@ interface UseExpandChatReturn { featuresCreated: number recentFeatures: CreatedFeature[] start: () => void - sendMessage: (content: string, attachments?: ImageAttachment[]) => void + sendMessage: (content: string, attachments?: FileAttachment[]) => void disconnect: () => void } @@ -278,7 +278,7 @@ export function useExpandChat({ setTimeout(checkAndSend, 100) }, [connect, onError]) - const sendMessage = useCallback((content: string, attachments?: ImageAttachment[]) => { + const sendMessage = useCallback((content: string, attachments?: FileAttachment[]) => { if (!wsRef.current || wsRef.current.readyState !== WebSocket.OPEN) { onError?.('Not connected') return diff --git a/ui/src/hooks/useSpecChat.ts b/ui/src/hooks/useSpecChat.ts index 3bd09bb..e01fc6c 100644 --- a/ui/src/hooks/useSpecChat.ts +++ b/ui/src/hooks/useSpecChat.ts @@ -3,7 +3,7 @@ */ import { useState, useCallback, useRef, useEffect } from 'react' -import type { ChatMessage, ImageAttachment, SpecChatServerMessage, SpecQuestion } from '../lib/types' +import type { ChatMessage, FileAttachment, SpecChatServerMessage, SpecQuestion } from '../lib/types' import { getSpecStatus } from '../lib/api' type ConnectionStatus = 'disconnected' | 'connecting' | 'connected' | 'error' @@ -22,7 +22,7 @@ interface UseSpecChatReturn { currentQuestions: SpecQuestion[] | null currentToolId: string | null start: () => void - sendMessage: (content: string, attachments?: ImageAttachment[]) => void + 
sendMessage: (content: string, attachments?: FileAttachment[]) => void sendAnswer: (answers: Record) => void disconnect: () => void } @@ -367,7 +367,7 @@ export function useSpecChat({ setTimeout(checkAndSend, 100) }, [connect]) - const sendMessage = useCallback((content: string, attachments?: ImageAttachment[]) => { + const sendMessage = useCallback((content: string, attachments?: FileAttachment[]) => { if (!wsRef.current || wsRef.current.readyState !== WebSocket.OPEN) { onError?.('Not connected') return diff --git a/ui/src/lib/types.ts b/ui/src/lib/types.ts index 236a8ab..6eb5e2e 100644 --- a/ui/src/lib/types.ts +++ b/ui/src/lib/types.ts @@ -417,22 +417,67 @@ export type SpecChatServerMessage = | SpecChatPongMessage | SpecChatResponseDoneMessage -// Image attachment for chat messages -export interface ImageAttachment { +// File attachment for chat messages (images and documents) +export interface FileAttachment { id: string filename: string - mimeType: 'image/jpeg' | 'image/png' + mimeType: + | 'image/jpeg' + | 'image/png' + | 'text/plain' + | 'text/markdown' + | 'text/csv' + | 'application/vnd.openxmlformats-officedocument.wordprocessingml.document' + | 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet' + | 'application/pdf' + | 'application/vnd.openxmlformats-officedocument.presentationml.presentation' base64Data: string // Raw base64 (without data: prefix) - previewUrl: string // data: URL for display + previewUrl: string // data: URL for images, empty string for documents size: number // File size in bytes } +/** @deprecated Use FileAttachment instead */ +export type ImageAttachment = FileAttachment + +export const IMAGE_MIME_TYPES = ['image/jpeg', 'image/png'] as const +export const DOCUMENT_MIME_TYPES = [ + 'text/plain', + 'text/markdown', + 'text/csv', + 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', + 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', + 'application/pdf', + 
'application/vnd.openxmlformats-officedocument.presentationml.presentation', +] as const +export const ALL_ALLOWED_MIME_TYPES: string[] = [...IMAGE_MIME_TYPES, ...DOCUMENT_MIME_TYPES] + +export function isImageAttachment(att: FileAttachment): boolean { + return (IMAGE_MIME_TYPES as readonly string[]).includes(att.mimeType) +} + +export function resolveMimeType(filename: string): string { + const ext = filename.split('.').pop()?.toLowerCase() + const map: Record<string, string> = { + md: 'text/markdown', + txt: 'text/plain', + csv: 'text/csv', + docx: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', + xlsx: 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', + pdf: 'application/pdf', + pptx: 'application/vnd.openxmlformats-officedocument.presentationml.presentation', + jpg: 'image/jpeg', + jpeg: 'image/jpeg', + png: 'image/png', + } + return map[ext || ''] || 'application/octet-stream' +} + // UI chat message for display export interface ChatMessage { id: string role: 'user' | 'assistant' | 'system' content: string - attachments?: ImageAttachment[] + attachments?: FileAttachment[] timestamp: Date questions?: SpecQuestion[] isStreaming?: boolean