feat: add document file upload support for spec creation and project expansion

Add support for uploading Markdown, Text, Word (.docx), CSV, Excel (.xlsx),
PDF, and PowerPoint (.pptx) files in addition to existing JPEG/PNG image
uploads in the spec creation and project expansion chat interfaces.

Backend changes:
- New server/utils/document_extraction.py: in-memory text extraction for all
  document formats using python-docx, openpyxl, PyPDF2, python-pptx (no disk
  persistence)
- Rename ImageAttachment to FileAttachment across schemas, routers, and
  chat session services
- Add build_attachment_content_blocks() helper in chat_constants.py to route
  images as image content blocks and documents as extracted text blocks
- Separate size limits: 5MB for images, 20MB for documents
- Handle extraction errors (corrupt files, encrypted PDFs) gracefully

Frontend changes:
- Widen accepted MIME types and file extensions in both chat components
- Add resolveMimeType() fallback for browsers that don't set MIME on .md files
- Document attachments display with FileText icon instead of image thumbnail
- ChatMessage renders documents as compact pills with filename and size
- Update help text from "attach images" to "attach files"

Dependencies added: python-docx, openpyxl, PyPDF2, python-pptx

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Auto
2026-03-25 12:51:17 +02:00
parent fca1f6a5e2
commit 7210c6f066
15 changed files with 513 additions and 123 deletions

View File

@@ -12,3 +12,7 @@ aiofiles>=24.0.0
apscheduler>=3.10.0,<4.0.0 apscheduler>=3.10.0,<4.0.0
pywinpty>=2.0.0; sys_platform == "win32" pywinpty>=2.0.0; sys_platform == "win32"
pyyaml>=6.0.0 pyyaml>=6.0.0
python-docx>=1.1.0
openpyxl>=3.1.0
PyPDF2>=3.0.0
python-pptx>=1.0.0

View File

@@ -10,6 +10,10 @@ aiofiles>=24.0.0
apscheduler>=3.10.0,<4.0.0 apscheduler>=3.10.0,<4.0.0
pywinpty>=2.0.0; sys_platform == "win32" pywinpty>=2.0.0; sys_platform == "win32"
pyyaml>=6.0.0 pyyaml>=6.0.0
python-docx>=1.1.0
openpyxl>=3.1.0
PyPDF2>=3.0.0
python-pptx>=1.0.0
# Dev dependencies # Dev dependencies
ruff>=0.8.0 ruff>=0.8.0

View File

@@ -13,7 +13,7 @@ from typing import Optional
from fastapi import APIRouter, HTTPException, WebSocket, WebSocketDisconnect from fastapi import APIRouter, HTTPException, WebSocket, WebSocketDisconnect
from pydantic import BaseModel, ValidationError from pydantic import BaseModel, ValidationError
from ..schemas import ImageAttachment from ..schemas import FileAttachment
from ..services.expand_chat_session import ( from ..services.expand_chat_session import (
ExpandChatSession, ExpandChatSession,
create_expand_session, create_expand_session,
@@ -181,12 +181,12 @@ async def expand_project_websocket(websocket: WebSocket, project_name: str):
user_content = message.get("content", "").strip() user_content = message.get("content", "").strip()
# Parse attachments if present # Parse attachments if present
attachments: list[ImageAttachment] = [] attachments: list[FileAttachment] = []
raw_attachments = message.get("attachments", []) raw_attachments = message.get("attachments", [])
if raw_attachments: if raw_attachments:
try: try:
for raw_att in raw_attachments: for raw_att in raw_attachments:
attachments.append(ImageAttachment(**raw_att)) attachments.append(FileAttachment(**raw_att))
except (ValidationError, Exception) as e: except (ValidationError, Exception) as e:
logger.warning(f"Invalid attachment data: {e}") logger.warning(f"Invalid attachment data: {e}")
await websocket.send_json({ await websocket.send_json({

View File

@@ -12,7 +12,7 @@ from typing import Optional
from fastapi import APIRouter, HTTPException, WebSocket, WebSocketDisconnect from fastapi import APIRouter, HTTPException, WebSocket, WebSocketDisconnect
from pydantic import BaseModel, ValidationError from pydantic import BaseModel, ValidationError
from ..schemas import ImageAttachment from ..schemas import FileAttachment
from ..services.spec_chat_session import ( from ..services.spec_chat_session import (
SpecChatSession, SpecChatSession,
create_session, create_session,
@@ -242,12 +242,12 @@ async def spec_chat_websocket(websocket: WebSocket, project_name: str):
user_content = message.get("content", "").strip() user_content = message.get("content", "").strip()
# Parse attachments if present # Parse attachments if present
attachments: list[ImageAttachment] = [] attachments: list[FileAttachment] = []
raw_attachments = message.get("attachments", []) raw_attachments = message.get("attachments", [])
if raw_attachments: if raw_attachments:
try: try:
for raw_att in raw_attachments: for raw_att in raw_attachments:
attachments.append(ImageAttachment(**raw_att)) attachments.append(FileAttachment(**raw_att))
except (ValidationError, Exception) as e: except (ValidationError, Exception) as e:
logger.warning(f"Invalid attachment data: {e}") logger.warning(f"Invalid attachment data: {e}")
await websocket.send_json({ await websocket.send_json({

View File

@@ -11,7 +11,7 @@ from datetime import datetime
from pathlib import Path from pathlib import Path
from typing import Literal from typing import Literal
from pydantic import BaseModel, Field, field_validator from pydantic import BaseModel, Field, field_validator, model_validator
# Import model constants from registry (single source of truth) # Import model constants from registry (single source of truth)
_root = Path(__file__).parent.parent _root = Path(__file__).parent.parent
@@ -331,36 +331,61 @@ class WSAgentUpdateMessage(BaseModel):
# ============================================================================ # ============================================================================
# Spec Chat Schemas # Chat Attachment Schemas
# ============================================================================ # ============================================================================
# Maximum image file size: 5 MB # Size limits
MAX_IMAGE_SIZE = 5 * 1024 * 1024 MAX_IMAGE_SIZE = 5 * 1024 * 1024 # 5 MB for images
MAX_DOCUMENT_SIZE = 20 * 1024 * 1024 # 20 MB for documents
_IMAGE_MIME_TYPES = {'image/jpeg', 'image/png'}
class ImageAttachment(BaseModel): class FileAttachment(BaseModel):
"""Image attachment from client for spec creation chat.""" """File attachment from client for spec creation / expand project chat."""
filename: str = Field(..., min_length=1, max_length=255) filename: str = Field(..., min_length=1, max_length=255)
mimeType: Literal['image/jpeg', 'image/png'] mimeType: Literal[
'image/jpeg', 'image/png',
'text/plain', 'text/markdown', 'text/csv',
'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
'application/pdf',
'application/vnd.openxmlformats-officedocument.presentationml.presentation',
]
base64Data: str base64Data: str
@field_validator('base64Data') @field_validator('base64Data')
@classmethod @classmethod
def validate_base64_and_size(cls, v: str) -> str: def validate_base64(cls, v: str) -> str:
"""Validate that base64 data is valid and within size limit.""" """Validate that base64 data is decodable."""
try: try:
decoded = base64.b64decode(v) base64.b64decode(v)
if len(decoded) > MAX_IMAGE_SIZE:
raise ValueError(
f'Image size ({len(decoded) / (1024 * 1024):.1f} MB) exceeds '
f'maximum of {MAX_IMAGE_SIZE // (1024 * 1024)} MB'
)
return v return v
except Exception as e: except Exception as e:
if 'Image size' in str(e):
raise
raise ValueError(f'Invalid base64 data: {e}') raise ValueError(f'Invalid base64 data: {e}')
@model_validator(mode='after')
def validate_size(self) -> 'FileAttachment':
    """Reject attachments whose decoded payload exceeds the limit for their type.

    Image MIME types (members of ``_IMAGE_MIME_TYPES``) are capped at
    ``MAX_IMAGE_SIZE``; every other accepted MIME type is treated as a
    document and capped at ``MAX_DOCUMENT_SIZE``.

    Returns:
        The validated model instance, unchanged.

    Raises:
        ValueError: If the decoded size is larger than the applicable limit.
    """
    try:
        payload = base64.b64decode(self.base64Data)
    except Exception:
        # Undecodable data is the base64Data field validator's concern.
        return self

    if self.mimeType in _IMAGE_MIME_TYPES:
        label, max_size = "Image", MAX_IMAGE_SIZE
    else:
        label, max_size = "Document", MAX_DOCUMENT_SIZE

    actual_size = len(payload)
    if actual_size <= max_size:
        return self

    raise ValueError(
        f'{label} size ({actual_size / (1024 * 1024):.1f} MB) exceeds '
        f'maximum of {max_size // (1024 * 1024)} MB'
    )
# ============================================================================ # ============================================================================
# Filesystem Schemas # Filesystem Schemas

View File

@@ -35,6 +35,13 @@ if _root_str not in sys.path:
from env_constants import API_ENV_VARS # noqa: E402, F401 from env_constants import API_ENV_VARS # noqa: E402, F401
from rate_limit_utils import is_rate_limit_error, parse_retry_after # noqa: E402, F401 from rate_limit_utils import is_rate_limit_error, parse_retry_after # noqa: E402, F401
from ..schemas import FileAttachment
from ..utils.document_extraction import (
extract_text_from_document,
is_document,
is_image,
)
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -88,6 +95,35 @@ async def safe_receive_response(client: Any, log: logging.Logger) -> AsyncGenera
raise raise
def build_attachment_content_blocks(attachments: list[FileAttachment]) -> list[dict]:
    """Translate uploaded attachments into Claude API content blocks.

    Each image attachment yields one ``image`` block carrying the original
    base64 payload. Each document attachment is run through
    ``extract_text_from_document`` and yields one ``text`` block prefixed with
    a header naming the file. Attachments that are neither are skipped.

    Args:
        attachments: Attachments in the order they should appear.

    Returns:
        Content blocks in the same order as the accepted attachments.

    Raises:
        DocumentExtractionError: If a document cannot be read.
    """
    content: list[dict] = []
    for attachment in attachments:
        mime = attachment.mimeType

        if is_image(mime):
            source = {
                "type": "base64",
                "media_type": mime,
                "data": attachment.base64Data,
            }
            content.append({"type": "image", "source": source})
            continue

        if not is_document(mime):
            continue

        extracted = extract_text_from_document(
            attachment.base64Data, mime, attachment.filename
        )
        content.append({
            "type": "text",
            "text": f"[Content of uploaded file: {attachment.filename}]\n\n{extracted}",
        })
    return content
async def make_multimodal_message(content_blocks: list[dict]) -> AsyncGenerator[dict, None]: async def make_multimodal_message(content_blocks: list[dict]) -> AsyncGenerator[dict, None]:
"""Yield a single multimodal user message in Claude Agent SDK format. """Yield a single multimodal user message in Claude Agent SDK format.

View File

@@ -21,9 +21,11 @@ from typing import Any, AsyncGenerator, Optional
from claude_agent_sdk import ClaudeAgentOptions, ClaudeSDKClient from claude_agent_sdk import ClaudeAgentOptions, ClaudeSDKClient
from dotenv import load_dotenv from dotenv import load_dotenv
from ..schemas import ImageAttachment from ..schemas import FileAttachment
from ..utils.document_extraction import DocumentExtractionError
from .chat_constants import ( from .chat_constants import (
ROOT_DIR, ROOT_DIR,
build_attachment_content_blocks,
check_rate_limit_error, check_rate_limit_error,
make_multimodal_message, make_multimodal_message,
safe_receive_response, safe_receive_response,
@@ -226,7 +228,7 @@ class ExpandChatSession:
async def send_message( async def send_message(
self, self,
user_message: str, user_message: str,
attachments: list[ImageAttachment] | None = None attachments: list[FileAttachment] | None = None
) -> AsyncGenerator[dict, None]: ) -> AsyncGenerator[dict, None]:
""" """
Send user message and stream Claude's response. Send user message and stream Claude's response.
@@ -273,7 +275,7 @@ class ExpandChatSession:
async def _query_claude( async def _query_claude(
self, self,
message: str, message: str,
attachments: list[ImageAttachment] | None = None attachments: list[FileAttachment] | None = None
) -> AsyncGenerator[dict, None]: ) -> AsyncGenerator[dict, None]:
""" """
Internal method to query Claude and stream responses. Internal method to query Claude and stream responses.
@@ -289,17 +291,16 @@ class ExpandChatSession:
content_blocks: list[dict[str, Any]] = [] content_blocks: list[dict[str, Any]] = []
if message: if message:
content_blocks.append({"type": "text", "text": message}) content_blocks.append({"type": "text", "text": message})
for att in attachments:
content_blocks.append({ # Add attachment blocks (images as image blocks, documents as extracted text)
"type": "image", try:
"source": { content_blocks.extend(build_attachment_content_blocks(attachments))
"type": "base64", except DocumentExtractionError as e:
"media_type": att.mimeType, yield {"type": "error", "content": str(e)}
"data": att.base64Data, return
}
})
await self.client.query(make_multimodal_message(content_blocks)) await self.client.query(make_multimodal_message(content_blocks))
logger.info(f"Sent multimodal message with {len(attachments)} image(s)") logger.info(f"Sent multimodal message with {len(attachments)} attachment(s)")
else: else:
await self.client.query(message) await self.client.query(message)

View File

@@ -18,9 +18,11 @@ from typing import Any, AsyncGenerator, Optional
from claude_agent_sdk import ClaudeAgentOptions, ClaudeSDKClient from claude_agent_sdk import ClaudeAgentOptions, ClaudeSDKClient
from dotenv import load_dotenv from dotenv import load_dotenv
from ..schemas import ImageAttachment from ..schemas import FileAttachment
from ..utils.document_extraction import DocumentExtractionError
from .chat_constants import ( from .chat_constants import (
ROOT_DIR, ROOT_DIR,
build_attachment_content_blocks,
check_rate_limit_error, check_rate_limit_error,
make_multimodal_message, make_multimodal_message,
safe_receive_response, safe_receive_response,
@@ -201,7 +203,7 @@ class SpecChatSession:
async def send_message( async def send_message(
self, self,
user_message: str, user_message: str,
attachments: list[ImageAttachment] | None = None attachments: list[FileAttachment] | None = None
) -> AsyncGenerator[dict, None]: ) -> AsyncGenerator[dict, None]:
""" """
Send user message and stream Claude's response. Send user message and stream Claude's response.
@@ -247,7 +249,7 @@ class SpecChatSession:
async def _query_claude( async def _query_claude(
self, self,
message: str, message: str,
attachments: list[ImageAttachment] | None = None attachments: list[FileAttachment] | None = None
) -> AsyncGenerator[dict, None]: ) -> AsyncGenerator[dict, None]:
""" """
Internal method to query Claude and stream responses. Internal method to query Claude and stream responses.
@@ -273,21 +275,17 @@ class SpecChatSession:
if message: if message:
content_blocks.append({"type": "text", "text": message}) content_blocks.append({"type": "text", "text": message})
# Add image blocks # Add attachment blocks (images as image blocks, documents as extracted text)
for att in attachments: try:
content_blocks.append({ content_blocks.extend(build_attachment_content_blocks(attachments))
"type": "image", except DocumentExtractionError as e:
"source": { yield {"type": "error", "content": str(e)}
"type": "base64", return
"media_type": att.mimeType,
"data": att.base64Data,
}
})
# Send multimodal content to Claude using async generator format # Send multimodal content to Claude using async generator format
# The SDK's query() accepts AsyncIterable[dict] for custom message formats # The SDK's query() accepts AsyncIterable[dict] for custom message formats
await self.client.query(make_multimodal_message(content_blocks)) await self.client.query(make_multimodal_message(content_blocks))
logger.info(f"Sent multimodal message with {len(attachments)} image(s)") logger.info(f"Sent multimodal message with {len(attachments)} attachment(s)")
else: else:
# Text-only message: use string format # Text-only message: use string format
await self.client.query(message) await self.client.query(message)

View File

@@ -0,0 +1,221 @@
"""
Document Extraction Utility
============================
Extracts text content from various document formats in memory (no disk I/O).
Supports: TXT, MD, CSV, DOCX, XLSX, PDF, PPTX.
"""
import base64
import csv
import io
import logging
logger = logging.getLogger(__name__)
# Maximum characters of extracted text to send to Claude; longer text is cut
# by _truncate(), which appends a note stating how many characters were dropped.
MAX_EXTRACTED_CHARS = 200_000
# Excel safety limits: maximum rows read per sheet and maximum sheets read per
# workbook. Content past either limit is replaced by an "omitted" marker line.
MAX_EXCEL_ROWS_PER_SHEET = 10_000
MAX_EXCEL_SHEETS = 50
# MIME type classification
# Supported document MIME types, each mapped to its conventional file extension.
DOCUMENT_MIME_TYPES: dict[str, str] = {
"text/plain": ".txt",
"text/markdown": ".md",
"text/csv": ".csv",
"application/vnd.openxmlformats-officedocument.wordprocessingml.document": ".docx",
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": ".xlsx",
"application/pdf": ".pdf",
"application/vnd.openxmlformats-officedocument.presentationml.presentation": ".pptx",
}
# Image MIME types recognized by is_image().
IMAGE_MIME_TYPES = {"image/jpeg", "image/png"}
# Union of the image and document MIME types above.
ALL_ALLOWED_MIME_TYPES = IMAGE_MIME_TYPES | set(DOCUMENT_MIME_TYPES.keys())
def is_image(mime_type: str) -> bool:
    """Return True when ``mime_type`` is a member of ``IMAGE_MIME_TYPES``."""
    supported_images = IMAGE_MIME_TYPES
    return mime_type in supported_images
def is_document(mime_type: str) -> bool:
    """Return True when ``mime_type`` is a key of ``DOCUMENT_MIME_TYPES``."""
    supported_documents = DOCUMENT_MIME_TYPES
    return mime_type in supported_documents
class DocumentExtractionError(Exception):
    """Signals that text could not be extracted from an uploaded document.

    The exception message has the form ``Failed to read <filename>: <reason>``.

    Attributes:
        filename: Name of the file that could not be read.
        reason: Short human-readable explanation of the failure.
    """

    def __init__(self, filename: str, reason: str):
        super().__init__(f"Failed to read {filename}: {reason}")
        # Keep both parts so callers can build their own message.
        self.filename = filename
        self.reason = reason
def _truncate(text: str) -> str:
    """Cap ``text`` at ``MAX_EXTRACTED_CHARS`` characters.

    Text within the limit is returned unchanged. Longer text is cut at the
    limit and followed by a note stating how many characters were dropped.
    """
    overflow = len(text) - MAX_EXTRACTED_CHARS
    if overflow <= 0:
        return text
    kept = text[:MAX_EXTRACTED_CHARS]
    return kept + f"\n\n[... truncated, {overflow:,} characters omitted]"
def _extract_plain_text(data: bytes) -> str:
"""Extract text from plain text or markdown files."""
try:
return data.decode("utf-8")
except UnicodeDecodeError:
return data.decode("latin-1")
def _extract_csv(data: bytes) -> str:
"""Extract text from CSV files, formatted as a readable table."""
try:
text = data.decode("utf-8")
except UnicodeDecodeError:
text = data.decode("latin-1")
reader = csv.reader(io.StringIO(text))
lines = []
for i, row in enumerate(reader):
lines.append(f"Row {i + 1}: {', '.join(row)}")
return "\n".join(lines)
def _extract_docx(data: bytes) -> str:
"""Extract text from Word documents."""
from docx import Document
doc = Document(io.BytesIO(data))
paragraphs = [p.text for p in doc.paragraphs if p.text.strip()]
return "\n\n".join(paragraphs)
def _extract_xlsx(data: bytes) -> str:
    """Extract cell values from an Excel (.xlsx) workbook as tab-separated text.

    Each sheet becomes a section headed ``=== Sheet: <name> ===`` followed by
    one tab-separated line per row. Empty cells become empty strings. The
    workbook is opened with ``data_only=True``, so cached values are read
    instead of formulas. At most ``MAX_EXCEL_SHEETS`` sheets and
    ``MAX_EXCEL_ROWS_PER_SHEET`` rows per sheet are read; anything beyond is
    replaced by an "omitted" marker line.

    Args:
        data: Raw workbook bytes.

    Returns:
        The sheet sections joined by blank lines.
    """
    # Imported lazily so the dependency is only needed when an .xlsx arrives.
    from openpyxl import load_workbook

    wb = load_workbook(io.BytesIO(data), read_only=True, data_only=True)
    try:
        sections = []
        sheet_names = wb.sheetnames
        for sheet_idx, sheet_name in enumerate(sheet_names):
            if sheet_idx >= MAX_EXCEL_SHEETS:
                sections.append(f"\n[... {len(sheet_names) - MAX_EXCEL_SHEETS} more sheets omitted]")
                break

            ws = wb[sheet_name]
            rows_text = [f"=== Sheet: {sheet_name} ==="]
            row_count = 0
            for row in ws.iter_rows(values_only=True):
                if row_count >= MAX_EXCEL_ROWS_PER_SHEET:
                    rows_text.append(f"[... more rows omitted, limit {MAX_EXCEL_ROWS_PER_SHEET:,} rows/sheet]")
                    break
                cells = [str(cell) if cell is not None else "" for cell in row]
                rows_text.append("\t".join(cells))
                row_count += 1
            sections.append("\n".join(rows_text))

        return "\n\n".join(sections)
    finally:
        # Read-only workbooks must be closed explicitly; do it even when a
        # malformed sheet makes iteration raise.
        wb.close()
def _extract_pdf(data: bytes, filename: str) -> str:
    """Extract the text layer of a PDF, page by page.

    Args:
        data: Raw PDF bytes.
        filename: Original filename, used only in error messages.

    Returns:
        One ``--- Page <n> ---`` section per page that has non-blank text
        (pages numbered from 1), joined by blank lines. Pages without
        extractable text are skipped.

    Raises:
        DocumentExtractionError: If the PDF cannot be opened without a password.
        PyPDF2.errors.PdfReadError: For other read failures while opening the file.
    """
    # Imported lazily so the dependency is only needed when a PDF arrives.
    from PyPDF2 import PdfReader
    from PyPDF2.errors import PdfReadError

    try:
        reader = PdfReader(io.BytesIO(data))
    except PdfReadError as e:
        detail = str(e).lower()
        if "encrypt" in detail or "password" in detail:
            raise DocumentExtractionError(filename, "PDF is password-protected") from e
        raise

    if reader.is_encrypted:
        # Many PDFs carry an /Encrypt entry but open with the empty user
        # password (only editing/printing is restricted). Accept those and
        # refuse only files that really need a password.
        try:
            unlocked = bool(reader.decrypt(""))
        except Exception:
            # Covers an unsupported cipher or a missing crypto backend.
            unlocked = False
        if not unlocked:
            raise DocumentExtractionError(filename, "PDF is password-protected")

    pages = []
    for page_number, page in enumerate(reader.pages, start=1):
        text = page.extract_text()
        if text and text.strip():
            pages.append(f"--- Page {page_number} ---\n{text}")
    return "\n\n".join(pages)
def _extract_pptx(data: bytes) -> str:
"""Extract text from PowerPoint presentations."""
from pptx import Presentation
prs = Presentation(io.BytesIO(data))
slides_text = []
for i, slide in enumerate(prs.slides):
texts = []
for shape in slide.shapes:
if shape.has_text_frame:
for paragraph in shape.text_frame.paragraphs:
text = paragraph.text.strip()
if text:
texts.append(text)
if texts:
slides_text.append(f"--- Slide {i + 1} ---\n" + "\n".join(texts))
return "\n\n".join(slides_text)
def extract_text_from_document(base64_data: str, mime_type: str, filename: str) -> str:
    """
    Decode an uploaded document and return its text content.

    Args:
        base64_data: Base64-encoded file content
        mime_type: MIME type of the document; must be a key of DOCUMENT_MIME_TYPES
        filename: Original filename (used in error messages and placeholders)

    Returns:
        The extracted text, truncated to the module's character limit. When
        nothing extractable is found, a bracketed placeholder naming the file
        is returned instead.

    Raises:
        DocumentExtractionError: If the MIME type is unsupported, the base64
            data cannot be decoded, or the format-specific extractor fails.
    """
    if mime_type not in DOCUMENT_MIME_TYPES:
        raise DocumentExtractionError(filename, f"unsupported document type: {mime_type}")

    try:
        raw = base64.b64decode(base64_data)
    except Exception as e:
        raise DocumentExtractionError(filename, f"invalid base64 data: {e}")

    # One zero-argument extractor per supported MIME type. The lambdas look
    # up the helper functions only when called.
    extractors = {
        "text/plain": lambda: _extract_plain_text(raw),
        "text/markdown": lambda: _extract_plain_text(raw),
        "text/csv": lambda: _extract_csv(raw),
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
            lambda: _extract_docx(raw),
        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":
            lambda: _extract_xlsx(raw),
        "application/pdf": lambda: _extract_pdf(raw, filename),
        "application/vnd.openxmlformats-officedocument.presentationml.presentation":
            lambda: _extract_pptx(raw),
    }

    try:
        extractor = extractors.get(mime_type)
        if extractor is None:
            raise DocumentExtractionError(filename, f"unsupported document type: {mime_type}")
        extracted = extractor()
    except DocumentExtractionError:
        # Already carries a user-facing reason; pass it through untouched.
        raise
    except Exception as e:
        logger.warning(f"Document extraction failed for {filename}: {e}")
        raise DocumentExtractionError(
            filename, "file appears to be corrupt or in an unexpected format"
        )

    if extracted and extracted.strip():
        return _truncate(extracted)
    return f"[File {filename} is empty or contains no extractable text]"

View File

@@ -6,10 +6,11 @@
*/ */
import { memo } from 'react' import { memo } from 'react'
import { Bot, User, Info } from 'lucide-react' import { Bot, User, Info, FileText } from 'lucide-react'
import ReactMarkdown, { type Components } from 'react-markdown' import ReactMarkdown, { type Components } from 'react-markdown'
import remarkGfm from 'remark-gfm' import remarkGfm from 'remark-gfm'
import type { ChatMessage as ChatMessageType } from '../lib/types' import type { ChatMessage as ChatMessageType } from '../lib/types'
import { isImageAttachment } from '../lib/types'
import { Card } from '@/components/ui/card' import { Card } from '@/components/ui/card'
interface ChatMessageProps { interface ChatMessageProps {
@@ -104,21 +105,35 @@ export const ChatMessage = memo(function ChatMessage({ message }: ChatMessagePro
</div> </div>
)} )}
{/* Display image attachments */} {/* Display file attachments */}
{attachments && attachments.length > 0 && ( {attachments && attachments.length > 0 && (
<div className={`flex flex-wrap gap-2 ${content ? 'mt-3' : ''}`}> <div className={`flex flex-wrap gap-2 ${content ? 'mt-3' : ''}`}>
{attachments.map((attachment) => ( {attachments.map((attachment) => (
<div key={attachment.id} className="border border-border rounded p-1 bg-card"> <div key={attachment.id} className="border border-border rounded p-1 bg-card">
<img {isImageAttachment(attachment) ? (
src={attachment.previewUrl} <>
alt={attachment.filename} <img
className="max-w-48 max-h-48 object-contain cursor-pointer hover:opacity-90 transition-opacity rounded" src={attachment.previewUrl}
onClick={() => window.open(attachment.previewUrl, '_blank')} alt={attachment.filename}
title={`${attachment.filename} (click to enlarge)`} className="max-w-48 max-h-48 object-contain cursor-pointer hover:opacity-90 transition-opacity rounded"
/> onClick={() => window.open(attachment.previewUrl, '_blank')}
<span className="text-xs text-muted-foreground block mt-1 text-center"> title={`${attachment.filename} (click to enlarge)`}
{attachment.filename} />
</span> <span className="text-xs text-muted-foreground block mt-1 text-center">
{attachment.filename}
</span>
</>
) : (
<div className="flex items-center gap-2 px-2 py-1">
<FileText size={16} className="text-muted-foreground flex-shrink-0" />
<span className="text-xs text-muted-foreground">
{attachment.filename}
</span>
<span className="text-xs text-muted-foreground/60">
({(attachment.size / 1024).toFixed(0)} KB)
</span>
</div>
)}
</div> </div>
))} ))}
</div> </div>

View File

@@ -6,20 +6,22 @@
*/ */
import { useCallback, useEffect, useRef, useState } from 'react' import { useCallback, useEffect, useRef, useState } from 'react'
import { Send, X, CheckCircle2, AlertCircle, Wifi, WifiOff, RotateCcw, Paperclip, Plus } from 'lucide-react' import { Send, X, CheckCircle2, AlertCircle, Wifi, WifiOff, RotateCcw, Paperclip, Plus, FileText } from 'lucide-react'
import { useExpandChat } from '../hooks/useExpandChat' import { useExpandChat } from '../hooks/useExpandChat'
import { ChatMessage } from './ChatMessage' import { ChatMessage } from './ChatMessage'
import { TypingIndicator } from './TypingIndicator' import { TypingIndicator } from './TypingIndicator'
import type { ImageAttachment } from '../lib/types' import type { FileAttachment } from '../lib/types'
import { ALL_ALLOWED_MIME_TYPES, IMAGE_MIME_TYPES, isImageAttachment, resolveMimeType } from '../lib/types'
import { isSubmitEnter } from '../lib/keyboard' import { isSubmitEnter } from '../lib/keyboard'
import { Button } from '@/components/ui/button' import { Button } from '@/components/ui/button'
import { Input } from '@/components/ui/input' import { Input } from '@/components/ui/input'
import { Card, CardContent } from '@/components/ui/card' import { Card, CardContent } from '@/components/ui/card'
import { Alert, AlertDescription } from '@/components/ui/alert' import { Alert, AlertDescription } from '@/components/ui/alert'
// Image upload validation constants // File upload validation constants
const MAX_FILE_SIZE = 5 * 1024 * 1024 // 5 MB const MAX_IMAGE_SIZE = 5 * 1024 * 1024 // 5 MB for images
const ALLOWED_TYPES = ['image/jpeg', 'image/png'] const MAX_DOCUMENT_SIZE = 20 * 1024 * 1024 // 20 MB for documents
const ALLOWED_EXTENSIONS = ['md', 'txt', 'csv', 'docx', 'xlsx', 'pdf', 'pptx', 'jpg', 'jpeg', 'png']
interface ExpandProjectChatProps { interface ExpandProjectChatProps {
projectName: string projectName: string
@@ -34,7 +36,7 @@ export function ExpandProjectChat({
}: ExpandProjectChatProps) { }: ExpandProjectChatProps) {
const [input, setInput] = useState('') const [input, setInput] = useState('')
const [error, setError] = useState<string | null>(null) const [error, setError] = useState<string | null>(null)
const [pendingAttachments, setPendingAttachments] = useState<ImageAttachment[]>([]) const [pendingAttachments, setPendingAttachments] = useState<FileAttachment[]>([])
const messagesEndRef = useRef<HTMLDivElement>(null) const messagesEndRef = useRef<HTMLDivElement>(null)
const inputRef = useRef<HTMLInputElement>(null) const inputRef = useRef<HTMLInputElement>(null)
const fileInputRef = useRef<HTMLInputElement>(null) const fileInputRef = useRef<HTMLInputElement>(null)
@@ -95,20 +97,33 @@ export function ExpandProjectChat({
} }
} }
// File handling for image attachments // File handling for attachments (images and documents)
const handleFileSelect = useCallback((files: FileList | null) => { const handleFileSelect = useCallback((files: FileList | null) => {
if (!files) return if (!files) return
Array.from(files).forEach((file) => { Array.from(files).forEach((file) => {
// Validate file type // Resolve MIME type (browsers may not set it for .md files)
if (!ALLOWED_TYPES.includes(file.type)) { let mimeType = file.type
setError(`Invalid file type: ${file.name}. Only JPEG and PNG are supported.`) if (!mimeType || !ALL_ALLOWED_MIME_TYPES.includes(mimeType)) {
return mimeType = resolveMimeType(file.name)
} }
// Validate file size // Validate file type
if (file.size > MAX_FILE_SIZE) { if (!ALL_ALLOWED_MIME_TYPES.includes(mimeType)) {
setError(`File too large: ${file.name}. Maximum size is 5 MB.`) const ext = file.name.split('.').pop()?.toLowerCase()
if (!ext || !ALLOWED_EXTENSIONS.includes(ext)) {
setError(`Unsupported file type: ${file.name}. Supported: images (JPEG, PNG) and documents (MD, TXT, CSV, DOCX, XLSX, PDF, PPTX).`)
return
}
mimeType = resolveMimeType(file.name)
}
// Validate size based on type
const isImage = (IMAGE_MIME_TYPES as readonly string[]).includes(mimeType)
const maxSize = isImage ? MAX_IMAGE_SIZE : MAX_DOCUMENT_SIZE
const maxLabel = isImage ? '5 MB' : '20 MB'
if (file.size > maxSize) {
setError(`File too large: ${file.name}. Maximum size is ${maxLabel}.`)
return return
} }
@@ -118,12 +133,12 @@ export function ExpandProjectChat({
const dataUrl = e.target?.result as string const dataUrl = e.target?.result as string
const base64Data = dataUrl.split(',')[1] const base64Data = dataUrl.split(',')[1]
const attachment: ImageAttachment = { const attachment: FileAttachment = {
id: `${Date.now()}-${Math.random().toString(36).substring(2, 9)}`, id: `${Date.now()}-${Math.random().toString(36).substring(2, 9)}`,
filename: file.name, filename: file.name,
mimeType: file.type as 'image/jpeg' | 'image/png', mimeType: mimeType as FileAttachment['mimeType'],
base64Data, base64Data,
previewUrl: dataUrl, previewUrl: isImage ? dataUrl : '',
size: file.size, size: file.size,
} }
@@ -291,11 +306,17 @@ export function ExpandProjectChat({
key={attachment.id} key={attachment.id}
className="relative group border-2 border-border p-1 bg-card rounded shadow-sm" className="relative group border-2 border-border p-1 bg-card rounded shadow-sm"
> >
<img {isImageAttachment(attachment) ? (
src={attachment.previewUrl} <img
alt={attachment.filename} src={attachment.previewUrl}
className="w-16 h-16 object-cover rounded" alt={attachment.filename}
/> className="w-16 h-16 object-cover rounded"
/>
) : (
<div className="w-16 h-16 flex items-center justify-center bg-muted rounded">
<FileText size={24} className="text-muted-foreground" />
</div>
)}
<button <button
onClick={() => handleRemoveAttachment(attachment.id)} onClick={() => handleRemoveAttachment(attachment.id)}
className="absolute -top-2 -right-2 bg-destructive text-destructive-foreground rounded-full p-0.5 border-2 border-border hover:scale-110 transition-transform" className="absolute -top-2 -right-2 bg-destructive text-destructive-foreground rounded-full p-0.5 border-2 border-border hover:scale-110 transition-transform"
@@ -318,7 +339,7 @@ export function ExpandProjectChat({
<input <input
ref={fileInputRef} ref={fileInputRef}
type="file" type="file"
accept="image/jpeg,image/png" accept="image/jpeg,image/png,.md,.txt,.csv,.docx,.xlsx,.pdf,.pptx"
multiple multiple
onChange={(e) => handleFileSelect(e.target.files)} onChange={(e) => handleFileSelect(e.target.files)}
className="hidden" className="hidden"
@@ -330,7 +351,7 @@ export function ExpandProjectChat({
disabled={connectionStatus !== 'connected'} disabled={connectionStatus !== 'connected'}
variant="ghost" variant="ghost"
size="icon" size="icon"
title="Attach image (JPEG, PNG - max 5MB)" title="Attach files (images: JPEG/PNG up to 5MB, documents: MD, TXT, CSV, DOCX, XLSX, PDF, PPTX up to 20MB)"
> >
<Paperclip size={18} /> <Paperclip size={18} />
</Button> </Button>
@@ -364,7 +385,7 @@ export function ExpandProjectChat({
{/* Help text */} {/* Help text */}
<p className="text-xs text-muted-foreground mt-2"> <p className="text-xs text-muted-foreground mt-2">
Press Enter to send. Drag & drop or click <Paperclip size={12} className="inline" /> to attach images. Press Enter to send. Drag & drop or click <Paperclip size={12} className="inline" /> to attach files.
</p> </p>
</div> </div>
)} )}

View File

@@ -11,16 +11,18 @@ import { useSpecChat } from '../hooks/useSpecChat'
import { ChatMessage } from './ChatMessage' import { ChatMessage } from './ChatMessage'
import { QuestionOptions } from './QuestionOptions' import { QuestionOptions } from './QuestionOptions'
import { TypingIndicator } from './TypingIndicator' import { TypingIndicator } from './TypingIndicator'
import type { ImageAttachment } from '../lib/types' import type { FileAttachment } from '../lib/types'
import { ALL_ALLOWED_MIME_TYPES, IMAGE_MIME_TYPES, isImageAttachment, resolveMimeType } from '../lib/types'
import { isSubmitEnter } from '../lib/keyboard' import { isSubmitEnter } from '../lib/keyboard'
import { Button } from '@/components/ui/button' import { Button } from '@/components/ui/button'
import { Textarea } from '@/components/ui/textarea' import { Textarea } from '@/components/ui/textarea'
import { Card, CardContent } from '@/components/ui/card' import { Card, CardContent } from '@/components/ui/card'
import { Alert, AlertDescription } from '@/components/ui/alert' import { Alert, AlertDescription } from '@/components/ui/alert'
// Image upload validation constants // File upload validation constants
const MAX_FILE_SIZE = 5 * 1024 * 1024 // 5 MB const MAX_IMAGE_SIZE = 5 * 1024 * 1024 // 5 MB for images
const ALLOWED_TYPES = ['image/jpeg', 'image/png'] const MAX_DOCUMENT_SIZE = 20 * 1024 * 1024 // 20 MB for documents
const ALLOWED_EXTENSIONS = ['md', 'txt', 'csv', 'docx', 'xlsx', 'pdf', 'pptx', 'jpg', 'jpeg', 'png']
// Sample prompt for quick testing // Sample prompt for quick testing
const SAMPLE_PROMPT = `Let's call it Simple Todo. This is a really simple web app that I can use to track my to-do items using a Kanban board. I should be able to add to-dos and then drag and drop them through the Kanban board. The different columns in the Kanban board are: const SAMPLE_PROMPT = `Let's call it Simple Todo. This is a really simple web app that I can use to track my to-do items using a Kanban board. I should be able to add to-dos and then drag and drop them through the Kanban board. The different columns in the Kanban board are:
@@ -64,7 +66,7 @@ export function SpecCreationChat({
const [input, setInput] = useState('') const [input, setInput] = useState('')
const [error, setError] = useState<string | null>(null) const [error, setError] = useState<string | null>(null)
const [yoloEnabled, setYoloEnabled] = useState(false) const [yoloEnabled, setYoloEnabled] = useState(false)
const [pendingAttachments, setPendingAttachments] = useState<ImageAttachment[]>([]) const [pendingAttachments, setPendingAttachments] = useState<FileAttachment[]>([])
const messagesEndRef = useRef<HTMLDivElement>(null) const messagesEndRef = useRef<HTMLDivElement>(null)
const inputRef = useRef<HTMLTextAreaElement>(null) const inputRef = useRef<HTMLTextAreaElement>(null)
const fileInputRef = useRef<HTMLInputElement>(null) const fileInputRef = useRef<HTMLInputElement>(null)
@@ -138,20 +140,33 @@ export function SpecCreationChat({
sendAnswer(answers) sendAnswer(answers)
} }
// File handling for image attachments // File handling for attachments (images and documents)
const handleFileSelect = useCallback((files: FileList | null) => { const handleFileSelect = useCallback((files: FileList | null) => {
if (!files) return if (!files) return
Array.from(files).forEach((file) => { Array.from(files).forEach((file) => {
// Validate file type // Resolve MIME type (browsers may not set it for .md files)
if (!ALLOWED_TYPES.includes(file.type)) { let mimeType = file.type
setError(`Invalid file type: ${file.name}. Only JPEG and PNG are supported.`) if (!mimeType || !ALL_ALLOWED_MIME_TYPES.includes(mimeType)) {
return mimeType = resolveMimeType(file.name)
} }
// Validate file size // Validate file type
if (file.size > MAX_FILE_SIZE) { if (!ALL_ALLOWED_MIME_TYPES.includes(mimeType)) {
setError(`File too large: ${file.name}. Maximum size is 5 MB.`) const ext = file.name.split('.').pop()?.toLowerCase()
if (!ext || !ALLOWED_EXTENSIONS.includes(ext)) {
setError(`Unsupported file type: ${file.name}. Supported: images (JPEG, PNG) and documents (MD, TXT, CSV, DOCX, XLSX, PDF, PPTX).`)
return
}
mimeType = resolveMimeType(file.name)
}
// Validate size based on type
const isImage = (IMAGE_MIME_TYPES as readonly string[]).includes(mimeType)
const maxSize = isImage ? MAX_IMAGE_SIZE : MAX_DOCUMENT_SIZE
const maxLabel = isImage ? '5 MB' : '20 MB'
if (file.size > maxSize) {
setError(`File too large: ${file.name}. Maximum size is ${maxLabel}.`)
return return
} }
@@ -159,15 +174,14 @@ export function SpecCreationChat({
const reader = new FileReader() const reader = new FileReader()
reader.onload = (e) => { reader.onload = (e) => {
const dataUrl = e.target?.result as string const dataUrl = e.target?.result as string
// dataUrl is "data:image/png;base64,XXXXXX"
const base64Data = dataUrl.split(',')[1] const base64Data = dataUrl.split(',')[1]
const attachment: ImageAttachment = { const attachment: FileAttachment = {
id: `${Date.now()}-${Math.random().toString(36).substring(2, 9)}`, id: `${Date.now()}-${Math.random().toString(36).substring(2, 9)}`,
filename: file.name, filename: file.name,
mimeType: file.type as 'image/jpeg' | 'image/png', mimeType: mimeType as FileAttachment['mimeType'],
base64Data, base64Data,
previewUrl: dataUrl, previewUrl: isImage ? dataUrl : '',
size: file.size, size: file.size,
} }
@@ -364,11 +378,17 @@ export function SpecCreationChat({
key={attachment.id} key={attachment.id}
className="relative group border-2 border-border p-1 bg-card rounded shadow-sm" className="relative group border-2 border-border p-1 bg-card rounded shadow-sm"
> >
<img {isImageAttachment(attachment) ? (
src={attachment.previewUrl} <img
alt={attachment.filename} src={attachment.previewUrl}
className="w-16 h-16 object-cover rounded" alt={attachment.filename}
/> className="w-16 h-16 object-cover rounded"
/>
) : (
<div className="w-16 h-16 flex items-center justify-center bg-muted rounded">
<FileText size={24} className="text-muted-foreground" />
</div>
)}
<button <button
onClick={() => handleRemoveAttachment(attachment.id)} onClick={() => handleRemoveAttachment(attachment.id)}
className="absolute -top-2 -right-2 bg-destructive text-destructive-foreground rounded-full p-0.5 border-2 border-border hover:scale-110 transition-transform" className="absolute -top-2 -right-2 bg-destructive text-destructive-foreground rounded-full p-0.5 border-2 border-border hover:scale-110 transition-transform"
@@ -391,7 +411,7 @@ export function SpecCreationChat({
<input <input
ref={fileInputRef} ref={fileInputRef}
type="file" type="file"
accept="image/jpeg,image/png" accept="image/jpeg,image/png,.md,.txt,.csv,.docx,.xlsx,.pdf,.pptx"
multiple multiple
onChange={(e) => handleFileSelect(e.target.files)} onChange={(e) => handleFileSelect(e.target.files)}
className="hidden" className="hidden"
@@ -403,7 +423,7 @@ export function SpecCreationChat({
disabled={connectionStatus !== 'connected'} disabled={connectionStatus !== 'connected'}
variant="ghost" variant="ghost"
size="icon" size="icon"
title="Attach image (JPEG, PNG - max 5MB)" title="Attach files (images: JPEG/PNG up to 5MB, documents: MD, TXT, CSV, DOCX, XLSX, PDF, PPTX up to 20MB)"
> >
<Paperclip size={18} /> <Paperclip size={18} />
</Button> </Button>
@@ -444,7 +464,7 @@ export function SpecCreationChat({
{/* Help text */} {/* Help text */}
<p className="text-xs text-muted-foreground mt-2"> <p className="text-xs text-muted-foreground mt-2">
Press Enter to send, Shift+Enter for new line. Drag & drop or click <Paperclip size={12} className="inline" /> to attach images (JPEG/PNG, max 5MB). Press Enter to send, Shift+Enter for new line. Drag & drop or click <Paperclip size={12} className="inline" /> to attach files.
</p> </p>
</div> </div>
)} )}

View File

@@ -3,7 +3,7 @@
*/ */
import { useState, useCallback, useRef, useEffect } from 'react' import { useState, useCallback, useRef, useEffect } from 'react'
import type { ChatMessage, ImageAttachment, ExpandChatServerMessage } from '../lib/types' import type { ChatMessage, FileAttachment, ExpandChatServerMessage } from '../lib/types'
type ConnectionStatus = 'disconnected' | 'connecting' | 'connected' | 'error' type ConnectionStatus = 'disconnected' | 'connecting' | 'connected' | 'error'
@@ -27,7 +27,7 @@ interface UseExpandChatReturn {
featuresCreated: number featuresCreated: number
recentFeatures: CreatedFeature[] recentFeatures: CreatedFeature[]
start: () => void start: () => void
sendMessage: (content: string, attachments?: ImageAttachment[]) => void sendMessage: (content: string, attachments?: FileAttachment[]) => void
disconnect: () => void disconnect: () => void
} }
@@ -278,7 +278,7 @@ export function useExpandChat({
setTimeout(checkAndSend, 100) setTimeout(checkAndSend, 100)
}, [connect, onError]) }, [connect, onError])
const sendMessage = useCallback((content: string, attachments?: ImageAttachment[]) => { const sendMessage = useCallback((content: string, attachments?: FileAttachment[]) => {
if (!wsRef.current || wsRef.current.readyState !== WebSocket.OPEN) { if (!wsRef.current || wsRef.current.readyState !== WebSocket.OPEN) {
onError?.('Not connected') onError?.('Not connected')
return return

View File

@@ -3,7 +3,7 @@
*/ */
import { useState, useCallback, useRef, useEffect } from 'react' import { useState, useCallback, useRef, useEffect } from 'react'
import type { ChatMessage, ImageAttachment, SpecChatServerMessage, SpecQuestion } from '../lib/types' import type { ChatMessage, FileAttachment, SpecChatServerMessage, SpecQuestion } from '../lib/types'
import { getSpecStatus } from '../lib/api' import { getSpecStatus } from '../lib/api'
type ConnectionStatus = 'disconnected' | 'connecting' | 'connected' | 'error' type ConnectionStatus = 'disconnected' | 'connecting' | 'connected' | 'error'
@@ -22,7 +22,7 @@ interface UseSpecChatReturn {
currentQuestions: SpecQuestion[] | null currentQuestions: SpecQuestion[] | null
currentToolId: string | null currentToolId: string | null
start: () => void start: () => void
sendMessage: (content: string, attachments?: ImageAttachment[]) => void sendMessage: (content: string, attachments?: FileAttachment[]) => void
sendAnswer: (answers: Record<string, string | string[]>) => void sendAnswer: (answers: Record<string, string | string[]>) => void
disconnect: () => void disconnect: () => void
} }
@@ -367,7 +367,7 @@ export function useSpecChat({
setTimeout(checkAndSend, 100) setTimeout(checkAndSend, 100)
}, [connect]) }, [connect])
const sendMessage = useCallback((content: string, attachments?: ImageAttachment[]) => { const sendMessage = useCallback((content: string, attachments?: FileAttachment[]) => {
if (!wsRef.current || wsRef.current.readyState !== WebSocket.OPEN) { if (!wsRef.current || wsRef.current.readyState !== WebSocket.OPEN) {
onError?.('Not connected') onError?.('Not connected')
return return

View File

@@ -417,22 +417,67 @@ export type SpecChatServerMessage =
| SpecChatPongMessage | SpecChatPongMessage
| SpecChatResponseDoneMessage | SpecChatResponseDoneMessage
// Image attachment for chat messages // File attachment for chat messages (images and documents)
export interface ImageAttachment { export interface FileAttachment {
id: string id: string
filename: string filename: string
mimeType: 'image/jpeg' | 'image/png' mimeType:
| 'image/jpeg'
| 'image/png'
| 'text/plain'
| 'text/markdown'
| 'text/csv'
| 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
| 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
| 'application/pdf'
| 'application/vnd.openxmlformats-officedocument.presentationml.presentation'
base64Data: string // Raw base64 (without data: prefix) base64Data: string // Raw base64 (without data: prefix)
previewUrl: string // data: URL for display previewUrl: string // data: URL for images, empty string for documents
size: number // File size in bytes size: number // File size in bytes
} }
/** @deprecated Use FileAttachment instead */
export type ImageAttachment = FileAttachment
/** MIME types handled as image attachments. */
export const IMAGE_MIME_TYPES = ['image/jpeg', 'image/png'] as const

/** MIME types handled as document attachments (plain text, Markdown, CSV, DOCX, XLSX, PDF, PPTX). */
export const DOCUMENT_MIME_TYPES = [
  'text/plain',
  'text/markdown',
  'text/csv',
  'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
  'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
  'application/pdf',
  'application/vnd.openxmlformats-officedocument.presentationml.presentation',
] as const

/** Every accepted MIME type: image types first, then document types, widened to plain strings. */
export const ALL_ALLOWED_MIME_TYPES: string[] = new Array<string>().concat(
  IMAGE_MIME_TYPES,
  DOCUMENT_MIME_TYPES,
)
/**
 * Report whether an attachment's MIME type is one of IMAGE_MIME_TYPES.
 *
 * @param att - The attachment to classify.
 * @returns `true` when `att.mimeType` is listed in IMAGE_MIME_TYPES, `false` otherwise.
 */
export function isImageAttachment(att: FileAttachment): boolean {
  // Widen the literal tuple to plain strings so `includes` accepts any mimeType value.
  const imageTypes: readonly string[] = IMAGE_MIME_TYPES
  return imageTypes.includes(att.mimeType)
}
/**
 * Resolve a MIME type from a filename's extension.
 *
 * The extension is the text after the last `.` in `filename`, compared
 * case-insensitively. A name containing no `.` is treated as being its own
 * extension (so `"md"` resolves like `"x.md"`).
 *
 * @param filename - File name to inspect; only its last dot-separated segment is used.
 * @returns The mapped MIME type, or `'application/octet-stream'` when the
 *   extension is not one of the known ones.
 */
export function resolveMimeType(filename: string): string {
  const ext = filename.split('.').pop()?.toLowerCase() ?? ''

  // A Map is used instead of a plain object literal: indexing an object with an
  // arbitrary extension such as "constructor" or "__proto__" would return an
  // inherited Object.prototype member (a function/object) rather than a string.
  const mimeByExtension = new Map<string, string>([
    ['md', 'text/markdown'],
    ['txt', 'text/plain'],
    ['csv', 'text/csv'],
    ['docx', 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'],
    ['xlsx', 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'],
    ['pdf', 'application/pdf'],
    ['pptx', 'application/vnd.openxmlformats-officedocument.presentationml.presentation'],
    ['jpg', 'image/jpeg'],
    ['jpeg', 'image/jpeg'],
    ['png', 'image/png'],
  ])

  return mimeByExtension.get(ext) ?? 'application/octet-stream'
}
// UI chat message for display // UI chat message for display
export interface ChatMessage { export interface ChatMessage {
id: string id: string
role: 'user' | 'assistant' | 'system' role: 'user' | 'assistant' | 'system'
content: string content: string
attachments?: ImageAttachment[] attachments?: FileAttachment[]
timestamp: Date timestamp: Date
questions?: SpecQuestion[] questions?: SpecQuestion[]
isStreaming?: boolean isStreaming?: boolean