Merge pull request #227 from AutoForgeAI/feat/document-file-uploads

feat: add document file upload support for spec creation and project expansion
This commit is contained in:
Leon van Zyl
2026-03-25 12:52:44 +02:00
committed by GitHub
15 changed files with 513 additions and 123 deletions

View File

@@ -12,3 +12,7 @@ aiofiles>=24.0.0
apscheduler>=3.10.0,<4.0.0
pywinpty>=2.0.0; sys_platform == "win32"
pyyaml>=6.0.0
python-docx>=1.1.0
openpyxl>=3.1.0
PyPDF2>=3.0.0
python-pptx>=1.0.0

View File

@@ -10,6 +10,10 @@ aiofiles>=24.0.0
apscheduler>=3.10.0,<4.0.0
pywinpty>=2.0.0; sys_platform == "win32"
pyyaml>=6.0.0
python-docx>=1.1.0
openpyxl>=3.1.0
PyPDF2>=3.0.0
python-pptx>=1.0.0
# Dev dependencies
ruff>=0.8.0

View File

@@ -13,7 +13,7 @@ from typing import Optional
from fastapi import APIRouter, HTTPException, WebSocket, WebSocketDisconnect
from pydantic import BaseModel, ValidationError
from ..schemas import ImageAttachment
from ..schemas import FileAttachment
from ..services.expand_chat_session import (
ExpandChatSession,
create_expand_session,
@@ -181,12 +181,12 @@ async def expand_project_websocket(websocket: WebSocket, project_name: str):
user_content = message.get("content", "").strip()
# Parse attachments if present
attachments: list[ImageAttachment] = []
attachments: list[FileAttachment] = []
raw_attachments = message.get("attachments", [])
if raw_attachments:
try:
for raw_att in raw_attachments:
attachments.append(ImageAttachment(**raw_att))
attachments.append(FileAttachment(**raw_att))
except (ValidationError, Exception) as e:
logger.warning(f"Invalid attachment data: {e}")
await websocket.send_json({

View File

@@ -12,7 +12,7 @@ from typing import Optional
from fastapi import APIRouter, HTTPException, WebSocket, WebSocketDisconnect
from pydantic import BaseModel, ValidationError
from ..schemas import ImageAttachment
from ..schemas import FileAttachment
from ..services.spec_chat_session import (
SpecChatSession,
create_session,
@@ -242,12 +242,12 @@ async def spec_chat_websocket(websocket: WebSocket, project_name: str):
user_content = message.get("content", "").strip()
# Parse attachments if present
attachments: list[ImageAttachment] = []
attachments: list[FileAttachment] = []
raw_attachments = message.get("attachments", [])
if raw_attachments:
try:
for raw_att in raw_attachments:
attachments.append(ImageAttachment(**raw_att))
attachments.append(FileAttachment(**raw_att))
except (ValidationError, Exception) as e:
logger.warning(f"Invalid attachment data: {e}")
await websocket.send_json({

View File

@@ -11,7 +11,7 @@ from datetime import datetime
from pathlib import Path
from typing import Literal
from pydantic import BaseModel, Field, field_validator
from pydantic import BaseModel, Field, field_validator, model_validator
# Import model constants from registry (single source of truth)
_root = Path(__file__).parent.parent
@@ -331,36 +331,61 @@ class WSAgentUpdateMessage(BaseModel):
# ============================================================================
# Spec Chat Schemas
# Chat Attachment Schemas
# ============================================================================
# Maximum image file size: 5 MB
MAX_IMAGE_SIZE = 5 * 1024 * 1024
# Size limits
MAX_IMAGE_SIZE = 5 * 1024 * 1024 # 5 MB for images
MAX_DOCUMENT_SIZE = 20 * 1024 * 1024 # 20 MB for documents
_IMAGE_MIME_TYPES = {'image/jpeg', 'image/png'}
class ImageAttachment(BaseModel):
"""Image attachment from client for spec creation chat."""
class FileAttachment(BaseModel):
"""File attachment from client for spec creation / expand project chat."""
filename: str = Field(..., min_length=1, max_length=255)
mimeType: Literal['image/jpeg', 'image/png']
mimeType: Literal[
'image/jpeg', 'image/png',
'text/plain', 'text/markdown', 'text/csv',
'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
'application/pdf',
'application/vnd.openxmlformats-officedocument.presentationml.presentation',
]
base64Data: str
@field_validator('base64Data')
@classmethod
def validate_base64_and_size(cls, v: str) -> str:
"""Validate that base64 data is valid and within size limit."""
def validate_base64(cls, v: str) -> str:
"""Validate that base64 data is decodable."""
try:
decoded = base64.b64decode(v)
if len(decoded) > MAX_IMAGE_SIZE:
raise ValueError(
f'Image size ({len(decoded) / (1024 * 1024):.1f} MB) exceeds '
f'maximum of {MAX_IMAGE_SIZE // (1024 * 1024)} MB'
)
base64.b64decode(v)
return v
except Exception as e:
if 'Image size' in str(e):
raise
raise ValueError(f'Invalid base64 data: {e}')
@model_validator(mode='after')
def validate_size(self) -> 'FileAttachment':
    """Reject attachments whose decoded payload exceeds the limit for their type.

    Image MIME types (those in ``_IMAGE_MIME_TYPES``) are capped at
    ``MAX_IMAGE_SIZE``; every other accepted MIME type is treated as a
    document and capped at ``MAX_DOCUMENT_SIZE``.

    Returns:
        The validated model instance.

    Raises:
        ValueError: If the decoded payload is larger than the applicable limit.
    """
    try:
        payload = base64.b64decode(self.base64Data)
    except Exception:
        # Undecodable data is left to the base64Data field validator to report.
        return self

    if self.mimeType in _IMAGE_MIME_TYPES:
        label, max_size = "Image", MAX_IMAGE_SIZE
    else:
        label, max_size = "Document", MAX_DOCUMENT_SIZE

    payload_size = len(payload)
    if payload_size <= max_size:
        return self

    raise ValueError(
        f'{label} size ({payload_size / (1024 * 1024):.1f} MB) exceeds '
        f'maximum of {max_size // (1024 * 1024)} MB'
    )
# ============================================================================
# Filesystem Schemas

View File

@@ -35,6 +35,13 @@ if _root_str not in sys.path:
from env_constants import API_ENV_VARS # noqa: E402, F401
from rate_limit_utils import is_rate_limit_error, parse_retry_after # noqa: E402, F401
from ..schemas import FileAttachment
from ..utils.document_extraction import (
extract_text_from_document,
is_document,
is_image,
)
logger = logging.getLogger(__name__)
@@ -88,6 +95,35 @@ async def safe_receive_response(client: Any, log: logging.Logger) -> AsyncGenera
raise
def build_attachment_content_blocks(attachments: list[FileAttachment]) -> list[dict]:
    """Translate uploaded attachments into Claude API content blocks.

    Image attachments are forwarded untouched as base64 ``image`` blocks.
    Document attachments are run through text extraction and forwarded as
    ``text`` blocks headed by the original filename. An attachment whose MIME
    type is neither a supported image nor a supported document contributes
    no block.

    Args:
        attachments: Attachments to convert, in the order they should appear.

    Returns:
        One content block per recognised attachment, in input order.

    Raises:
        DocumentExtractionError: If a document cannot be read.
    """
    content_blocks: list[dict] = []
    for attachment in attachments:
        if is_image(attachment.mimeType):
            image_source = {
                "type": "base64",
                "media_type": attachment.mimeType,
                "data": attachment.base64Data,
            }
            content_blocks.append({"type": "image", "source": image_source})
            continue

        if not is_document(attachment.mimeType):
            continue

        extracted = extract_text_from_document(
            attachment.base64Data, attachment.mimeType, attachment.filename
        )
        content_blocks.append({
            "type": "text",
            "text": f"[Content of uploaded file: {attachment.filename}]\n\n{extracted}",
        })
    return content_blocks
async def make_multimodal_message(content_blocks: list[dict]) -> AsyncGenerator[dict, None]:
"""Yield a single multimodal user message in Claude Agent SDK format.

View File

@@ -21,9 +21,11 @@ from typing import Any, AsyncGenerator, Optional
from claude_agent_sdk import ClaudeAgentOptions, ClaudeSDKClient
from dotenv import load_dotenv
from ..schemas import ImageAttachment
from ..schemas import FileAttachment
from ..utils.document_extraction import DocumentExtractionError
from .chat_constants import (
ROOT_DIR,
build_attachment_content_blocks,
check_rate_limit_error,
make_multimodal_message,
safe_receive_response,
@@ -226,7 +228,7 @@ class ExpandChatSession:
async def send_message(
self,
user_message: str,
attachments: list[ImageAttachment] | None = None
attachments: list[FileAttachment] | None = None
) -> AsyncGenerator[dict, None]:
"""
Send user message and stream Claude's response.
@@ -273,7 +275,7 @@ class ExpandChatSession:
async def _query_claude(
self,
message: str,
attachments: list[ImageAttachment] | None = None
attachments: list[FileAttachment] | None = None
) -> AsyncGenerator[dict, None]:
"""
Internal method to query Claude and stream responses.
@@ -289,17 +291,16 @@ class ExpandChatSession:
content_blocks: list[dict[str, Any]] = []
if message:
content_blocks.append({"type": "text", "text": message})
for att in attachments:
content_blocks.append({
"type": "image",
"source": {
"type": "base64",
"media_type": att.mimeType,
"data": att.base64Data,
}
})
# Add attachment blocks (images as image blocks, documents as extracted text)
try:
content_blocks.extend(build_attachment_content_blocks(attachments))
except DocumentExtractionError as e:
yield {"type": "error", "content": str(e)}
return
await self.client.query(make_multimodal_message(content_blocks))
logger.info(f"Sent multimodal message with {len(attachments)} image(s)")
logger.info(f"Sent multimodal message with {len(attachments)} attachment(s)")
else:
await self.client.query(message)

View File

@@ -18,9 +18,11 @@ from typing import Any, AsyncGenerator, Optional
from claude_agent_sdk import ClaudeAgentOptions, ClaudeSDKClient
from dotenv import load_dotenv
from ..schemas import ImageAttachment
from ..schemas import FileAttachment
from ..utils.document_extraction import DocumentExtractionError
from .chat_constants import (
ROOT_DIR,
build_attachment_content_blocks,
check_rate_limit_error,
make_multimodal_message,
safe_receive_response,
@@ -201,7 +203,7 @@ class SpecChatSession:
async def send_message(
self,
user_message: str,
attachments: list[ImageAttachment] | None = None
attachments: list[FileAttachment] | None = None
) -> AsyncGenerator[dict, None]:
"""
Send user message and stream Claude's response.
@@ -247,7 +249,7 @@ class SpecChatSession:
async def _query_claude(
self,
message: str,
attachments: list[ImageAttachment] | None = None
attachments: list[FileAttachment] | None = None
) -> AsyncGenerator[dict, None]:
"""
Internal method to query Claude and stream responses.
@@ -273,21 +275,17 @@ class SpecChatSession:
if message:
content_blocks.append({"type": "text", "text": message})
# Add image blocks
for att in attachments:
content_blocks.append({
"type": "image",
"source": {
"type": "base64",
"media_type": att.mimeType,
"data": att.base64Data,
}
})
# Add attachment blocks (images as image blocks, documents as extracted text)
try:
content_blocks.extend(build_attachment_content_blocks(attachments))
except DocumentExtractionError as e:
yield {"type": "error", "content": str(e)}
return
# Send multimodal content to Claude using async generator format
# The SDK's query() accepts AsyncIterable[dict] for custom message formats
await self.client.query(make_multimodal_message(content_blocks))
logger.info(f"Sent multimodal message with {len(attachments)} image(s)")
logger.info(f"Sent multimodal message with {len(attachments)} attachment(s)")
else:
# Text-only message: use string format
await self.client.query(message)

View File

@@ -0,0 +1,221 @@
"""
Document Extraction Utility
============================
Extracts text content from various document formats in memory (no disk I/O).
Supports: TXT, MD, CSV, DOCX, XLSX, PDF, PPTX.
"""
import base64
import csv
import io
import logging
logger = logging.getLogger(__name__)
# Upper bound on the number of extracted characters sent to Claude
MAX_EXTRACTED_CHARS = 200_000
# Excel extraction caps: sheets per workbook, then rows per sheet
MAX_EXCEL_SHEETS = 50
MAX_EXCEL_ROWS_PER_SHEET = 10_000
# Supported image MIME types
IMAGE_MIME_TYPES = {"image/jpeg", "image/png"}
# Supported document MIME types, each mapped to its file extension
DOCUMENT_MIME_TYPES: dict[str, str] = {
    "text/plain": ".txt",
    "text/markdown": ".md",
    "text/csv": ".csv",
    "application/vnd.openxmlformats-officedocument.wordprocessingml.document": ".docx",
    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": ".xlsx",
    "application/pdf": ".pdf",
    "application/vnd.openxmlformats-officedocument.presentationml.presentation": ".pptx",
}
# Union of every MIME type accepted as an attachment (images plus documents)
ALL_ALLOWED_MIME_TYPES = IMAGE_MIME_TYPES | set(DOCUMENT_MIME_TYPES)
def is_image(mime_type: str) -> bool:
    """Return True when *mime_type* is one of the entries in ``IMAGE_MIME_TYPES``."""
    supported = mime_type in IMAGE_MIME_TYPES
    return supported
def is_document(mime_type: str) -> bool:
    """Return True when *mime_type* is a key of ``DOCUMENT_MIME_TYPES``."""
    supported = mime_type in DOCUMENT_MIME_TYPES
    return supported
class DocumentExtractionError(Exception):
    """Signal that text could not be extracted from an uploaded document.

    The exception message has the form ``Failed to read <filename>: <reason>``.
    The two parts are also kept on the instance as ``filename`` and ``reason``.
    """

    def __init__(self, filename: str, reason: str):
        message = f"Failed to read {filename}: {reason}"
        super().__init__(message)
        self.filename = filename
        self.reason = reason
def _truncate(text: str) -> str:
    """Cap *text* at ``MAX_EXTRACTED_CHARS`` characters.

    Text within the limit is returned unchanged. Longer text is cut at the
    limit and followed by a notice stating how many characters were dropped.
    """
    overflow = len(text) - MAX_EXTRACTED_CHARS
    if overflow <= 0:
        return text
    notice = f"\n\n[... truncated, {overflow:,} characters omitted]"
    return text[:MAX_EXTRACTED_CHARS] + notice
def _extract_plain_text(data: bytes) -> str:
    """Decode the raw bytes of a plain-text or Markdown file.

    UTF-8 is tried first. A leading UTF-8 byte-order mark is stripped so it
    does not leak into the extracted text as a stray U+FEFF character. Bytes
    that are not valid UTF-8 fall back to Latin-1, which maps every byte to a
    character and therefore cannot fail.

    Args:
        data: Raw file content.

    Returns:
        The decoded text.
    """
    try:
        # "utf-8-sig" decodes exactly like "utf-8" but drops a leading BOM.
        return data.decode("utf-8-sig")
    except UnicodeDecodeError:
        return data.decode("latin-1")
def _extract_csv(data: bytes) -> str:
    """Render a CSV file as numbered, comma-joined rows.

    The bytes are decoded as UTF-8 (falling back to Latin-1 when they are not
    valid UTF-8) and parsed with the standard ``csv`` reader. Each parsed row
    becomes one output line of the form ``Row <n>: <cell>, <cell>, ...`` with
    rows numbered from 1.

    Args:
        data: Raw CSV file content.

    Returns:
        The rendered rows joined by newlines; an empty string for empty input.
    """
    try:
        # "utf-8-sig" drops a leading BOM, which would otherwise end up
        # glued to the first cell of the first row.
        text = data.decode("utf-8-sig")
    except UnicodeDecodeError:
        text = data.decode("latin-1")

    # newline="" hands line endings to the csv module untranslated, as the csv
    # documentation requires; without it CR-only files fail to parse.
    reader = csv.reader(io.StringIO(text, newline=""))
    return "\n".join(
        f"Row {number}: {', '.join(row)}" for number, row in enumerate(reader, start=1)
    )
def _extract_docx(data: bytes) -> str:
    """Collect the non-blank paragraphs of a Word document.

    The document is opened from memory and every entry of ``doc.paragraphs``
    whose text is not purely whitespace is kept, separated by blank lines.

    NOTE(review): text held in tables, headers or footers is presumably not
    part of ``doc.paragraphs`` -- confirm against python-docx if that content
    matters.
    """
    from docx import Document

    document = Document(io.BytesIO(data))
    kept: list[str] = []
    for paragraph in document.paragraphs:
        content = paragraph.text
        if content.strip():
            kept.append(content)
    return "\n\n".join(kept)
def _extract_xlsx(data: bytes) -> str:
    """Render the sheets of an Excel workbook as tab-separated text.

    The workbook is opened from memory with ``read_only=True`` and
    ``data_only=True``. Each sheet becomes a section headed by
    ``=== Sheet: <name> ===`` followed by one tab-joined line per row, with
    empty cells rendered as empty strings. At most ``MAX_EXCEL_SHEETS`` sheets
    and ``MAX_EXCEL_ROWS_PER_SHEET`` rows per sheet are read; anything beyond
    that is replaced by an "omitted" marker line.

    Args:
        data: Raw .xlsx file content.

    Returns:
        The sheet sections joined by blank lines.
    """
    from openpyxl import load_workbook

    wb = load_workbook(io.BytesIO(data), read_only=True, data_only=True)
    sections = []
    try:
        sheet_names = wb.sheetnames
        for sheet_idx, sheet_name in enumerate(sheet_names):
            if sheet_idx >= MAX_EXCEL_SHEETS:
                sections.append(f"\n[... {len(sheet_names) - MAX_EXCEL_SHEETS} more sheets omitted]")
                break
            ws = wb[sheet_name]
            rows_text = [f"=== Sheet: {sheet_name} ==="]
            for row_idx, row in enumerate(ws.iter_rows(values_only=True)):
                if row_idx >= MAX_EXCEL_ROWS_PER_SHEET:
                    rows_text.append(f"[... more rows omitted, limit {MAX_EXCEL_ROWS_PER_SHEET:,} rows/sheet]")
                    break
                rows_text.append("\t".join("" if cell is None else str(cell) for cell in row))
            sections.append("\n".join(rows_text))
    finally:
        # Read-only workbooks must be closed explicitly; do it even when a
        # malformed sheet makes iteration raise part-way through.
        wb.close()
    return "\n\n".join(sections)
def _extract_pdf(data: bytes, filename: str) -> str:
    """Extract the text of each page of a PDF.

    Pages that yield no text (or only whitespace) are skipped. Every remaining
    page is emitted under a ``--- Page <n> ---`` header, numbered from 1 by
    position in the document.

    Args:
        data: Raw PDF file content.
        filename: Original filename, used only in error messages.

    Returns:
        The page sections joined by blank lines; an empty string when no page
        has extractable text.

    Raises:
        DocumentExtractionError: If the PDF is password-protected.
        PdfReadError: If the PDF cannot be opened for any other reason.
    """
    from PyPDF2 import PdfReader
    from PyPDF2.errors import PdfReadError

    try:
        reader = PdfReader(io.BytesIO(data))
    except PdfReadError as e:
        detail = str(e).lower()
        if "encrypt" in detail or "password" in detail:
            # Chain the original error so the root cause survives in tracebacks.
            raise DocumentExtractionError(filename, "PDF is password-protected") from e
        raise

    if reader.is_encrypted:
        raise DocumentExtractionError(filename, "PDF is password-protected")

    pages = []
    for page_number, page in enumerate(reader.pages, start=1):
        text = page.extract_text()
        if text and text.strip():
            pages.append(f"--- Page {page_number} ---\n{text}")
    return "\n\n".join(pages)
def _extract_pptx(data: bytes) -> str:
    """Collect the text of each slide in a PowerPoint presentation.

    For every slide, the stripped, non-empty paragraphs of each shape that has
    a text frame are gathered in order. Slides that yield no text are skipped;
    the rest are emitted under a ``--- Slide <n> ---`` header numbered from 1
    by slide position, and joined by blank lines.
    """
    from pptx import Presentation

    deck = Presentation(io.BytesIO(data))
    rendered_slides = []
    for slide_number, slide in enumerate(deck.slides, start=1):
        lines = []
        for shape in slide.shapes:
            if not shape.has_text_frame:
                continue
            lines.extend(
                stripped
                for paragraph in shape.text_frame.paragraphs
                if (stripped := paragraph.text.strip())
            )
        if not lines:
            continue
        rendered_slides.append(f"--- Slide {slide_number} ---\n" + "\n".join(lines))
    return "\n\n".join(rendered_slides)
def extract_text_from_document(base64_data: str, mime_type: str, filename: str) -> str:
    """
    Extract text content from a document file.

    The payload is base64-decoded and handed to the extractor matching
    ``mime_type``. Any unexpected failure inside an extractor is logged and
    reported to the caller as a generic "corrupt or unexpected format" error,
    with the original exception chained as the cause.

    Args:
        base64_data: Base64-encoded file content
        mime_type: MIME type of the document
        filename: Original filename (for error messages)

    Returns:
        Extracted text content, truncated if necessary. When nothing could be
        extracted, a bracketed placeholder naming the file is returned instead.

    Raises:
        DocumentExtractionError: If the MIME type is unsupported, the base64
            payload cannot be decoded, or extraction fails
    """
    if mime_type not in DOCUMENT_MIME_TYPES:
        raise DocumentExtractionError(filename, f"unsupported document type: {mime_type}")

    try:
        data = base64.b64decode(base64_data)
    except Exception as e:
        raise DocumentExtractionError(filename, f"invalid base64 data: {e}") from e

    try:
        if mime_type in ("text/plain", "text/markdown"):
            text = _extract_plain_text(data)
        elif mime_type == "text/csv":
            text = _extract_csv(data)
        elif mime_type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
            text = _extract_docx(data)
        elif mime_type == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":
            text = _extract_xlsx(data)
        elif mime_type == "application/pdf":
            text = _extract_pdf(data, filename)
        elif mime_type == "application/vnd.openxmlformats-officedocument.presentationml.presentation":
            text = _extract_pptx(data)
        else:
            # Defensive: only reachable if DOCUMENT_MIME_TYPES gains an entry
            # that has no extractor branch above.
            raise DocumentExtractionError(filename, f"unsupported document type: {mime_type}")
    except DocumentExtractionError:
        # Already user-facing; pass through untouched.
        raise
    except Exception as e:
        logger.warning("Document extraction failed for %s: %s", filename, e)
        # The user sees a generic reason; the real cause stays attached for debugging.
        raise DocumentExtractionError(
            filename, "file appears to be corrupt or in an unexpected format"
        ) from e

    if not text or not text.strip():
        return f"[File {filename} is empty or contains no extractable text]"
    return _truncate(text)

View File

@@ -6,10 +6,11 @@
*/
import { memo } from 'react'
import { Bot, User, Info } from 'lucide-react'
import { Bot, User, Info, FileText } from 'lucide-react'
import ReactMarkdown, { type Components } from 'react-markdown'
import remarkGfm from 'remark-gfm'
import type { ChatMessage as ChatMessageType } from '../lib/types'
import { isImageAttachment } from '../lib/types'
import { Card } from '@/components/ui/card'
interface ChatMessageProps {
@@ -104,21 +105,35 @@ export const ChatMessage = memo(function ChatMessage({ message }: ChatMessagePro
</div>
)}
{/* Display image attachments */}
{/* Display file attachments */}
{attachments && attachments.length > 0 && (
<div className={`flex flex-wrap gap-2 ${content ? 'mt-3' : ''}`}>
{attachments.map((attachment) => (
<div key={attachment.id} className="border border-border rounded p-1 bg-card">
<img
src={attachment.previewUrl}
alt={attachment.filename}
className="max-w-48 max-h-48 object-contain cursor-pointer hover:opacity-90 transition-opacity rounded"
onClick={() => window.open(attachment.previewUrl, '_blank')}
title={`${attachment.filename} (click to enlarge)`}
/>
<span className="text-xs text-muted-foreground block mt-1 text-center">
{attachment.filename}
</span>
{isImageAttachment(attachment) ? (
<>
<img
src={attachment.previewUrl}
alt={attachment.filename}
className="max-w-48 max-h-48 object-contain cursor-pointer hover:opacity-90 transition-opacity rounded"
onClick={() => window.open(attachment.previewUrl, '_blank')}
title={`${attachment.filename} (click to enlarge)`}
/>
<span className="text-xs text-muted-foreground block mt-1 text-center">
{attachment.filename}
</span>
</>
) : (
<div className="flex items-center gap-2 px-2 py-1">
<FileText size={16} className="text-muted-foreground flex-shrink-0" />
<span className="text-xs text-muted-foreground">
{attachment.filename}
</span>
<span className="text-xs text-muted-foreground/60">
({(attachment.size / 1024).toFixed(0)} KB)
</span>
</div>
)}
</div>
))}
</div>

View File

@@ -6,20 +6,22 @@
*/
import { useCallback, useEffect, useRef, useState } from 'react'
import { Send, X, CheckCircle2, AlertCircle, Wifi, WifiOff, RotateCcw, Paperclip, Plus } from 'lucide-react'
import { Send, X, CheckCircle2, AlertCircle, Wifi, WifiOff, RotateCcw, Paperclip, Plus, FileText } from 'lucide-react'
import { useExpandChat } from '../hooks/useExpandChat'
import { ChatMessage } from './ChatMessage'
import { TypingIndicator } from './TypingIndicator'
import type { ImageAttachment } from '../lib/types'
import type { FileAttachment } from '../lib/types'
import { ALL_ALLOWED_MIME_TYPES, IMAGE_MIME_TYPES, isImageAttachment, resolveMimeType } from '../lib/types'
import { isSubmitEnter } from '../lib/keyboard'
import { Button } from '@/components/ui/button'
import { Input } from '@/components/ui/input'
import { Card, CardContent } from '@/components/ui/card'
import { Alert, AlertDescription } from '@/components/ui/alert'
// Image upload validation constants
const MAX_FILE_SIZE = 5 * 1024 * 1024 // 5 MB
const ALLOWED_TYPES = ['image/jpeg', 'image/png']
// File upload validation constants
const MAX_IMAGE_SIZE = 5 * 1024 * 1024 // 5 MB for images
const MAX_DOCUMENT_SIZE = 20 * 1024 * 1024 // 20 MB for documents
const ALLOWED_EXTENSIONS = ['md', 'txt', 'csv', 'docx', 'xlsx', 'pdf', 'pptx', 'jpg', 'jpeg', 'png']
interface ExpandProjectChatProps {
projectName: string
@@ -34,7 +36,7 @@ export function ExpandProjectChat({
}: ExpandProjectChatProps) {
const [input, setInput] = useState('')
const [error, setError] = useState<string | null>(null)
const [pendingAttachments, setPendingAttachments] = useState<ImageAttachment[]>([])
const [pendingAttachments, setPendingAttachments] = useState<FileAttachment[]>([])
const messagesEndRef = useRef<HTMLDivElement>(null)
const inputRef = useRef<HTMLInputElement>(null)
const fileInputRef = useRef<HTMLInputElement>(null)
@@ -95,20 +97,33 @@ export function ExpandProjectChat({
}
}
// File handling for image attachments
// File handling for attachments (images and documents)
const handleFileSelect = useCallback((files: FileList | null) => {
if (!files) return
Array.from(files).forEach((file) => {
// Validate file type
if (!ALLOWED_TYPES.includes(file.type)) {
setError(`Invalid file type: ${file.name}. Only JPEG and PNG are supported.`)
return
// Resolve MIME type (browsers may not set it for .md files)
let mimeType = file.type
if (!mimeType || !ALL_ALLOWED_MIME_TYPES.includes(mimeType)) {
mimeType = resolveMimeType(file.name)
}
// Validate file size
if (file.size > MAX_FILE_SIZE) {
setError(`File too large: ${file.name}. Maximum size is 5 MB.`)
// Validate file type
if (!ALL_ALLOWED_MIME_TYPES.includes(mimeType)) {
const ext = file.name.split('.').pop()?.toLowerCase()
if (!ext || !ALLOWED_EXTENSIONS.includes(ext)) {
setError(`Unsupported file type: ${file.name}. Supported: images (JPEG, PNG) and documents (MD, TXT, CSV, DOCX, XLSX, PDF, PPTX).`)
return
}
mimeType = resolveMimeType(file.name)
}
// Validate size based on type
const isImage = (IMAGE_MIME_TYPES as readonly string[]).includes(mimeType)
const maxSize = isImage ? MAX_IMAGE_SIZE : MAX_DOCUMENT_SIZE
const maxLabel = isImage ? '5 MB' : '20 MB'
if (file.size > maxSize) {
setError(`File too large: ${file.name}. Maximum size is ${maxLabel}.`)
return
}
@@ -118,12 +133,12 @@ export function ExpandProjectChat({
const dataUrl = e.target?.result as string
const base64Data = dataUrl.split(',')[1]
const attachment: ImageAttachment = {
const attachment: FileAttachment = {
id: `${Date.now()}-${Math.random().toString(36).substring(2, 9)}`,
filename: file.name,
mimeType: file.type as 'image/jpeg' | 'image/png',
mimeType: mimeType as FileAttachment['mimeType'],
base64Data,
previewUrl: dataUrl,
previewUrl: isImage ? dataUrl : '',
size: file.size,
}
@@ -291,11 +306,17 @@ export function ExpandProjectChat({
key={attachment.id}
className="relative group border-2 border-border p-1 bg-card rounded shadow-sm"
>
<img
src={attachment.previewUrl}
alt={attachment.filename}
className="w-16 h-16 object-cover rounded"
/>
{isImageAttachment(attachment) ? (
<img
src={attachment.previewUrl}
alt={attachment.filename}
className="w-16 h-16 object-cover rounded"
/>
) : (
<div className="w-16 h-16 flex items-center justify-center bg-muted rounded">
<FileText size={24} className="text-muted-foreground" />
</div>
)}
<button
onClick={() => handleRemoveAttachment(attachment.id)}
className="absolute -top-2 -right-2 bg-destructive text-destructive-foreground rounded-full p-0.5 border-2 border-border hover:scale-110 transition-transform"
@@ -318,7 +339,7 @@ export function ExpandProjectChat({
<input
ref={fileInputRef}
type="file"
accept="image/jpeg,image/png"
accept="image/jpeg,image/png,.md,.txt,.csv,.docx,.xlsx,.pdf,.pptx"
multiple
onChange={(e) => handleFileSelect(e.target.files)}
className="hidden"
@@ -330,7 +351,7 @@ export function ExpandProjectChat({
disabled={connectionStatus !== 'connected'}
variant="ghost"
size="icon"
title="Attach image (JPEG, PNG - max 5MB)"
title="Attach files (images: JPEG/PNG up to 5MB, documents: MD, TXT, CSV, DOCX, XLSX, PDF, PPTX up to 20MB)"
>
<Paperclip size={18} />
</Button>
@@ -364,7 +385,7 @@ export function ExpandProjectChat({
{/* Help text */}
<p className="text-xs text-muted-foreground mt-2">
Press Enter to send. Drag & drop or click <Paperclip size={12} className="inline" /> to attach images.
Press Enter to send. Drag & drop or click <Paperclip size={12} className="inline" /> to attach files.
</p>
</div>
)}

View File

@@ -11,16 +11,18 @@ import { useSpecChat } from '../hooks/useSpecChat'
import { ChatMessage } from './ChatMessage'
import { QuestionOptions } from './QuestionOptions'
import { TypingIndicator } from './TypingIndicator'
import type { ImageAttachment } from '../lib/types'
import type { FileAttachment } from '../lib/types'
import { ALL_ALLOWED_MIME_TYPES, IMAGE_MIME_TYPES, isImageAttachment, resolveMimeType } from '../lib/types'
import { isSubmitEnter } from '../lib/keyboard'
import { Button } from '@/components/ui/button'
import { Textarea } from '@/components/ui/textarea'
import { Card, CardContent } from '@/components/ui/card'
import { Alert, AlertDescription } from '@/components/ui/alert'
// Image upload validation constants
const MAX_FILE_SIZE = 5 * 1024 * 1024 // 5 MB
const ALLOWED_TYPES = ['image/jpeg', 'image/png']
// File upload validation constants
const MAX_IMAGE_SIZE = 5 * 1024 * 1024 // 5 MB for images
const MAX_DOCUMENT_SIZE = 20 * 1024 * 1024 // 20 MB for documents
const ALLOWED_EXTENSIONS = ['md', 'txt', 'csv', 'docx', 'xlsx', 'pdf', 'pptx', 'jpg', 'jpeg', 'png']
// Sample prompt for quick testing
const SAMPLE_PROMPT = `Let's call it Simple Todo. This is a really simple web app that I can use to track my to-do items using a Kanban board. I should be able to add to-dos and then drag and drop them through the Kanban board. The different columns in the Kanban board are:
@@ -64,7 +66,7 @@ export function SpecCreationChat({
const [input, setInput] = useState('')
const [error, setError] = useState<string | null>(null)
const [yoloEnabled, setYoloEnabled] = useState(false)
const [pendingAttachments, setPendingAttachments] = useState<ImageAttachment[]>([])
const [pendingAttachments, setPendingAttachments] = useState<FileAttachment[]>([])
const messagesEndRef = useRef<HTMLDivElement>(null)
const inputRef = useRef<HTMLTextAreaElement>(null)
const fileInputRef = useRef<HTMLInputElement>(null)
@@ -138,20 +140,33 @@ export function SpecCreationChat({
sendAnswer(answers)
}
// File handling for image attachments
// File handling for attachments (images and documents)
const handleFileSelect = useCallback((files: FileList | null) => {
if (!files) return
Array.from(files).forEach((file) => {
// Validate file type
if (!ALLOWED_TYPES.includes(file.type)) {
setError(`Invalid file type: ${file.name}. Only JPEG and PNG are supported.`)
return
// Resolve MIME type (browsers may not set it for .md files)
let mimeType = file.type
if (!mimeType || !ALL_ALLOWED_MIME_TYPES.includes(mimeType)) {
mimeType = resolveMimeType(file.name)
}
// Validate file size
if (file.size > MAX_FILE_SIZE) {
setError(`File too large: ${file.name}. Maximum size is 5 MB.`)
// Validate file type
if (!ALL_ALLOWED_MIME_TYPES.includes(mimeType)) {
const ext = file.name.split('.').pop()?.toLowerCase()
if (!ext || !ALLOWED_EXTENSIONS.includes(ext)) {
setError(`Unsupported file type: ${file.name}. Supported: images (JPEG, PNG) and documents (MD, TXT, CSV, DOCX, XLSX, PDF, PPTX).`)
return
}
mimeType = resolveMimeType(file.name)
}
// Validate size based on type
const isImage = (IMAGE_MIME_TYPES as readonly string[]).includes(mimeType)
const maxSize = isImage ? MAX_IMAGE_SIZE : MAX_DOCUMENT_SIZE
const maxLabel = isImage ? '5 MB' : '20 MB'
if (file.size > maxSize) {
setError(`File too large: ${file.name}. Maximum size is ${maxLabel}.`)
return
}
@@ -159,15 +174,14 @@ export function SpecCreationChat({
const reader = new FileReader()
reader.onload = (e) => {
const dataUrl = e.target?.result as string
// dataUrl is "data:image/png;base64,XXXXXX"
const base64Data = dataUrl.split(',')[1]
const attachment: ImageAttachment = {
const attachment: FileAttachment = {
id: `${Date.now()}-${Math.random().toString(36).substring(2, 9)}`,
filename: file.name,
mimeType: file.type as 'image/jpeg' | 'image/png',
mimeType: mimeType as FileAttachment['mimeType'],
base64Data,
previewUrl: dataUrl,
previewUrl: isImage ? dataUrl : '',
size: file.size,
}
@@ -364,11 +378,17 @@ export function SpecCreationChat({
key={attachment.id}
className="relative group border-2 border-border p-1 bg-card rounded shadow-sm"
>
<img
src={attachment.previewUrl}
alt={attachment.filename}
className="w-16 h-16 object-cover rounded"
/>
{isImageAttachment(attachment) ? (
<img
src={attachment.previewUrl}
alt={attachment.filename}
className="w-16 h-16 object-cover rounded"
/>
) : (
<div className="w-16 h-16 flex items-center justify-center bg-muted rounded">
<FileText size={24} className="text-muted-foreground" />
</div>
)}
<button
onClick={() => handleRemoveAttachment(attachment.id)}
className="absolute -top-2 -right-2 bg-destructive text-destructive-foreground rounded-full p-0.5 border-2 border-border hover:scale-110 transition-transform"
@@ -391,7 +411,7 @@ export function SpecCreationChat({
<input
ref={fileInputRef}
type="file"
accept="image/jpeg,image/png"
accept="image/jpeg,image/png,.md,.txt,.csv,.docx,.xlsx,.pdf,.pptx"
multiple
onChange={(e) => handleFileSelect(e.target.files)}
className="hidden"
@@ -403,7 +423,7 @@ export function SpecCreationChat({
disabled={connectionStatus !== 'connected'}
variant="ghost"
size="icon"
title="Attach image (JPEG, PNG - max 5MB)"
title="Attach files (images: JPEG/PNG up to 5MB, documents: MD, TXT, CSV, DOCX, XLSX, PDF, PPTX up to 20MB)"
>
<Paperclip size={18} />
</Button>
@@ -444,7 +464,7 @@ export function SpecCreationChat({
{/* Help text */}
<p className="text-xs text-muted-foreground mt-2">
Press Enter to send, Shift+Enter for new line. Drag & drop or click <Paperclip size={12} className="inline" /> to attach images (JPEG/PNG, max 5MB).
Press Enter to send, Shift+Enter for new line. Drag & drop or click <Paperclip size={12} className="inline" /> to attach files.
</p>
</div>
)}

View File

@@ -3,7 +3,7 @@
*/
import { useState, useCallback, useRef, useEffect } from 'react'
import type { ChatMessage, ImageAttachment, ExpandChatServerMessage } from '../lib/types'
import type { ChatMessage, FileAttachment, ExpandChatServerMessage } from '../lib/types'
type ConnectionStatus = 'disconnected' | 'connecting' | 'connected' | 'error'
@@ -27,7 +27,7 @@ interface UseExpandChatReturn {
featuresCreated: number
recentFeatures: CreatedFeature[]
start: () => void
sendMessage: (content: string, attachments?: ImageAttachment[]) => void
sendMessage: (content: string, attachments?: FileAttachment[]) => void
disconnect: () => void
}
@@ -278,7 +278,7 @@ export function useExpandChat({
setTimeout(checkAndSend, 100)
}, [connect, onError])
const sendMessage = useCallback((content: string, attachments?: ImageAttachment[]) => {
const sendMessage = useCallback((content: string, attachments?: FileAttachment[]) => {
if (!wsRef.current || wsRef.current.readyState !== WebSocket.OPEN) {
onError?.('Not connected')
return

View File

@@ -3,7 +3,7 @@
*/
import { useState, useCallback, useRef, useEffect } from 'react'
import type { ChatMessage, ImageAttachment, SpecChatServerMessage, SpecQuestion } from '../lib/types'
import type { ChatMessage, FileAttachment, SpecChatServerMessage, SpecQuestion } from '../lib/types'
import { getSpecStatus } from '../lib/api'
type ConnectionStatus = 'disconnected' | 'connecting' | 'connected' | 'error'
@@ -22,7 +22,7 @@ interface UseSpecChatReturn {
currentQuestions: SpecQuestion[] | null
currentToolId: string | null
start: () => void
sendMessage: (content: string, attachments?: ImageAttachment[]) => void
sendMessage: (content: string, attachments?: FileAttachment[]) => void
sendAnswer: (answers: Record<string, string | string[]>) => void
disconnect: () => void
}
@@ -367,7 +367,7 @@ export function useSpecChat({
setTimeout(checkAndSend, 100)
}, [connect])
const sendMessage = useCallback((content: string, attachments?: ImageAttachment[]) => {
const sendMessage = useCallback((content: string, attachments?: FileAttachment[]) => {
if (!wsRef.current || wsRef.current.readyState !== WebSocket.OPEN) {
onError?.('Not connected')
return

View File

@@ -417,22 +417,67 @@ export type SpecChatServerMessage =
| SpecChatPongMessage
| SpecChatResponseDoneMessage
// Image attachment for chat messages
export interface ImageAttachment {
// File attachment for chat messages (images and documents)
export interface FileAttachment {
id: string
filename: string
mimeType: 'image/jpeg' | 'image/png'
mimeType:
| 'image/jpeg'
| 'image/png'
| 'text/plain'
| 'text/markdown'
| 'text/csv'
| 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
| 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
| 'application/pdf'
| 'application/vnd.openxmlformats-officedocument.presentationml.presentation'
base64Data: string // Raw base64 (without data: prefix)
previewUrl: string // data: URL for display
previewUrl: string // data: URL for images, empty string for documents
size: number // File size in bytes
}
/** @deprecated Use FileAttachment instead */
export type ImageAttachment = FileAttachment
// MIME types that are treated as images
export const IMAGE_MIME_TYPES = ['image/jpeg', 'image/png'] as const

// MIME types that are treated as documents
export const DOCUMENT_MIME_TYPES = [
  'text/plain',
  'text/markdown',
  'text/csv',
  'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
  'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
  'application/pdf',
  'application/vnd.openxmlformats-officedocument.presentationml.presentation',
] as const

// Every accepted MIME type: images first, then documents
export const ALL_ALLOWED_MIME_TYPES: string[] = new Array<string>().concat(
  IMAGE_MIME_TYPES,
  DOCUMENT_MIME_TYPES,
)
/** Report whether the attachment's MIME type is one of IMAGE_MIME_TYPES. */
export function isImageAttachment(att: FileAttachment): boolean {
  const imageTypes: readonly string[] = IMAGE_MIME_TYPES
  return imageTypes.indexOf(att.mimeType) !== -1
}
// Extension (lower-case, no dot) -> MIME type. A Map is used instead of a
// plain object so that names such as "constructor" or "__proto__" cannot
// resolve to inherited Object.prototype members.
const MIME_TYPE_BY_EXTENSION: ReadonlyMap<string, string> = new Map([
  ['md', 'text/markdown'],
  ['txt', 'text/plain'],
  ['csv', 'text/csv'],
  ['docx', 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'],
  ['xlsx', 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'],
  ['pdf', 'application/pdf'],
  ['pptx', 'application/vnd.openxmlformats-officedocument.presentationml.presentation'],
  ['jpg', 'image/jpeg'],
  ['jpeg', 'image/jpeg'],
  ['png', 'image/png'],
])

/**
 * Derive a MIME type from a filename's extension.
 *
 * The extension is the text after the last '.', compared case-insensitively.
 * Unknown extensions resolve to 'application/octet-stream'.
 */
export function resolveMimeType(filename: string): string {
  const ext = filename.split('.').pop()?.toLowerCase() ?? ''
  return MIME_TYPE_BY_EXTENSION.get(ext) ?? 'application/octet-stream'
}
// UI chat message for display
export interface ChatMessage {
id: string
role: 'user' | 'assistant' | 'system'
content: string
attachments?: ImageAttachment[]
attachments?: FileAttachment[]
timestamp: Date
questions?: SpecQuestion[]
isStreaming?: boolean