feat: add document file upload support for spec creation and project expansion

Add support for uploading Markdown, Text, Word (.docx), CSV, Excel (.xlsx),
PDF, and PowerPoint (.pptx) files in addition to existing JPEG/PNG image
uploads in the spec creation and project expansion chat interfaces.

Backend changes:
- New server/utils/document_extraction.py: in-memory text extraction for all
  document formats using python-docx, openpyxl, PyPDF2, python-pptx (no disk
  persistence)
- Rename ImageAttachment to FileAttachment across schemas, routers, and
  chat session services
- Add build_attachment_content_blocks() helper in chat_constants.py to route
  images as image content blocks and documents as extracted text blocks
- Separate size limits: 5MB for images, 20MB for documents
- Handle extraction errors (corrupt files, encrypted PDFs) gracefully

Frontend changes:
- Widen accepted MIME types and file extensions in both chat components
- Add resolveMimeType() fallback for browsers that don't set MIME on .md files
- Document attachments display with FileText icon instead of image thumbnail
- ChatMessage renders documents as compact pills with filename and size
- Update help text from "attach images" to "attach files"

Dependencies added: python-docx, openpyxl, PyPDF2, python-pptx

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Auto
2026-03-25 12:51:17 +02:00
parent fca1f6a5e2
commit 7210c6f066
15 changed files with 513 additions and 123 deletions

View File

@@ -11,7 +11,7 @@ from datetime import datetime
from pathlib import Path
from typing import Literal
from pydantic import BaseModel, Field, field_validator
from pydantic import BaseModel, Field, field_validator, model_validator
# Import model constants from registry (single source of truth)
_root = Path(__file__).parent.parent
@@ -331,36 +331,61 @@ class WSAgentUpdateMessage(BaseModel):
# ============================================================================
# Spec Chat Schemas
# Chat Attachment Schemas
# ============================================================================
# Upload size limits in bytes. They are compared against the length of the
# base64-decoded attachment payload, not the encoded string.
MAX_IMAGE_SIZE = 5 * 1024 * 1024  # 5 MB for images
MAX_DOCUMENT_SIZE = 20 * 1024 * 1024  # 20 MB for documents

# MIME types held to MAX_IMAGE_SIZE; any other accepted MIME type is held to
# MAX_DOCUMENT_SIZE. Frozen so the lookup table cannot be mutated at runtime.
_IMAGE_MIME_TYPES = frozenset({'image/jpeg', 'image/png'})
class ImageAttachment(BaseModel):
"""Image attachment from client for spec creation chat."""
class FileAttachment(BaseModel):
    """File attachment from client for spec creation / expand project chat.

    Validation happens in two steps:

    1. ``base64Data`` must be well-formed base64.
    2. The decoded payload must not exceed the size limit for its MIME type:
       ``MAX_IMAGE_SIZE`` for the types in ``_IMAGE_MIME_TYPES`` and
       ``MAX_DOCUMENT_SIZE`` for every other accepted type.
    """

    # Client-supplied file name, 1 to 255 characters.
    filename: str = Field(..., min_length=1, max_length=255)
    # Closed set of accepted MIME types: JPEG/PNG images, plain text,
    # Markdown, CSV, Word (.docx), Excel (.xlsx), PDF and PowerPoint (.pptx).
    mimeType: Literal[
        'image/jpeg', 'image/png',
        'text/plain', 'text/markdown', 'text/csv',
        'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
        'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
        'application/pdf',
        'application/vnd.openxmlformats-officedocument.presentationml.presentation',
    ]
    # File content, base64-encoded.
    base64Data: str

    @field_validator('base64Data')
    @classmethod
    def validate_base64(cls, v: str) -> str:
        """Validate that base64 data is decodable.

        Args:
            v: The base64-encoded payload as sent by the client.

        Returns:
            ``v`` unchanged.

        Raises:
            ValueError: If ``v`` is not well-formed base64.
        """
        try:
            # validate=True rejects characters outside the base64 alphabet.
            # The default silently discards them, so arbitrary garbage would
            # otherwise "decode" successfully.
            base64.b64decode(v, validate=True)
        except (ValueError, TypeError) as e:
            # binascii.Error is a ValueError subclass, so it is covered here.
            raise ValueError(f'Invalid base64 data: {e}') from e
        return v

    @model_validator(mode='after')
    def validate_size(self) -> 'FileAttachment':
        """Validate file size based on MIME type.

        Returns:
            The validated model instance.

        Raises:
            ValueError: If the decoded payload is larger than the limit that
                applies to ``mimeType``, or if the payload cannot be decoded.
        """
        # The size limit applies to the decoded bytes, not the base64 text.
        decoded = base64.b64decode(self.base64Data, validate=True)

        if self.mimeType in _IMAGE_MIME_TYPES:
            max_size = MAX_IMAGE_SIZE
            label = "Image"
        else:
            max_size = MAX_DOCUMENT_SIZE
            label = "Document"

        if len(decoded) > max_size:
            raise ValueError(
                f'{label} size ({len(decoded) / (1024 * 1024):.1f} MB) exceeds '
                f'maximum of {max_size // (1024 * 1024)} MB'
            )
        return self
# ============================================================================
# Filesystem Schemas