Merge pull request #227 from AutoForgeAI/feat/document-file-uploads

feat: add document file upload support for spec creation and project expansion
2026-03-26 07:13:09 +00:00 · 2026-03-25 12:52:44 +02:00
parent fca1f6a5e2 7210c6f066
commit b738859417
15 changed files with 513 additions and 123 deletions
--- a/requirements-prod.txt
+++ b/requirements-prod.txt
@@ -12,3 +12,7 @@ aiofiles>=24.0.0
 apscheduler>=3.10.0,<4.0.0
 pywinpty>=2.0.0; sys_platform == "win32"
 pyyaml>=6.0.0
+python-docx>=1.1.0
+openpyxl>=3.1.0
+PyPDF2>=3.0.0
+python-pptx>=1.0.0
--- a/requirements.txt
+++ b/requirements.txt
@@ -10,6 +10,10 @@ aiofiles>=24.0.0
 apscheduler>=3.10.0,<4.0.0
 pywinpty>=2.0.0; sys_platform == "win32"
 pyyaml>=6.0.0
+python-docx>=1.1.0
+openpyxl>=3.1.0
+PyPDF2>=3.0.0
+python-pptx>=1.0.0

 # Dev dependencies
 ruff>=0.8.0
--- a/server/routers/expand_project.py
+++ b/server/routers/expand_project.py
@@ -13,7 +13,7 @@ from typing import Optional
 from fastapi import APIRouter, HTTPException, WebSocket, WebSocketDisconnect
 from pydantic import BaseModel, ValidationError

-from ..schemas import ImageAttachment
+from ..schemas import FileAttachment
 from ..services.expand_chat_session import (
    ExpandChatSession,
    create_expand_session,
@@ -181,12 +181,12 @@ async def expand_project_websocket(websocket: WebSocket, project_name: str):
                    user_content = message.get("content", "").strip()

                    # Parse attachments if present
-                    attachments: list[ImageAttachment] = []
+                    attachments: list[FileAttachment] = []
                    raw_attachments = message.get("attachments", [])
                    if raw_attachments:
                        try:
                            for raw_att in raw_attachments:
-                                attachments.append(ImageAttachment(**raw_att))
+                                attachments.append(FileAttachment(**raw_att))
                        except (ValidationError, Exception) as e:
                            logger.warning(f"Invalid attachment data: {e}")
                            await websocket.send_json({
--- a/server/routers/spec_creation.py
+++ b/server/routers/spec_creation.py
@@ -12,7 +12,7 @@ from typing import Optional
 from fastapi import APIRouter, HTTPException, WebSocket, WebSocketDisconnect
 from pydantic import BaseModel, ValidationError

-from ..schemas import ImageAttachment
+from ..schemas import FileAttachment
 from ..services.spec_chat_session import (
    SpecChatSession,
    create_session,
@@ -242,12 +242,12 @@ async def spec_chat_websocket(websocket: WebSocket, project_name: str):
                    user_content = message.get("content", "").strip()

                    # Parse attachments if present
-                    attachments: list[ImageAttachment] = []
+                    attachments: list[FileAttachment] = []
                    raw_attachments = message.get("attachments", [])
                    if raw_attachments:
                        try:
                            for raw_att in raw_attachments:
-                                attachments.append(ImageAttachment(**raw_att))
+                                attachments.append(FileAttachment(**raw_att))
                        except (ValidationError, Exception) as e:
                            logger.warning(f"Invalid attachment data: {e}")
                            await websocket.send_json({
--- a/server/schemas.py
+++ b/server/schemas.py
@@ -11,7 +11,7 @@ from datetime import datetime
 from pathlib import Path
 from typing import Literal

-from pydantic import BaseModel, Field, field_validator
+from pydantic import BaseModel, Field, field_validator, model_validator

 # Import model constants from registry (single source of truth)
 _root = Path(__file__).parent.parent
@@ -331,36 +331,61 @@ class WSAgentUpdateMessage(BaseModel):


 # ============================================================================
-# Spec Chat Schemas
+# Chat Attachment Schemas
 # ============================================================================

-# Maximum image file size: 5 MB
-MAX_IMAGE_SIZE = 5 * 1024 * 1024
+# Size limits
+MAX_IMAGE_SIZE = 5 * 1024 * 1024      # 5 MB for images
+MAX_DOCUMENT_SIZE = 20 * 1024 * 1024   # 20 MB for documents
+
+_IMAGE_MIME_TYPES = {'image/jpeg', 'image/png'}


-class ImageAttachment(BaseModel):
-    """Image attachment from client for spec creation chat."""
+class FileAttachment(BaseModel):
+    """File attachment from client for spec creation / expand project chat."""
    filename: str = Field(..., min_length=1, max_length=255)
-    mimeType: Literal['image/jpeg', 'image/png']
+    mimeType: Literal[
+        'image/jpeg', 'image/png',
+        'text/plain', 'text/markdown', 'text/csv',
+        'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
+        'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
+        'application/pdf',
+        'application/vnd.openxmlformats-officedocument.presentationml.presentation',
+    ]
    base64Data: str

    @field_validator('base64Data')
    @classmethod
-    def validate_base64_and_size(cls, v: str) -> str:
-        """Validate that base64 data is valid and within size limit."""
+    def validate_base64(cls, v: str) -> str:
+        """Validate that base64 data is decodable."""
        try:
-            decoded = base64.b64decode(v)
-            if len(decoded) > MAX_IMAGE_SIZE:
-                raise ValueError(
-                    f'Image size ({len(decoded) / (1024 * 1024):.1f} MB) exceeds '
-                    f'maximum of {MAX_IMAGE_SIZE // (1024 * 1024)} MB'
-                )
+            base64.b64decode(v)
            return v
        except Exception as e:
-            if 'Image size' in str(e):
-                raise
            raise ValueError(f'Invalid base64 data: {e}')

+    @model_validator(mode='after')
+    def validate_size(self) -> 'FileAttachment':
+        """Validate file size based on MIME type."""
+        try:
+            decoded = base64.b64decode(self.base64Data)
+        except Exception:
+            return self  # Already caught by field validator
+
+        if self.mimeType in _IMAGE_MIME_TYPES:
+            max_size = MAX_IMAGE_SIZE
+            label = "Image"
+        else:
+            max_size = MAX_DOCUMENT_SIZE
+            label = "Document"
+
+        if len(decoded) > max_size:
+            raise ValueError(
+                f'{label} size ({len(decoded) / (1024 * 1024):.1f} MB) exceeds '
+                f'maximum of {max_size // (1024 * 1024)} MB'
+            )
+        return self
+

 # ============================================================================
 # Filesystem Schemas
--- a/server/services/chat_constants.py
+++ b/server/services/chat_constants.py
@@ -35,6 +35,13 @@ if _root_str not in sys.path:
 from env_constants import API_ENV_VARS  # noqa: E402, F401
 from rate_limit_utils import is_rate_limit_error, parse_retry_after  # noqa: E402, F401

+from ..schemas import FileAttachment
+from ..utils.document_extraction import (
+    extract_text_from_document,
+    is_document,
+    is_image,
+)
+
 logger = logging.getLogger(__name__)


@@ -88,6 +95,35 @@ async def safe_receive_response(client: Any, log: logging.Logger) -> AsyncGenera
            raise


+def build_attachment_content_blocks(attachments: list[FileAttachment]) -> list[dict]:
+    """Convert FileAttachment objects to Claude API content blocks.
+
+    Images become image content blocks (passed directly to Claude's vision).
+    Documents are extracted to text and become text content blocks.
+
+    Raises:
+        DocumentExtractionError: If a document cannot be read.
+    """
+    blocks: list[dict] = []
+    for att in attachments:
+        if is_image(att.mimeType):
+            blocks.append({
+                "type": "image",
+                "source": {
+                    "type": "base64",
+                    "media_type": att.mimeType,
+                    "data": att.base64Data,
+                }
+            })
+        elif is_document(att.mimeType):
+            text = extract_text_from_document(att.base64Data, att.mimeType, att.filename)
+            blocks.append({
+                "type": "text",
+                "text": f"[Content of uploaded file: {att.filename}]\n\n{text}",
+            })
+    return blocks
+
+
 async def make_multimodal_message(content_blocks: list[dict]) -> AsyncGenerator[dict, None]:
    """Yield a single multimodal user message in Claude Agent SDK format.

--- a/server/services/expand_chat_session.py
+++ b/server/services/expand_chat_session.py
@@ -21,9 +21,11 @@ from typing import Any, AsyncGenerator, Optional
 from claude_agent_sdk import ClaudeAgentOptions, ClaudeSDKClient
 from dotenv import load_dotenv

-from ..schemas import ImageAttachment
+from ..schemas import FileAttachment
+from ..utils.document_extraction import DocumentExtractionError
 from .chat_constants import (
    ROOT_DIR,
+    build_attachment_content_blocks,
    check_rate_limit_error,
    make_multimodal_message,
    safe_receive_response,
@@ -226,7 +228,7 @@ class ExpandChatSession:
    async def send_message(
        self,
        user_message: str,
-        attachments: list[ImageAttachment] | None = None
+        attachments: list[FileAttachment] | None = None
    ) -> AsyncGenerator[dict, None]:
        """
        Send user message and stream Claude's response.
@@ -273,7 +275,7 @@ class ExpandChatSession:
    async def _query_claude(
        self,
        message: str,
-        attachments: list[ImageAttachment] | None = None
+        attachments: list[FileAttachment] | None = None
    ) -> AsyncGenerator[dict, None]:
        """
        Internal method to query Claude and stream responses.
@@ -289,17 +291,16 @@ class ExpandChatSession:
            content_blocks: list[dict[str, Any]] = []
            if message:
                content_blocks.append({"type": "text", "text": message})
-            for att in attachments:
-                content_blocks.append({
-                    "type": "image",
-                    "source": {
-                        "type": "base64",
-                        "media_type": att.mimeType,
-                        "data": att.base64Data,
-                    }
-                })
+
+            # Add attachment blocks (images as image blocks, documents as extracted text)
+            try:
+                content_blocks.extend(build_attachment_content_blocks(attachments))
+            except DocumentExtractionError as e:
+                yield {"type": "error", "content": str(e)}
+                return
+
            await self.client.query(make_multimodal_message(content_blocks))
-            logger.info(f"Sent multimodal message with {len(attachments)} image(s)")
+            logger.info(f"Sent multimodal message with {len(attachments)} attachment(s)")
        else:
            await self.client.query(message)

--- a/server/services/spec_chat_session.py
+++ b/server/services/spec_chat_session.py
@@ -18,9 +18,11 @@ from typing import Any, AsyncGenerator, Optional
 from claude_agent_sdk import ClaudeAgentOptions, ClaudeSDKClient
 from dotenv import load_dotenv

-from ..schemas import ImageAttachment
+from ..schemas import FileAttachment
+from ..utils.document_extraction import DocumentExtractionError
 from .chat_constants import (
    ROOT_DIR,
+    build_attachment_content_blocks,
    check_rate_limit_error,
    make_multimodal_message,
    safe_receive_response,
@@ -201,7 +203,7 @@ class SpecChatSession:
    async def send_message(
        self,
        user_message: str,
-        attachments: list[ImageAttachment] | None = None
+        attachments: list[FileAttachment] | None = None
    ) -> AsyncGenerator[dict, None]:
        """
        Send user message and stream Claude's response.
@@ -247,7 +249,7 @@ class SpecChatSession:
    async def _query_claude(
        self,
        message: str,
-        attachments: list[ImageAttachment] | None = None
+        attachments: list[FileAttachment] | None = None
    ) -> AsyncGenerator[dict, None]:
        """
        Internal method to query Claude and stream responses.
@@ -273,21 +275,17 @@ class SpecChatSession:
            if message:
                content_blocks.append({"type": "text", "text": message})

-            # Add image blocks
-            for att in attachments:
-                content_blocks.append({
-                    "type": "image",
-                    "source": {
-                        "type": "base64",
-                        "media_type": att.mimeType,
-                        "data": att.base64Data,
-                    }
-                })
+            # Add attachment blocks (images as image blocks, documents as extracted text)
+            try:
+                content_blocks.extend(build_attachment_content_blocks(attachments))
+            except DocumentExtractionError as e:
+                yield {"type": "error", "content": str(e)}
+                return

            # Send multimodal content to Claude using async generator format
            # The SDK's query() accepts AsyncIterable[dict] for custom message formats
            await self.client.query(make_multimodal_message(content_blocks))
-            logger.info(f"Sent multimodal message with {len(attachments)} image(s)")
+            logger.info(f"Sent multimodal message with {len(attachments)} attachment(s)")
        else:
            # Text-only message: use string format
            await self.client.query(message)
--- a/server/utils/document_extraction.py
+++ b/server/utils/document_extraction.py
@@ -0,0 +1,221 @@
+"""
+Document Extraction Utility
+============================
+
+Extracts text content from various document formats in memory (no disk I/O).
+Supports: TXT, MD, CSV, DOCX, XLSX, PDF, PPTX.
+"""
+
+import base64
+import csv
+import io
+import logging
+
+logger = logging.getLogger(__name__)
+
+# Maximum characters of extracted text to send to Claude
+MAX_EXTRACTED_CHARS = 200_000
+
+# Excel limits: maximum rows read per sheet and maximum sheets read per workbook
+MAX_EXCEL_ROWS_PER_SHEET = 10_000
+MAX_EXCEL_SHEETS = 50
+
+# MIME type classification
+DOCUMENT_MIME_TYPES: dict[str, str] = {
+    "text/plain": ".txt",
+    "text/markdown": ".md",
+    "text/csv": ".csv",
+    "application/vnd.openxmlformats-officedocument.wordprocessingml.document": ".docx",
+    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": ".xlsx",
+    "application/pdf": ".pdf",
+    "application/vnd.openxmlformats-officedocument.presentationml.presentation": ".pptx",
+}
+
+IMAGE_MIME_TYPES = {"image/jpeg", "image/png"}
+
+ALL_ALLOWED_MIME_TYPES = IMAGE_MIME_TYPES | set(DOCUMENT_MIME_TYPES.keys())
+
+
+def is_image(mime_type: str) -> bool:
+    """Check if the MIME type is a supported image format."""
+    return mime_type in IMAGE_MIME_TYPES
+
+
+def is_document(mime_type: str) -> bool:
+    """Check if the MIME type is a supported document format."""
+    return mime_type in DOCUMENT_MIME_TYPES
+
+
+class DocumentExtractionError(Exception):
+    """Raised when text extraction from a document fails."""
+
+    def __init__(self, filename: str, reason: str):
+        self.filename = filename
+        self.reason = reason
+        super().__init__(f"Failed to read {filename}: {reason}")
+
+
+def _truncate(text: str) -> str:
+    """Truncate text if it exceeds the maximum character limit."""
+    if len(text) > MAX_EXTRACTED_CHARS:
+        omitted = len(text) - MAX_EXTRACTED_CHARS
+        return text[:MAX_EXTRACTED_CHARS] + f"\n\n[... truncated, {omitted:,} characters omitted]"
+    return text
+
+
+def _extract_plain_text(data: bytes) -> str:
+    """Decode plain text or markdown bytes as UTF-8, falling back to Latin-1."""
+    try:
+        return data.decode("utf-8")
+    except UnicodeDecodeError:
+        return data.decode("latin-1")
+
+
+def _extract_csv(data: bytes) -> str:
+    """Extract text from CSV files as one 'Row N: a, b, c' line per record."""
+    try:
+        text = data.decode("utf-8")
+    except UnicodeDecodeError:
+        text = data.decode("latin-1")
+
+    reader = csv.reader(io.StringIO(text))
+    lines = []
+    for i, row in enumerate(reader):
+        lines.append(f"Row {i + 1}: {', '.join(row)}")
+    return "\n".join(lines)
+
+
+def _extract_docx(data: bytes) -> str:
+    """Extract text from Word documents."""
+    from docx import Document
+
+    doc = Document(io.BytesIO(data))
+    paragraphs = [p.text for p in doc.paragraphs if p.text.strip()]
+    return "\n\n".join(paragraphs)
+
+
+def _extract_xlsx(data: bytes) -> str:
+    """Extract text from Excel spreadsheets."""
+    from openpyxl import load_workbook
+
+    wb = load_workbook(io.BytesIO(data), read_only=True, data_only=True)
+    sections = []
+
+    for sheet_idx, sheet_name in enumerate(wb.sheetnames):
+        if sheet_idx >= MAX_EXCEL_SHEETS:
+            sections.append(f"\n[... {len(wb.sheetnames) - MAX_EXCEL_SHEETS} more sheets omitted]")
+            break
+
+        ws = wb[sheet_name]
+        rows_text = [f"=== Sheet: {sheet_name} ==="]
+        row_count = 0
+
+        for row in ws.iter_rows(values_only=True):
+            if row_count >= MAX_EXCEL_ROWS_PER_SHEET:
+                rows_text.append(f"[... more rows omitted, limit {MAX_EXCEL_ROWS_PER_SHEET:,} rows/sheet]")
+                break
+            cells = [str(cell) if cell is not None else "" for cell in row]
+            rows_text.append("\t".join(cells))
+            row_count += 1
+
+        sections.append("\n".join(rows_text))
+
+    wb.close()
+    return "\n\n".join(sections)
+
+
+def _extract_pdf(data: bytes, filename: str) -> str:
+    """Extract text from PDF files."""
+    from PyPDF2 import PdfReader
+    from PyPDF2.errors import PdfReadError
+
+    try:
+        reader = PdfReader(io.BytesIO(data))
+    except PdfReadError as e:
+        if "encrypt" in str(e).lower() or "password" in str(e).lower():
+            raise DocumentExtractionError(filename, "PDF is password-protected")
+        raise
+
+    if reader.is_encrypted:
+        raise DocumentExtractionError(filename, "PDF is password-protected")
+
+    pages = []
+    for i, page in enumerate(reader.pages):
+        text = page.extract_text()
+        if text and text.strip():
+            pages.append(f"--- Page {i + 1} ---\n{text}")
+
+    return "\n\n".join(pages)
+
+
+def _extract_pptx(data: bytes) -> str:
+    """Extract text from PowerPoint presentations."""
+    from pptx import Presentation
+
+    prs = Presentation(io.BytesIO(data))
+    slides_text = []
+
+    for i, slide in enumerate(prs.slides):
+        texts = []
+        for shape in slide.shapes:
+            if shape.has_text_frame:
+                for paragraph in shape.text_frame.paragraphs:
+                    text = paragraph.text.strip()
+                    if text:
+                        texts.append(text)
+        if texts:
+            slides_text.append(f"--- Slide {i + 1} ---\n" + "\n".join(texts))
+
+    return "\n\n".join(slides_text)
+
+
+def extract_text_from_document(base64_data: str, mime_type: str, filename: str) -> str:
+    """
+    Extract text content from a document file.
+
+    Args:
+        base64_data: Base64-encoded file content
+        mime_type: MIME type of the document
+        filename: Original filename (for error messages)
+
+    Returns:
+        Extracted text (truncated if necessary), or a placeholder note if no text is found
+
+    Raises:
+        DocumentExtractionError: If the type is unsupported, the base64 is invalid, or extraction fails
+    """
+    if mime_type not in DOCUMENT_MIME_TYPES:
+        raise DocumentExtractionError(filename, f"unsupported document type: {mime_type}")
+
+    try:
+        data = base64.b64decode(base64_data)
+    except Exception as e:
+        raise DocumentExtractionError(filename, f"invalid base64 data: {e}")
+
+    try:
+        if mime_type in ("text/plain", "text/markdown"):
+            text = _extract_plain_text(data)
+        elif mime_type == "text/csv":
+            text = _extract_csv(data)
+        elif mime_type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
+            text = _extract_docx(data)
+        elif mime_type == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":
+            text = _extract_xlsx(data)
+        elif mime_type == "application/pdf":
+            text = _extract_pdf(data, filename)
+        elif mime_type == "application/vnd.openxmlformats-officedocument.presentationml.presentation":
+            text = _extract_pptx(data)
+        else:
+            raise DocumentExtractionError(filename, f"unsupported document type: {mime_type}")
+    except DocumentExtractionError:
+        raise
+    except Exception as e:
+        logger.warning(f"Document extraction failed for {filename}: {e}")
+        raise DocumentExtractionError(
+            filename, "file appears to be corrupt or in an unexpected format"
+        )
+
+    if not text or not text.strip():
+        return f"[File {filename} is empty or contains no extractable text]"
+
+    return _truncate(text)
--- a/ui/src/components/ChatMessage.tsx
+++ b/ui/src/components/ChatMessage.tsx
@@ -6,10 +6,11 @@
 */

 import { memo } from 'react'
-import { Bot, User, Info } from 'lucide-react'
+import { Bot, User, Info, FileText } from 'lucide-react'
 import ReactMarkdown, { type Components } from 'react-markdown'
 import remarkGfm from 'remark-gfm'
 import type { ChatMessage as ChatMessageType } from '../lib/types'
+import { isImageAttachment } from '../lib/types'
 import { Card } from '@/components/ui/card'

 interface ChatMessageProps {
@@ -104,21 +105,35 @@ export const ChatMessage = memo(function ChatMessage({ message }: ChatMessagePro
              </div>
            )}

-            {/* Display image attachments */}
+            {/* Display file attachments */}
            {attachments && attachments.length > 0 && (
              <div className={`flex flex-wrap gap-2 ${content ? 'mt-3' : ''}`}>
                {attachments.map((attachment) => (
                  <div key={attachment.id} className="border border-border rounded p-1 bg-card">
-                    <img
-                      src={attachment.previewUrl}
-                      alt={attachment.filename}
-                      className="max-w-48 max-h-48 object-contain cursor-pointer hover:opacity-90 transition-opacity rounded"
-                      onClick={() => window.open(attachment.previewUrl, '_blank')}
-                      title={`${attachment.filename} (click to enlarge)`}
-                    />
-                    <span className="text-xs text-muted-foreground block mt-1 text-center">
-                      {attachment.filename}
-                    </span>
+                    {isImageAttachment(attachment) ? (
+                      <>
+                        <img
+                          src={attachment.previewUrl}
+                          alt={attachment.filename}
+                          className="max-w-48 max-h-48 object-contain cursor-pointer hover:opacity-90 transition-opacity rounded"
+                          onClick={() => window.open(attachment.previewUrl, '_blank')}
+                          title={`${attachment.filename} (click to enlarge)`}
+                        />
+                        <span className="text-xs text-muted-foreground block mt-1 text-center">
+                          {attachment.filename}
+                        </span>
+                      </>
+                    ) : (
+                      <div className="flex items-center gap-2 px-2 py-1">
+                        <FileText size={16} className="text-muted-foreground flex-shrink-0" />
+                        <span className="text-xs text-muted-foreground">
+                          {attachment.filename}
+                        </span>
+                        <span className="text-xs text-muted-foreground/60">
+                          ({(attachment.size / 1024).toFixed(0)} KB)
+                        </span>
+                      </div>
+                    )}
                  </div>
                ))}
              </div>
--- a/ui/src/components/ExpandProjectChat.tsx
+++ b/ui/src/components/ExpandProjectChat.tsx
@@ -6,20 +6,22 @@
 */

 import { useCallback, useEffect, useRef, useState } from 'react'
-import { Send, X, CheckCircle2, AlertCircle, Wifi, WifiOff, RotateCcw, Paperclip, Plus } from 'lucide-react'
+import { Send, X, CheckCircle2, AlertCircle, Wifi, WifiOff, RotateCcw, Paperclip, Plus, FileText } from 'lucide-react'
 import { useExpandChat } from '../hooks/useExpandChat'
 import { ChatMessage } from './ChatMessage'
 import { TypingIndicator } from './TypingIndicator'
-import type { ImageAttachment } from '../lib/types'
+import type { FileAttachment } from '../lib/types'
+import { ALL_ALLOWED_MIME_TYPES, IMAGE_MIME_TYPES, isImageAttachment, resolveMimeType } from '../lib/types'
 import { isSubmitEnter } from '../lib/keyboard'
 import { Button } from '@/components/ui/button'
 import { Input } from '@/components/ui/input'
 import { Card, CardContent } from '@/components/ui/card'
 import { Alert, AlertDescription } from '@/components/ui/alert'

-// Image upload validation constants
-const MAX_FILE_SIZE = 5 * 1024 * 1024 // 5 MB
-const ALLOWED_TYPES = ['image/jpeg', 'image/png']
+// File upload validation constants
+const MAX_IMAGE_SIZE = 5 * 1024 * 1024 // 5 MB for images
+const MAX_DOCUMENT_SIZE = 20 * 1024 * 1024 // 20 MB for documents
+const ALLOWED_EXTENSIONS = ['md', 'txt', 'csv', 'docx', 'xlsx', 'pdf', 'pptx', 'jpg', 'jpeg', 'png']

 interface ExpandProjectChatProps {
  projectName: string
@@ -34,7 +36,7 @@ export function ExpandProjectChat({
 }: ExpandProjectChatProps) {
  const [input, setInput] = useState('')
  const [error, setError] = useState<string | null>(null)
-  const [pendingAttachments, setPendingAttachments] = useState<ImageAttachment[]>([])
+  const [pendingAttachments, setPendingAttachments] = useState<FileAttachment[]>([])
  const messagesEndRef = useRef<HTMLDivElement>(null)
  const inputRef = useRef<HTMLInputElement>(null)
  const fileInputRef = useRef<HTMLInputElement>(null)
@@ -95,20 +97,33 @@ export function ExpandProjectChat({
    }
  }

-  // File handling for image attachments
+  // File handling for attachments (images and documents)
  const handleFileSelect = useCallback((files: FileList | null) => {
    if (!files) return

    Array.from(files).forEach((file) => {
-      // Validate file type
-      if (!ALLOWED_TYPES.includes(file.type)) {
-        setError(`Invalid file type: ${file.name}. Only JPEG and PNG are supported.`)
-        return
+      // Resolve MIME type (browsers may not set it for .md files)
+      let mimeType = file.type
+      if (!mimeType || !ALL_ALLOWED_MIME_TYPES.includes(mimeType)) {
+        mimeType = resolveMimeType(file.name)
      }

-      // Validate file size
-      if (file.size > MAX_FILE_SIZE) {
-        setError(`File too large: ${file.name}. Maximum size is 5 MB.`)
+      // Validate file type
+      if (!ALL_ALLOWED_MIME_TYPES.includes(mimeType)) {
+        const ext = file.name.split('.').pop()?.toLowerCase()
+        if (!ext || !ALLOWED_EXTENSIONS.includes(ext)) {
+          setError(`Unsupported file type: ${file.name}. Supported: images (JPEG, PNG) and documents (MD, TXT, CSV, DOCX, XLSX, PDF, PPTX).`)
+          return
+        }
+        mimeType = resolveMimeType(file.name)
+      }
+
+      // Validate size based on type
+      const isImage = (IMAGE_MIME_TYPES as readonly string[]).includes(mimeType)
+      const maxSize = isImage ? MAX_IMAGE_SIZE : MAX_DOCUMENT_SIZE
+      const maxLabel = isImage ? '5 MB' : '20 MB'
+      if (file.size > maxSize) {
+        setError(`File too large: ${file.name}. Maximum size is ${maxLabel}.`)
        return
      }

@@ -118,12 +133,12 @@ export function ExpandProjectChat({
        const dataUrl = e.target?.result as string
        const base64Data = dataUrl.split(',')[1]

-        const attachment: ImageAttachment = {
+        const attachment: FileAttachment = {
          id: `${Date.now()}-${Math.random().toString(36).substring(2, 9)}`,
          filename: file.name,
-          mimeType: file.type as 'image/jpeg' | 'image/png',
+          mimeType: mimeType as FileAttachment['mimeType'],
          base64Data,
-          previewUrl: dataUrl,
+          previewUrl: isImage ? dataUrl : '',
          size: file.size,
        }

@@ -291,11 +306,17 @@ export function ExpandProjectChat({
                  key={attachment.id}
                  className="relative group border-2 border-border p-1 bg-card rounded shadow-sm"
                >
-                  <img
-                    src={attachment.previewUrl}
-                    alt={attachment.filename}
-                    className="w-16 h-16 object-cover rounded"
-                  />
+                  {isImageAttachment(attachment) ? (
+                    <img
+                      src={attachment.previewUrl}
+                      alt={attachment.filename}
+                      className="w-16 h-16 object-cover rounded"
+                    />
+                  ) : (
+                    <div className="w-16 h-16 flex items-center justify-center bg-muted rounded">
+                      <FileText size={24} className="text-muted-foreground" />
+                    </div>
+                  )}
                  <button
                    onClick={() => handleRemoveAttachment(attachment.id)}
                    className="absolute -top-2 -right-2 bg-destructive text-destructive-foreground rounded-full p-0.5 border-2 border-border hover:scale-110 transition-transform"
@@ -318,7 +339,7 @@ export function ExpandProjectChat({
            <input
              ref={fileInputRef}
              type="file"
-              accept="image/jpeg,image/png"
+              accept="image/jpeg,image/png,.md,.txt,.csv,.docx,.xlsx,.pdf,.pptx"
              multiple
              onChange={(e) => handleFileSelect(e.target.files)}
              className="hidden"
@@ -330,7 +351,7 @@ export function ExpandProjectChat({
              disabled={connectionStatus !== 'connected'}
              variant="ghost"
              size="icon"
-              title="Attach image (JPEG, PNG - max 5MB)"
+              title="Attach files (images: JPEG/PNG up to 5MB, documents: MD, TXT, CSV, DOCX, XLSX, PDF, PPTX up to 20MB)"
            >
              <Paperclip size={18} />
            </Button>
@@ -364,7 +385,7 @@ export function ExpandProjectChat({

          {/* Help text */}
          <p className="text-xs text-muted-foreground mt-2">
-            Press Enter to send. Drag & drop or click <Paperclip size={12} className="inline" /> to attach images.
+            Press Enter to send. Drag & drop or click <Paperclip size={12} className="inline" /> to attach files.
          </p>
        </div>
      )}
--- a/ui/src/components/SpecCreationChat.tsx
+++ b/ui/src/components/SpecCreationChat.tsx
@@ -11,16 +11,18 @@ import { useSpecChat } from '../hooks/useSpecChat'
 import { ChatMessage } from './ChatMessage'
 import { QuestionOptions } from './QuestionOptions'
 import { TypingIndicator } from './TypingIndicator'
-import type { ImageAttachment } from '../lib/types'
+import type { FileAttachment } from '../lib/types'
+import { ALL_ALLOWED_MIME_TYPES, IMAGE_MIME_TYPES, isImageAttachment, resolveMimeType } from '../lib/types'
 import { isSubmitEnter } from '../lib/keyboard'
 import { Button } from '@/components/ui/button'
 import { Textarea } from '@/components/ui/textarea'
 import { Card, CardContent } from '@/components/ui/card'
 import { Alert, AlertDescription } from '@/components/ui/alert'

-// Image upload validation constants
-const MAX_FILE_SIZE = 5 * 1024 * 1024 // 5 MB
-const ALLOWED_TYPES = ['image/jpeg', 'image/png']
+// File upload validation constants
+const MAX_IMAGE_SIZE = 5 * 1024 * 1024 // 5 MB for images
+const MAX_DOCUMENT_SIZE = 20 * 1024 * 1024 // 20 MB for documents
+const ALLOWED_EXTENSIONS = ['md', 'txt', 'csv', 'docx', 'xlsx', 'pdf', 'pptx', 'jpg', 'jpeg', 'png']

 // Sample prompt for quick testing
 const SAMPLE_PROMPT = `Let's call it Simple Todo. This is a really simple web app that I can use to track my to-do items using a Kanban board. I should be able to add to-dos and then drag and drop them through the Kanban board. The different columns in the Kanban board are:
@@ -64,7 +66,7 @@ export function SpecCreationChat({
  const [input, setInput] = useState('')
  const [error, setError] = useState<string | null>(null)
  const [yoloEnabled, setYoloEnabled] = useState(false)
-  const [pendingAttachments, setPendingAttachments] = useState<ImageAttachment[]>([])
+  const [pendingAttachments, setPendingAttachments] = useState<FileAttachment[]>([])
  const messagesEndRef = useRef<HTMLDivElement>(null)
  const inputRef = useRef<HTMLTextAreaElement>(null)
  const fileInputRef = useRef<HTMLInputElement>(null)
@@ -138,20 +140,33 @@ export function SpecCreationChat({
    sendAnswer(answers)
  }

-  // File handling for image attachments
+  // File handling for attachments (images and documents)
  const handleFileSelect = useCallback((files: FileList | null) => {
    if (!files) return

    Array.from(files).forEach((file) => {
-      // Validate file type
-      if (!ALLOWED_TYPES.includes(file.type)) {
-        setError(`Invalid file type: ${file.name}. Only JPEG and PNG are supported.`)
-        return
+      // Resolve MIME type (browsers may not set it for .md files)
+      let mimeType = file.type
+      if (!mimeType || !ALL_ALLOWED_MIME_TYPES.includes(mimeType)) {
+        mimeType = resolveMimeType(file.name)
      }

-      // Validate file size
-      if (file.size > MAX_FILE_SIZE) {
-        setError(`File too large: ${file.name}. Maximum size is 5 MB.`)
+      // Validate file type
+      if (!ALL_ALLOWED_MIME_TYPES.includes(mimeType)) {
+        const ext = file.name.split('.').pop()?.toLowerCase()
+        if (!ext || !ALLOWED_EXTENSIONS.includes(ext)) {
+          setError(`Unsupported file type: ${file.name}. Supported: images (JPEG, PNG) and documents (MD, TXT, CSV, DOCX, XLSX, PDF, PPTX).`)
+          return
+        }
+        mimeType = resolveMimeType(file.name)
+      }
+
+      // Validate size based on type
+      const isImage = (IMAGE_MIME_TYPES as readonly string[]).includes(mimeType)
+      const maxSize = isImage ? MAX_IMAGE_SIZE : MAX_DOCUMENT_SIZE
+      const maxLabel = isImage ? '5 MB' : '20 MB'
+      if (file.size > maxSize) {
+        setError(`File too large: ${file.name}. Maximum size is ${maxLabel}.`)
        return
      }

@@ -159,15 +174,14 @@ export function SpecCreationChat({
      const reader = new FileReader()
      reader.onload = (e) => {
        const dataUrl = e.target?.result as string
-        // dataUrl is "data:image/png;base64,XXXXXX"
        const base64Data = dataUrl.split(',')[1]

-        const attachment: ImageAttachment = {
+        const attachment: FileAttachment = {
          id: `${Date.now()}-${Math.random().toString(36).substring(2, 9)}`,
          filename: file.name,
-          mimeType: file.type as 'image/jpeg' | 'image/png',
+          mimeType: mimeType as FileAttachment['mimeType'],
          base64Data,
-          previewUrl: dataUrl,
+          previewUrl: isImage ? dataUrl : '',
          size: file.size,
        }

@@ -364,11 +378,17 @@ export function SpecCreationChat({
                  key={attachment.id}
                  className="relative group border-2 border-border p-1 bg-card rounded shadow-sm"
                >
-                  <img
-                    src={attachment.previewUrl}
-                    alt={attachment.filename}
-                    className="w-16 h-16 object-cover rounded"
-                  />
+                  {isImageAttachment(attachment) ? (
+                    <img
+                      src={attachment.previewUrl}
+                      alt={attachment.filename}
+                      className="w-16 h-16 object-cover rounded"
+                    />
+                  ) : (
+                    <div className="w-16 h-16 flex items-center justify-center bg-muted rounded">
+                      <FileText size={24} className="text-muted-foreground" />
+                    </div>
+                  )}
                  <button
                    onClick={() => handleRemoveAttachment(attachment.id)}
                    className="absolute -top-2 -right-2 bg-destructive text-destructive-foreground rounded-full p-0.5 border-2 border-border hover:scale-110 transition-transform"
@@ -391,7 +411,7 @@ export function SpecCreationChat({
            <input
              ref={fileInputRef}
              type="file"
-              accept="image/jpeg,image/png"
+              accept="image/jpeg,image/png,.md,.txt,.csv,.docx,.xlsx,.pdf,.pptx"
              multiple
              onChange={(e) => handleFileSelect(e.target.files)}
              className="hidden"
@@ -403,7 +423,7 @@ export function SpecCreationChat({
              disabled={connectionStatus !== 'connected'}
              variant="ghost"
              size="icon"
-              title="Attach image (JPEG, PNG - max 5MB)"
+              title="Attach files (images: JPEG/PNG up to 5MB, documents: MD, TXT, CSV, DOCX, XLSX, PDF, PPTX up to 20MB)"
            >
              <Paperclip size={18} />
            </Button>
@@ -444,7 +464,7 @@ export function SpecCreationChat({

          {/* Help text */}
          <p className="text-xs text-muted-foreground mt-2">
-            Press Enter to send, Shift+Enter for new line. Drag & drop or click <Paperclip size={12} className="inline" /> to attach images (JPEG/PNG, max 5MB).
+            Press Enter to send, Shift+Enter for new line. Drag & drop or click <Paperclip size={12} className="inline" /> to attach files.
          </p>
        </div>
      )}
--- a/ui/src/hooks/useExpandChat.ts
+++ b/ui/src/hooks/useExpandChat.ts
@@ -3,7 +3,7 @@
 */

 import { useState, useCallback, useRef, useEffect } from 'react'
-import type { ChatMessage, ImageAttachment, ExpandChatServerMessage } from '../lib/types'
+import type { ChatMessage, FileAttachment, ExpandChatServerMessage } from '../lib/types'

 type ConnectionStatus = 'disconnected' | 'connecting' | 'connected' | 'error'

@@ -27,7 +27,7 @@ interface UseExpandChatReturn {
  featuresCreated: number
  recentFeatures: CreatedFeature[]
  start: () => void
-  sendMessage: (content: string, attachments?: ImageAttachment[]) => void
+  sendMessage: (content: string, attachments?: FileAttachment[]) => void
  disconnect: () => void
 }

@@ -278,7 +278,7 @@ export function useExpandChat({
    setTimeout(checkAndSend, 100)
  }, [connect, onError])

-  const sendMessage = useCallback((content: string, attachments?: ImageAttachment[]) => {
+  const sendMessage = useCallback((content: string, attachments?: FileAttachment[]) => {
    if (!wsRef.current || wsRef.current.readyState !== WebSocket.OPEN) {
      onError?.('Not connected')
      return
--- a/ui/src/hooks/useSpecChat.ts
+++ b/ui/src/hooks/useSpecChat.ts
@@ -3,7 +3,7 @@
 */

 import { useState, useCallback, useRef, useEffect } from 'react'
-import type { ChatMessage, ImageAttachment, SpecChatServerMessage, SpecQuestion } from '../lib/types'
+import type { ChatMessage, FileAttachment, SpecChatServerMessage, SpecQuestion } from '../lib/types'
 import { getSpecStatus } from '../lib/api'

 type ConnectionStatus = 'disconnected' | 'connecting' | 'connected' | 'error'
@@ -22,7 +22,7 @@ interface UseSpecChatReturn {
  currentQuestions: SpecQuestion[] | null
  currentToolId: string | null
  start: () => void
-  sendMessage: (content: string, attachments?: ImageAttachment[]) => void
+  sendMessage: (content: string, attachments?: FileAttachment[]) => void
  sendAnswer: (answers: Record<string, string | string[]>) => void
  disconnect: () => void
 }
@@ -367,7 +367,7 @@ export function useSpecChat({
    setTimeout(checkAndSend, 100)
  }, [connect])

-  const sendMessage = useCallback((content: string, attachments?: ImageAttachment[]) => {
+  const sendMessage = useCallback((content: string, attachments?: FileAttachment[]) => {
    if (!wsRef.current || wsRef.current.readyState !== WebSocket.OPEN) {
      onError?.('Not connected')
      return
--- a/ui/src/lib/types.ts
+++ b/ui/src/lib/types.ts
@@ -417,22 +417,67 @@ export type SpecChatServerMessage =
  | SpecChatPongMessage
  | SpecChatResponseDoneMessage

-// Image attachment for chat messages
-export interface ImageAttachment {
+// File attachment for chat messages (images and documents)
+export interface FileAttachment {
  id: string
  filename: string
-  mimeType: 'image/jpeg' | 'image/png'
+  mimeType:
+    | 'image/jpeg'
+    | 'image/png'
+    | 'text/plain'
+    | 'text/markdown'
+    | 'text/csv'
+    | 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
+    | 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
+    | 'application/pdf'
+    | 'application/vnd.openxmlformats-officedocument.presentationml.presentation'
  base64Data: string    // Raw base64 (without data: prefix)
-  previewUrl: string    // data: URL for display
+  previewUrl: string    // data: URL for images, empty string for documents
  size: number          // File size in bytes
 }

+/** @deprecated Use FileAttachment instead */
+export type ImageAttachment = FileAttachment
+
+/** MIME types that identify an attachment as an image. */
+export const IMAGE_MIME_TYPES = ['image/jpeg', 'image/png'] as const
+/** MIME types that identify an attachment as a (non-image) document. */
+export const DOCUMENT_MIME_TYPES = [
+  'text/plain',
+  'text/markdown',
+  'text/csv',
+  'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
+  'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
+  'application/pdf',
+  'application/vnd.openxmlformats-officedocument.presentationml.presentation',
+] as const
+/**
+ * Every accepted MIME type: the image types followed by the document types.
+ * Typed as a plain `string[]` rather than the literal tuple types above.
+ */
+export const ALL_ALLOWED_MIME_TYPES: string[] = [...IMAGE_MIME_TYPES, ...DOCUMENT_MIME_TYPES]
+
+/**
+ * Reports whether an attachment is an image, i.e. whether its `mimeType` is
+ * one of the entries of `IMAGE_MIME_TYPES`.
+ *
+ * @param att - Attachment to classify.
+ * @returns `true` for image attachments, `false` for every other type.
+ */
+export function isImageAttachment(att: FileAttachment): boolean {
+  // Widen the literal tuple through a typed local so `includes` accepts the
+  // full `mimeType` union without an inline assertion.
+  const imageTypes: readonly string[] = IMAGE_MIME_TYPES
+  return imageTypes.includes(att.mimeType)
+}
+
+/**
+ * Infers a MIME type from a filename's extension.
+ *
+ * The extension is the text after the last `.` in `filename`, lower-cased; a
+ * name that contains no `.` is looked up as if the whole name were the
+ * extension.
+ *
+ * @param filename - File name to inspect.
+ * @returns The MIME type mapped to the extension, or
+ *   `'application/octet-stream'` when the extension is not in the table.
+ */
+export function resolveMimeType(filename: string): string {
+  const ext = filename.split('.').pop()?.toLowerCase() ?? ''
+  const map: Record<string, string> = {
+    md: 'text/markdown',
+    txt: 'text/plain',
+    csv: 'text/csv',
+    docx: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
+    xlsx: 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
+    pdf: 'application/pdf',
+    pptx: 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
+    jpg: 'image/jpeg',
+    jpeg: 'image/jpeg',
+    png: 'image/png',
+  }
+  // Consult only the table's own keys. A plain object literal also exposes
+  // inherited members, so an unguarded `map[ext]` would return a non-string
+  // for names such as `file.constructor` or `file.__proto__`.
+  const known = Object.prototype.hasOwnProperty.call(map, ext) ? map[ext] : undefined
+  return known ?? 'application/octet-stream'
+}
+
 // UI chat message for display
 export interface ChatMessage {
  id: string
  role: 'user' | 'assistant' | 'system'
  content: string
-  attachments?: ImageAttachment[]
+  attachments?: FileAttachment[]
  timestamp: Date
  questions?: SpecQuestion[]
  isStreaming?: boolean