Files
autocoder/server/services/chat_constants.py
Auto 7210c6f066 feat: add document file upload support for spec creation and project expansion
Add support for uploading Markdown, Text, Word (.docx), CSV, Excel (.xlsx),
PDF, and PowerPoint (.pptx) files in addition to existing JPEG/PNG image
uploads in the spec creation and project expansion chat interfaces.

Backend changes:
- New server/utils/document_extraction.py: in-memory text extraction for all
  document formats using python-docx, openpyxl, PyPDF2, python-pptx (no disk
  persistence)
- Rename ImageAttachment to FileAttachment across schemas, routers, and
  chat session services
- Add build_attachment_content_blocks() helper in chat_constants.py to route
  images as image content blocks and documents as extracted text blocks
- Separate size limits: 5MB for images, 20MB for documents
- Handle extraction errors (corrupt files, encrypted PDFs) gracefully

Frontend changes:
- Widen accepted MIME types and file extensions in both chat components
- Add resolveMimeType() fallback for browsers that don't set MIME on .md files
- Document attachments display with FileText icon instead of image thumbnail
- ChatMessage renders documents as compact pills with filename and size
- Update help text from "attach images" to "attach files"

Dependencies added: python-docx, openpyxl, PyPDF2, python-pptx

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-25 12:51:17 +02:00

148 lines
5.4 KiB
Python

"""
Chat Session Constants
======================
Shared constants for all chat session types (assistant, spec, expand).
The canonical ``API_ENV_VARS`` list lives in ``env_constants.py`` at the
project root and is re-exported here for convenience so that existing
imports (``from .chat_constants import API_ENV_VARS``) continue to work.
"""
import logging
import sys
from pathlib import Path
from typing import Any, AsyncGenerator
# -------------------------------------------------------------------
# Root directory of the autoforge project (repository root).
# Used throughout the server package whenever the repo root is needed.
# -------------------------------------------------------------------
ROOT_DIR = Path(__file__).parent.parent.parent
# Ensure the project root is on sys.path so we can import env_constants
# from the root-level module without requiring a package install.
_root_str = str(ROOT_DIR)
if _root_str not in sys.path:
sys.path.insert(0, _root_str)
# -------------------------------------------------------------------
# Environment variables forwarded to Claude CLI subprocesses.
# Single source of truth lives in env_constants.py at the project root.
# Re-exported here so existing ``from .chat_constants import API_ENV_VARS``
# imports continue to work unchanged.
# -------------------------------------------------------------------
from env_constants import API_ENV_VARS # noqa: E402, F401
from rate_limit_utils import is_rate_limit_error, parse_retry_after # noqa: E402, F401
from ..schemas import FileAttachment
from ..utils.document_extraction import (
extract_text_from_document,
is_document,
is_image,
)
logger = logging.getLogger(__name__)
def check_rate_limit_error(exc: Exception) -> tuple[bool, int | None]:
"""Inspect an exception and determine if it represents a rate-limit.
Returns ``(is_rate_limit, retry_seconds)``. ``retry_seconds`` is the
parsed Retry-After value when available, otherwise ``None`` (caller
should use exponential backoff).
"""
# MessageParseError = unknown CLI message type (e.g. "rate_limit_event").
# These are informational events, NOT actual rate limit errors.
# The word "rate_limit" in the type name would false-positive the regex.
if type(exc).__name__ == "MessageParseError":
return False, None
# For all other exceptions: match error text against known rate-limit patterns
exc_str = str(exc)
if is_rate_limit_error(exc_str):
retry = parse_retry_after(exc_str)
return True, retry
return False, None
async def safe_receive_response(client: Any, log: logging.Logger) -> AsyncGenerator:
"""Wrap ``client.receive_response()`` to skip ``MessageParseError``.
The Claude Code CLI may emit message types (e.g. ``rate_limit_event``)
that the installed Python SDK does not recognise, causing
``MessageParseError`` which kills the async generator. The CLI
subprocess is still alive and the SDK uses a buffered memory channel,
so we restart ``receive_response()`` to continue reading remaining
messages without losing data.
"""
max_retries = 50
retries = 0
while True:
try:
async for msg in client.receive_response():
yield msg
return # Normal completion
except Exception as exc:
if type(exc).__name__ == "MessageParseError":
retries += 1
if retries > max_retries:
log.error(f"Too many unrecognized CLI messages ({retries}), stopping")
return
log.warning(f"Ignoring unrecognized message from Claude CLI: {exc}")
continue
raise
def build_attachment_content_blocks(attachments: list[FileAttachment]) -> list[dict]:
"""Convert FileAttachment objects to Claude API content blocks.
Images become image content blocks (passed directly to Claude's vision).
Documents are extracted to text and become text content blocks.
Raises:
DocumentExtractionError: If a document cannot be read.
"""
blocks: list[dict] = []
for att in attachments:
if is_image(att.mimeType):
blocks.append({
"type": "image",
"source": {
"type": "base64",
"media_type": att.mimeType,
"data": att.base64Data,
}
})
elif is_document(att.mimeType):
text = extract_text_from_document(att.base64Data, att.mimeType, att.filename)
blocks.append({
"type": "text",
"text": f"[Content of uploaded file: {att.filename}]\n\n{text}",
})
return blocks
async def make_multimodal_message(content_blocks: list[dict]) -> AsyncGenerator[dict, None]:
"""Yield a single multimodal user message in Claude Agent SDK format.
The Claude Agent SDK's ``query()`` method accepts either a plain string
or an ``AsyncIterable[dict]`` for custom message formats. This helper
wraps a list of content blocks (text and/or images) in the expected
envelope.
Args:
content_blocks: List of content-block dicts, e.g.
``[{"type": "text", "text": "..."}, {"type": "image", ...}]``.
Yields:
A single dict representing the user message.
"""
yield {
"type": "user",
"message": {"role": "user", "content": content_blocks},
"parent_tool_use_id": None,
"session_id": "default",
}