mirror of
https://github.com/leonvanzyl/autocoder.git
synced 2026-03-25 23:03:09 +00:00
Add support for uploading Markdown, Text, Word (.docx), CSV, Excel (.xlsx), PDF, and PowerPoint (.pptx) files in addition to existing JPEG/PNG image uploads in the spec creation and project expansion chat interfaces. Backend changes: - New server/utils/document_extraction.py: in-memory text extraction for all document formats using python-docx, openpyxl, PyPDF2, python-pptx (no disk persistence) - Rename ImageAttachment to FileAttachment across schemas, routers, and chat session services - Add build_attachment_content_blocks() helper in chat_constants.py to route images as image content blocks and documents as extracted text blocks - Separate size limits: 5MB for images, 20MB for documents - Handle extraction errors (corrupt files, encrypted PDFs) gracefully Frontend changes: - Widen accepted MIME types and file extensions in both chat components - Add resolveMimeType() fallback for browsers that don't set MIME on .md files - Document attachments display with FileText icon instead of image thumbnail - ChatMessage renders documents as compact pills with filename and size - Update help text from "attach images" to "attach files" Dependencies added: python-docx, openpyxl, PyPDF2, python-pptx Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
148 lines
5.4 KiB
Python
148 lines
5.4 KiB
Python
"""
|
|
Chat Session Constants
|
|
======================
|
|
|
|
Shared constants for all chat session types (assistant, spec, expand).
|
|
|
|
The canonical ``API_ENV_VARS`` list lives in ``env_constants.py`` at the
|
|
project root and is re-exported here for convenience so that existing
|
|
imports (``from .chat_constants import API_ENV_VARS``) continue to work.
|
|
"""
|
|
|
|
import logging
|
|
import sys
|
|
from pathlib import Path
|
|
from typing import Any, AsyncGenerator
|
|
|
|
# -------------------------------------------------------------------
# Root directory of the project (repository root), resolved as three
# directory levels above this file.
# Used throughout the server package whenever the repo root is needed.
# -------------------------------------------------------------------
ROOT_DIR = Path(__file__).parent.parent.parent

# env_constants is a root-level module, not an installed package, so the
# project root must be on sys.path before it is imported further down.
# The membership check keeps the entry from being inserted twice.
_root_str = str(ROOT_DIR)
if _root_str not in sys.path:
    sys.path.insert(0, _root_str)
|
|
|
|
# -------------------------------------------------------------------
|
|
# Environment variables forwarded to Claude CLI subprocesses.
|
|
# Single source of truth lives in env_constants.py at the project root.
|
|
# Re-exported here so existing ``from .chat_constants import API_ENV_VARS``
|
|
# imports continue to work unchanged.
|
|
# -------------------------------------------------------------------
|
|
from env_constants import API_ENV_VARS # noqa: E402, F401
|
|
from rate_limit_utils import is_rate_limit_error, parse_retry_after # noqa: E402, F401
|
|
|
|
from ..schemas import FileAttachment
|
|
from ..utils.document_extraction import (
|
|
extract_text_from_document,
|
|
is_document,
|
|
is_image,
|
|
)
|
|
|
|
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def check_rate_limit_error(exc: Exception) -> tuple[bool, int | None]:
|
|
"""Inspect an exception and determine if it represents a rate-limit.
|
|
|
|
Returns ``(is_rate_limit, retry_seconds)``. ``retry_seconds`` is the
|
|
parsed Retry-After value when available, otherwise ``None`` (caller
|
|
should use exponential backoff).
|
|
"""
|
|
# MessageParseError = unknown CLI message type (e.g. "rate_limit_event").
|
|
# These are informational events, NOT actual rate limit errors.
|
|
# The word "rate_limit" in the type name would false-positive the regex.
|
|
if type(exc).__name__ == "MessageParseError":
|
|
return False, None
|
|
|
|
# For all other exceptions: match error text against known rate-limit patterns
|
|
exc_str = str(exc)
|
|
if is_rate_limit_error(exc_str):
|
|
retry = parse_retry_after(exc_str)
|
|
return True, retry
|
|
|
|
return False, None
|
|
|
|
|
|
async def safe_receive_response(
    client: Any,
    log: logging.Logger,
    *,
    max_retries: int = 50,
) -> AsyncGenerator[Any, None]:
    """Wrap ``client.receive_response()`` so ``MessageParseError`` is skipped.

    The Claude Code CLI may emit message types (e.g. ``rate_limit_event``)
    that the installed Python SDK does not recognise, causing
    ``MessageParseError`` which kills the async generator. The CLI
    subprocess is still alive and the SDK uses a buffered memory channel,
    so ``receive_response()`` is restarted to continue reading the
    remaining messages without losing data.

    Args:
        client: Object exposing ``receive_response()``, which must return
            an async iterable of messages.
        log: Logger that receives a warning for every skipped message and
            an error when the retry budget is exhausted.
        max_retries: Maximum number of ``MessageParseError`` occurrences
            tolerated over the whole stream before giving up. Defaults to
            50, the previously hard-coded limit.

    Yields:
        Each message produced by ``client.receive_response()``.

    Raises:
        Exception: Any exception other than ``MessageParseError`` raised
            by the underlying stream is re-raised unchanged.
    """
    retries = 0
    while True:
        try:
            async for msg in client.receive_response():
                yield msg
            return  # Normal completion
        except Exception as exc:
            # The exception is recognised by its class name only; anything
            # else is a real failure and must reach the caller.
            if type(exc).__name__ != "MessageParseError":
                raise
            retries += 1
            if retries > max_retries:
                # Stop quietly instead of looping forever on a stream that
                # keeps producing unparseable messages.
                log.error("Too many unrecognized CLI messages (%d), stopping", retries)
                return
            log.warning("Ignoring unrecognized message from Claude CLI: %s", exc)
|
|
|
|
|
|
def build_attachment_content_blocks(attachments: list[FileAttachment]) -> list[dict]:
    """Convert FileAttachment objects to Claude API content blocks.

    Images become base64 ``image`` content blocks. Documents are run
    through ``extract_text_from_document`` and become ``text`` content
    blocks prefixed with the uploaded filename. Attachments whose MIME
    type is neither an image nor a document produce no block; a warning
    is logged so the omission is visible.

    Args:
        attachments: Attachments to convert, in the order they should
            appear in the message.

    Returns:
        Content-block dicts in the same order as the supported
        attachments.

    Raises:
        DocumentExtractionError: If a document cannot be read.
    """
    blocks: list[dict] = []
    for att in attachments:
        if is_image(att.mimeType):
            # Images are forwarded as-is; no decoding happens here.
            blocks.append({
                "type": "image",
                "source": {
                    "type": "base64",
                    "media_type": att.mimeType,
                    "data": att.base64Data,
                },
            })
        elif is_document(att.mimeType):
            text = extract_text_from_document(att.base64Data, att.mimeType, att.filename)
            blocks.append({
                "type": "text",
                "text": f"[Content of uploaded file: {att.filename}]\n\n{text}",
            })
        else:
            # Previously dropped silently; keep skipping (callers rely on
            # the best-effort behaviour) but leave a trace in the logs.
            logger.warning(
                "Skipping attachment %r with unsupported MIME type %r",
                att.filename,
                att.mimeType,
            )
    return blocks
|
|
|
|
|
|
async def make_multimodal_message(content_blocks: list[dict]) -> AsyncGenerator[dict, None]:
    """Produce one user message wrapping ``content_blocks``.

    The Claude Agent SDK's ``query()`` method accepts either a plain
    string or an ``AsyncIterable[dict]`` for custom message formats.
    This async generator supplies the latter: it emits exactly one
    envelope whose content is the given list of blocks (text and/or
    images), passed through without copying.

    Args:
        content_blocks: List of content-block dicts, e.g.
            ``[{"type": "text", "text": "..."}, {"type": "image", ...}]``.

    Yields:
        A single dict representing the user message.
    """
    user_message = {"role": "user", "content": content_blocks}
    envelope = {
        "type": "user",
        "message": user_message,
        "parent_tool_use_id": None,
        "session_id": "default",
    }
    yield envelope
|