Files
autocoder/server/services/assistant_database.py
Auto 94e0b05cb1 refactor: optimize token usage, deduplicate code, fix bugs across agents
Token reduction (~40% per session, ~2.3M fewer tokens per 200-feature project):
- Agent-type-specific tool lists: coding 9, testing 5, init 5 (was 19 for all)
- Right-sized max_turns: coding 300, testing 100 (was 1000 for all)
- Trimmed coding prompt template (~150 lines removed)
- Streamlined testing prompt with batch support
- YOLO mode now strips browser testing instructions from prompt
- Added Grep, WebFetch, WebSearch to expand project session

Performance improvements:
- Rate limit retries start at ~15s with jitter (was fixed 60s)
- Post-spawn delay reduced to 0.5s (was 2s)
- Orchestrator consolidated to 1 DB query per loop (was 5-7)
- Testing agents batch 3 features per session (was 1)
- Smart context compaction preserves critical state, discards noise

Bug fixes:
- Removed ghost feature_release_testing MCP tool (wasted tokens every test session)
- Forward all 9 Vertex AI env vars to chat sessions (was missing 3)
- Fix DetachedInstanceError risk in test batch ORM access
- Prevent duplicate testing of same features in parallel mode

Code deduplication:
- _get_project_path(): 9 copies -> 1 shared utility (project_helpers.py)
- validate_project_name(): 9 copies -> 2 variants in 1 file (validation.py)
- ROOT_DIR: 10 copies -> 1 definition (chat_constants.py)
- API_ENV_VARS: 4 copies -> 1 source of truth (env_constants.py)

Security hardening:
- Unified sensitive directory blocklist (14 dirs, was two divergent lists)
- Cached get_blocked_paths() for O(1) directory listing checks
- Terminal security warning when ALLOW_REMOTE=1 exposes WebSocket
- 20 new security tests for EXTRA_READ_PATHS blocking
- Extracted _validate_command_list() and _validate_pkill_processes() helpers

Type safety:
- 87 mypy errors -> 0 across 58 source files
- Installed types-PyYAML for proper yaml stub types
- Fixed SQLAlchemy Column[T] coercions across all routers

Dead code removed:
- 13 files deleted (~2,679 lines): unused UI components, debug logs, outdated docs
- 7 unused npm packages removed (Radix UI components with 0 imports)
- AgentAvatar.tsx reduced from 615 -> 119 lines (SVGs extracted to mascotData.tsx)

New CLI options:
- --testing-batch-size (1-5) for parallel mode test batching
- --testing-feature-ids for direct multi-feature testing

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-01 13:16:24 +02:00

305 lines
11 KiB
Python

"""
Assistant Database
==================
SQLAlchemy models and functions for persisting assistant conversations.
Each project has its own assistant.db file in the project directory.
"""
import logging
import threading
from datetime import datetime, timezone
from pathlib import Path
from typing import Optional
from sqlalchemy import Column, DateTime, ForeignKey, Integer, String, Text, create_engine, func
from sqlalchemy.engine import Engine
from sqlalchemy.orm import DeclarativeBase, relationship, sessionmaker
# Module-level logger; inherits handlers/level from the application's root config.
logger = logging.getLogger(__name__)
class Base(DeclarativeBase):
    """SQLAlchemy 2.0 style declarative base; all ORM models in this module inherit from it."""
    pass
# Engine cache to avoid creating new engines for each request.
# Key: project directory path (as posix string), Value: SQLAlchemy engine.
# Entries are evicted only via dispose_engine().
_engine_cache: dict[str, Engine] = {}
# Lock for thread-safe access to the engine cache.
# Prevents race conditions when multiple threads create engines simultaneously
# (see the double-checked locking in get_engine()).
_cache_lock = threading.Lock()
def _utc_now() -> datetime:
"""Return current UTC time. Replacement for deprecated datetime.utcnow()."""
return datetime.now(timezone.utc)
class Conversation(Base):
    """A conversation with the assistant for a project.

    Rows live in a per-project assistant.db file (see get_engine), so
    project_name is stored for filtering/display rather than cross-project
    uniqueness.
    """
    __tablename__ = "conversations"
    id = Column(Integer, primary_key=True, index=True)
    project_name = Column(String(100), nullable=False, index=True)
    title = Column(String(200), nullable=True)  # Optional title, derived from first user message (see add_message)
    # NOTE(review): columns are timezone-naive DateTime while _utc_now() returns
    # aware datetimes; values read back from SQLite may be naive — confirm.
    created_at = Column(DateTime, default=_utc_now)
    updated_at = Column(DateTime, default=_utc_now, onupdate=_utc_now)
    # delete-orphan cascade: deleting a Conversation deletes its messages ORM-side.
    messages = relationship("ConversationMessage", back_populates="conversation", cascade="all, delete-orphan")
class ConversationMessage(Base):
    """A single message within a conversation."""
    __tablename__ = "conversation_messages"
    id = Column(Integer, primary_key=True, index=True)
    # Deletion of messages with their parent is handled ORM-side via the
    # parent relationship's delete-orphan cascade (no DB-level ON DELETE here).
    conversation_id = Column(Integer, ForeignKey("conversations.id"), nullable=False, index=True)
    role = Column(String(20), nullable=False)  # "user" | "assistant" | "system"
    content = Column(Text, nullable=False)
    timestamp = Column(DateTime, default=_utc_now)  # set client-side at insert time
    conversation = relationship("Conversation", back_populates="messages")
def get_db_path(project_dir: Path) -> Path:
    """Resolve the assistant database file path for *project_dir* via the shared helper."""
    # Deferred import: resolved at call time, not module load time
    # (presumably to avoid an import cycle — unconfirmed).
    from autocoder_paths import get_assistant_db_path

    return get_assistant_db_path(project_dir)
def get_engine(project_dir: Path):
    """Return a cached (or newly created) engine for the project's assistant DB.

    Engines are cached per project directory so repeated requests reuse the
    same connection pool instead of reopening the database each time.

    Thread-safe via double-checked locking: the fast path reads the cache
    without the lock; creation happens under the lock with a re-check so two
    racing threads cannot both create an engine for the same project.
    """
    cache_key = project_dir.as_posix()
    # Fast path: cache hit needs no lock.
    cached = _engine_cache.get(cache_key)
    if cached is not None:
        return cached
    with _cache_lock:
        # Re-check under the lock: another thread may have won the race.
        engine = _engine_cache.get(cache_key)
        if engine is None:
            db_path = get_db_path(project_dir)
            # as_posix() keeps the SQLite URL well-formed across platforms.
            db_url = f"sqlite:///{db_path.as_posix()}"
            engine = create_engine(
                db_url,
                echo=False,
                connect_args={
                    "check_same_thread": False,
                    "timeout": 30,  # Wait up to 30s for locks
                },
            )
            # Ensure tables exist before the first session touches the file.
            Base.metadata.create_all(engine)
            _engine_cache[cache_key] = engine
            logger.debug(f"Created new database engine for {cache_key}")
        return engine
def dispose_engine(project_dir: Path) -> bool:
    """Close and evict the cached engine for *project_dir*.

    Disposing the engine closes its pooled connections, which releases the
    SQLite file lock (needed on Windows before deleting the database file).

    Returns:
        True if an engine was found and disposed, False if none was cached.
    """
    cache_key = project_dir.as_posix()
    # Atomically remove the entry; None means there was nothing cached.
    engine = _engine_cache.pop(cache_key, None)
    if engine is None:
        return False
    engine.dispose()
    logger.debug(f"Disposed database engine for {cache_key}")
    return True
def get_session(project_dir: Path):
    """Open a fresh SQLAlchemy session bound to the project's (cached) engine."""
    factory = sessionmaker(bind=get_engine(project_dir))
    return factory()
# ============================================================================
# Conversation Operations
# ============================================================================
def create_conversation(project_dir: Path, project_name: str, title: Optional[str] = None) -> Conversation:
    """Insert and return a new Conversation row for *project_name*.

    The returned instance is refreshed so its generated primary key (and
    server/client defaults) are populated before the session closes.
    """
    session = get_session(project_dir)
    try:
        conversation = Conversation(project_name=project_name, title=title)
        session.add(conversation)
        session.commit()
        # Populate the autogenerated id and default timestamps on the instance.
        session.refresh(conversation)
        logger.info(f"Created conversation {conversation.id} for project {project_name}")
        return conversation
    finally:
        session.close()
def get_conversations(project_dir: Path, project_name: str) -> list[dict]:
    """Return every conversation for *project_name*, most recently updated first.

    Each entry is a plain dict carrying a precomputed "message_count". The
    count comes from a single grouped subquery so the listing avoids issuing
    one COUNT query per conversation (the classic N+1 problem).
    """
    session = get_session(project_dir)
    try:
        # One grouped COUNT per conversation, computed database-side.
        counts = (
            session.query(
                ConversationMessage.conversation_id,
                func.count(ConversationMessage.id).label("message_count"),
            )
            .group_by(ConversationMessage.conversation_id)
            .subquery()
        )
        rows = (
            session.query(
                Conversation,
                # Conversations with no messages miss the subquery -> NULL -> 0.
                func.coalesce(counts.c.message_count, 0).label("message_count"),
            )
            .outerjoin(counts, Conversation.id == counts.c.conversation_id)
            .filter(Conversation.project_name == project_name)
            .order_by(Conversation.updated_at.desc())
            .all()
        )
        results: list[dict] = []
        for row in rows:
            conv = row.Conversation
            results.append(
                {
                    "id": conv.id,
                    "project_name": conv.project_name,
                    "title": conv.title,
                    "created_at": conv.created_at.isoformat() if conv.created_at else None,
                    "updated_at": conv.updated_at.isoformat() if conv.updated_at else None,
                    "message_count": row.message_count,
                }
            )
        return results
    finally:
        session.close()
def get_conversation(project_dir: Path, conversation_id: int) -> Optional[dict]:
    """Get a conversation with all its messages.

    Args:
        project_dir: Project directory whose assistant.db is queried.
        conversation_id: Primary key of the conversation to fetch.

    Returns:
        A dict of the conversation's fields plus a "messages" list sorted by
        timestamp (messages lacking a timestamp sort first), or None when no
        conversation with that id exists.
    """
    session = get_session(project_dir)
    try:
        conversation = session.query(Conversation).filter(Conversation.id == conversation_id).first()
        if not conversation:
            return None
        # Sort key fix: the previous `x.timestamp or datetime.min` compared the
        # naive datetime.min sentinel against message timestamps, but the
        # model default (_utc_now) produces timezone-aware datetimes, and
        # comparing aware vs naive raises TypeError. The tuple key keeps
        # None-timestamp messages first (same order as before for naive data)
        # without ever comparing datetimes of mixed awareness.
        ordered_messages = sorted(
            conversation.messages,
            key=lambda m: (m.timestamp is not None, m.timestamp or datetime.min),
        )
        return {
            "id": conversation.id,
            "project_name": conversation.project_name,
            "title": conversation.title,
            "created_at": conversation.created_at.isoformat() if conversation.created_at else None,
            "updated_at": conversation.updated_at.isoformat() if conversation.updated_at else None,
            "messages": [
                {
                    "id": m.id,
                    "role": m.role,
                    "content": m.content,
                    "timestamp": m.timestamp.isoformat() if m.timestamp else None,
                }
                for m in ordered_messages
            ],
        }
    finally:
        session.close()
def delete_conversation(project_dir: Path, conversation_id: int) -> bool:
    """Delete a conversation; its messages go with it via the delete-orphan cascade.

    Returns:
        True if the conversation existed and was deleted, False otherwise.
    """
    session = get_session(project_dir)
    try:
        target = session.query(Conversation).filter(Conversation.id == conversation_id).first()
        if target is None:
            return False
        session.delete(target)
        session.commit()
        logger.info(f"Deleted conversation {conversation_id}")
        return True
    finally:
        session.close()
# ============================================================================
# Message Operations
# ============================================================================
def add_message(project_dir: Path, conversation_id: int, role: str, content: str) -> Optional[dict]:
    """Append a message to a conversation and return it as a dict.

    Side effects: bumps the conversation's updated_at, and derives the
    conversation title from the first user message when no title is set.

    Returns:
        The stored message as a dict, or None when the conversation id is unknown.
    """
    session = get_session(project_dir)
    try:
        conversation = session.query(Conversation).filter(Conversation.id == conversation_id).first()
        if conversation is None:
            return None
        message = ConversationMessage(
            conversation_id=conversation_id,
            role=role,
            content=content,
        )
        session.add(message)
        # Keep recency fresh so the conversation list sorts this one first.
        conversation.updated_at = _utc_now()
        if role == "user" and not conversation.title:
            # First user message seeds the title, truncated to 50 characters.
            conversation.title = content[:50] + ("..." if len(content) > 50 else "")
        session.commit()
        # Reload so the generated id and default timestamp are populated.
        session.refresh(message)
        logger.debug(f"Added {role} message to conversation {conversation_id}")
        return {
            "id": message.id,
            "role": message.role,
            "content": message.content,
            "timestamp": message.timestamp.isoformat() if message.timestamp else None,
        }
    finally:
        session.close()
def get_messages(project_dir: Path, conversation_id: int) -> list[dict]:
    """Return a conversation's messages as plain dicts, oldest first."""
    session = get_session(project_dir)
    try:
        rows = (
            session.query(ConversationMessage)
            .filter(ConversationMessage.conversation_id == conversation_id)
            .order_by(ConversationMessage.timestamp.asc())
            .all()
        )
        return [
            {
                "id": msg.id,
                "role": msg.role,
                "content": msg.content,
                "timestamp": msg.timestamp.isoformat() if msg.timestamp else None,
            }
            for msg in rows
        ]
    finally:
        session.close()