fix: prevent SQLite corruption in parallel mode with atomic operations

Replace ineffective threading.Lock() with atomic SQL operations for cross-process safety. Key changes: - Add SQLAlchemy event hooks (do_connect/do_begin) for BEGIN IMMEDIATE transactions in api/database.py - Add atomic_transaction() context manager for multi-statement ops - Convert all feature MCP write operations to atomic UPDATE...WHERE with compare-and-swap patterns (feature_claim, mark_passing, etc.) - Add WHERE passes=0 state guard to feature_mark_passing - Add WAL checkpoint on shutdown and idempotent cleanup() in parallel_orchestrator.py with async-safe signal handling - Wrap SQLite connections with contextlib.closing() in progress.py - Add thread-safe engine cache with double-checked locking in assistant_database.py - Migrate to SQLAlchemy 2.0 DeclarativeBase across all modules Inspired by PR #108 (cabana8471-arch), with fixes for nested BEGIN EXCLUSIVE bug and missing state guards. Closes #106 Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-02 07:23:35 +00:00 · 2026-02-01 09:45:20 +02:00
parent 6609a0f7d6
commit d8a8c83447
6 changed files with 489 additions and 255 deletions
--- a/server/services/assistant_database.py
+++ b/server/services/assistant_database.py
@@ -7,21 +7,28 @@ Each project has its own assistant.db file in the project directory.
 """

 import logging
+import threading
 from datetime import datetime, timezone
 from pathlib import Path
 from typing import Optional

 from sqlalchemy import Column, DateTime, ForeignKey, Integer, String, Text, create_engine, func
-from sqlalchemy.orm import declarative_base, relationship, sessionmaker
+from sqlalchemy.orm import DeclarativeBase, relationship, sessionmaker

 logger = logging.getLogger(__name__)

-Base = declarative_base()
+class Base(DeclarativeBase):
+    """SQLAlchemy 2.0 style declarative base."""
+    pass

 # Engine cache to avoid creating new engines for each request
 # Key: project directory path (as posix string), Value: SQLAlchemy engine
 _engine_cache: dict[str, object] = {}

+# Lock for thread-safe access to the engine cache
+# Prevents race conditions when multiple threads create engines simultaneously
+_cache_lock = threading.Lock()
+

 def _utc_now() -> datetime:
    """Return current UTC time. Replacement for deprecated datetime.utcnow()."""
@@ -64,17 +71,33 @@ def get_engine(project_dir: Path):

    Uses a cache to avoid creating new engines for each request, which improves
    performance by reusing database connections.
+
+    Thread-safe: Uses a lock to prevent race conditions when multiple threads
+    try to create engines simultaneously for the same project.
    """
    cache_key = project_dir.as_posix()

-    if cache_key not in _engine_cache:
-        db_path = get_db_path(project_dir)
-        # Use as_posix() for cross-platform compatibility with SQLite connection strings
-        db_url = f"sqlite:///{db_path.as_posix()}"
-        engine = create_engine(db_url, echo=False)
-        Base.metadata.create_all(engine)
-        _engine_cache[cache_key] = engine
-        logger.debug(f"Created new database engine for {cache_key}")
+    # Double-checked locking for thread safety and performance
+    if cache_key in _engine_cache:
+        return _engine_cache[cache_key]
+
+    with _cache_lock:
+        # Check again inside the lock in case another thread created it
+        if cache_key not in _engine_cache:
+            db_path = get_db_path(project_dir)
+            # Use as_posix() for cross-platform compatibility with SQLite connection strings
+            db_url = f"sqlite:///{db_path.as_posix()}"
+            engine = create_engine(
+                db_url,
+                echo=False,
+                connect_args={
+                    "check_same_thread": False,
+                    "timeout": 30,  # Wait up to 30s for locks
+                }
+            )
+            Base.metadata.create_all(engine)
+            _engine_cache[cache_key] = engine
+            logger.debug(f"Created new database engine for {cache_key}")

    return _engine_cache[cache_key]