mirror of
https://github.com/leonvanzyl/autocoder.git
synced 2026-02-02 07:23:35 +00:00
fix: prevent SQLite corruption in parallel mode with atomic operations
Replace ineffective threading.Lock() with atomic SQL operations for cross-process safety. Key changes: - Add SQLAlchemy event hooks (do_connect/do_begin) for BEGIN IMMEDIATE transactions in api/database.py - Add atomic_transaction() context manager for multi-statement ops - Convert all feature MCP write operations to atomic UPDATE...WHERE with compare-and-swap patterns (feature_claim, mark_passing, etc.) - Add WHERE passes=0 state guard to feature_mark_passing - Add WAL checkpoint on shutdown and idempotent cleanup() in parallel_orchestrator.py with async-safe signal handling - Wrap SQLite connections with contextlib.closing() in progress.py - Add thread-safe engine cache with double-checked locking in assistant_database.py - Migrate to SQLAlchemy 2.0 DeclarativeBase across all modules Inspired by PR #108 (cabana8471-arch), with fixes for nested BEGIN EXCLUSIVE bug and missing state guards. Closes #106 Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
125
api/database.py
125
api/database.py
@@ -8,7 +8,7 @@ SQLite database schema for feature storage using SQLAlchemy.
|
||||
import sys
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
from typing import Generator, Optional
|
||||
|
||||
|
||||
def _utc_now() -> datetime:
|
||||
@@ -26,13 +26,16 @@ from sqlalchemy import (
|
||||
String,
|
||||
Text,
|
||||
create_engine,
|
||||
event,
|
||||
text,
|
||||
)
|
||||
from sqlalchemy.ext.declarative import declarative_base
|
||||
from sqlalchemy.orm import Session, relationship, sessionmaker
|
||||
from sqlalchemy.orm import DeclarativeBase, Session, relationship, sessionmaker
|
||||
from sqlalchemy.types import JSON
|
||||
|
||||
Base = declarative_base()
|
||||
|
||||
class Base(DeclarativeBase):
|
||||
"""SQLAlchemy 2.0 style declarative base."""
|
||||
pass
|
||||
|
||||
|
||||
class Feature(Base):
|
||||
@@ -307,11 +310,11 @@ def _migrate_add_schedules_tables(engine) -> None:
|
||||
|
||||
# Create schedules table if missing
|
||||
if "schedules" not in existing_tables:
|
||||
Schedule.__table__.create(bind=engine)
|
||||
Schedule.__table__.create(bind=engine) # type: ignore[attr-defined]
|
||||
|
||||
# Create schedule_overrides table if missing
|
||||
if "schedule_overrides" not in existing_tables:
|
||||
ScheduleOverride.__table__.create(bind=engine)
|
||||
ScheduleOverride.__table__.create(bind=engine) # type: ignore[attr-defined]
|
||||
|
||||
# Add crash_count column if missing (for upgrades)
|
||||
if "schedules" in existing_tables:
|
||||
@@ -332,6 +335,35 @@ def _migrate_add_schedules_tables(engine) -> None:
|
||||
conn.commit()
|
||||
|
||||
|
||||
def _configure_sqlite_immediate_transactions(engine) -> None:
|
||||
"""Configure engine for IMMEDIATE transactions via event hooks.
|
||||
|
||||
Per SQLAlchemy docs: https://docs.sqlalchemy.org/en/20/dialects/sqlite.html
|
||||
|
||||
This replaces fragile pysqlite implicit transaction handling with explicit
|
||||
BEGIN IMMEDIATE at transaction start. Benefits:
|
||||
- Acquires write lock immediately, preventing stale reads
|
||||
- Works correctly regardless of prior ORM operations
|
||||
- Future-proof: won't break when pysqlite legacy mode is removed in Python 3.16
|
||||
"""
|
||||
@event.listens_for(engine, "connect")
|
||||
def do_connect(dbapi_connection, connection_record):
|
||||
# Disable pysqlite's implicit transaction handling
|
||||
dbapi_connection.isolation_level = None
|
||||
|
||||
# Set busy_timeout on raw connection before any transactions
|
||||
cursor = dbapi_connection.cursor()
|
||||
try:
|
||||
cursor.execute("PRAGMA busy_timeout=30000")
|
||||
finally:
|
||||
cursor.close()
|
||||
|
||||
@event.listens_for(engine, "begin")
|
||||
def do_begin(conn):
|
||||
# Use IMMEDIATE for all transactions to prevent stale reads
|
||||
conn.exec_driver_sql("BEGIN IMMEDIATE")
|
||||
|
||||
|
||||
def create_database(project_dir: Path) -> tuple:
|
||||
"""
|
||||
Create database and return engine + session maker.
|
||||
@@ -351,21 +383,37 @@ def create_database(project_dir: Path) -> tuple:
|
||||
return _engine_cache[cache_key]
|
||||
|
||||
db_url = get_database_url(project_dir)
|
||||
engine = create_engine(db_url, connect_args={
|
||||
"check_same_thread": False,
|
||||
"timeout": 30 # Wait up to 30s for locks
|
||||
})
|
||||
Base.metadata.create_all(bind=engine)
|
||||
|
||||
# Choose journal mode based on filesystem type
|
||||
# WAL mode doesn't work reliably on network filesystems and can cause corruption
|
||||
is_network = _is_network_path(project_dir)
|
||||
journal_mode = "DELETE" if is_network else "WAL"
|
||||
|
||||
engine = create_engine(db_url, connect_args={
|
||||
"check_same_thread": False,
|
||||
"timeout": 30 # Wait up to 30s for locks
|
||||
})
|
||||
|
||||
# Set journal mode BEFORE configuring event hooks
|
||||
# PRAGMA journal_mode must run outside of a transaction, and our event hooks
|
||||
# start a transaction with BEGIN IMMEDIATE on every operation
|
||||
with engine.connect() as conn:
|
||||
conn.execute(text(f"PRAGMA journal_mode={journal_mode}"))
|
||||
conn.execute(text("PRAGMA busy_timeout=30000"))
|
||||
conn.commit()
|
||||
# Get raw DBAPI connection to execute PRAGMA outside transaction
|
||||
raw_conn = conn.connection.dbapi_connection
|
||||
if raw_conn is None:
|
||||
raise RuntimeError("Failed to get raw DBAPI connection")
|
||||
cursor = raw_conn.cursor()
|
||||
try:
|
||||
cursor.execute(f"PRAGMA journal_mode={journal_mode}")
|
||||
cursor.execute("PRAGMA busy_timeout=30000")
|
||||
finally:
|
||||
cursor.close()
|
||||
|
||||
# Configure IMMEDIATE transactions via event hooks AFTER setting PRAGMAs
|
||||
# This must happen before create_all() and migrations run
|
||||
_configure_sqlite_immediate_transactions(engine)
|
||||
|
||||
Base.metadata.create_all(bind=engine)
|
||||
|
||||
# Migrate existing databases
|
||||
_migrate_add_in_progress_column(engine)
|
||||
@@ -417,7 +465,7 @@ def set_session_maker(session_maker: sessionmaker) -> None:
|
||||
_session_maker = session_maker
|
||||
|
||||
|
||||
def get_db() -> Session:
|
||||
def get_db() -> Generator[Session, None, None]:
|
||||
"""
|
||||
Dependency for FastAPI to get database session.
|
||||
|
||||
@@ -434,3 +482,50 @@ def get_db() -> Session:
|
||||
raise
|
||||
finally:
|
||||
db.close()
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Atomic Transaction Helpers for Parallel Mode
|
||||
# =============================================================================
|
||||
# These helpers prevent database corruption when multiple processes access the
|
||||
# same SQLite database concurrently. They use IMMEDIATE transactions which
|
||||
# acquire write locks at the start (preventing stale reads) and atomic
|
||||
# UPDATE ... WHERE clauses (preventing check-then-modify races).
|
||||
|
||||
|
||||
from contextlib import contextmanager
|
||||
|
||||
|
||||
@contextmanager
|
||||
def atomic_transaction(session_maker):
|
||||
"""Context manager for atomic SQLite transactions.
|
||||
|
||||
Acquires a write lock immediately via BEGIN IMMEDIATE (configured by
|
||||
engine event hooks), preventing stale reads in read-modify-write patterns.
|
||||
This is essential for preventing race conditions in parallel mode.
|
||||
|
||||
Args:
|
||||
session_maker: SQLAlchemy sessionmaker
|
||||
|
||||
Yields:
|
||||
SQLAlchemy session with automatic commit/rollback
|
||||
|
||||
Example:
|
||||
with atomic_transaction(session_maker) as session:
|
||||
# All reads in this block are protected by write lock
|
||||
feature = session.query(Feature).filter(...).first()
|
||||
feature.priority = new_priority
|
||||
# Commit happens automatically on exit
|
||||
"""
|
||||
session = session_maker()
|
||||
try:
|
||||
yield session
|
||||
session.commit()
|
||||
except Exception:
|
||||
try:
|
||||
session.rollback()
|
||||
except Exception:
|
||||
pass # Don't let rollback failure mask original error
|
||||
raise
|
||||
finally:
|
||||
session.close()
|
||||
|
||||
Reference in New Issue
Block a user