Merge pull request #1177 from jphein/fix/blob-seq-marker-guard

fix: skip _fix_blob_seq_ids sqlite open on already-migrated palaces (#1090)
This commit is contained in:
Igor Lins e Silva
2026-04-26 18:28:55 -03:00
committed by GitHub
2 changed files with 87 additions and 0 deletions
+21
View File
@@ -4,6 +4,7 @@ import datetime as _dt
import logging import logging
import os import os
import sqlite3 import sqlite3
from pathlib import Path
from typing import Any, Optional from typing import Any, Optional
import chromadb import chromadb
@@ -239,6 +240,9 @@ def _pin_hnsw_threads(collection) -> None:
logger.debug("_pin_hnsw_threads modify failed", exc_info=True) logger.debug("_pin_hnsw_threads modify failed", exc_info=True)
_BLOB_FIX_MARKER = ".blob_seq_ids_migrated"
def _fix_blob_seq_ids(palace_path: str) -> None: def _fix_blob_seq_ids(palace_path: str) -> None:
"""Fix ChromaDB 0.6.x -> 1.5.x migration bug: BLOB seq_ids -> INTEGER. """Fix ChromaDB 0.6.x -> 1.5.x migration bug: BLOB seq_ids -> INTEGER.
@@ -248,10 +252,19 @@ def _fix_blob_seq_ids(palace_path: str) -> None:
type INTEGER) is not compatible with SQL type BLOB". type INTEGER) is not compatible with SQL type BLOB".
Must run BEFORE PersistentClient is created (the compactor fires on init). Must run BEFORE PersistentClient is created (the compactor fires on init).
Opening a Python sqlite3 connection against a ChromaDB 1.5.x WAL-mode
database leaves state that segfaults the next PersistentClient call. After
the migration has run once successfully, a marker file is written so
subsequent opens skip the sqlite connection entirely. Already-migrated
palaces can touch the marker manually to opt into the fast path.
""" """
db_path = os.path.join(palace_path, "chroma.sqlite3") db_path = os.path.join(palace_path, "chroma.sqlite3")
if not os.path.isfile(db_path): if not os.path.isfile(db_path):
return return
marker = os.path.join(palace_path, _BLOB_FIX_MARKER)
if os.path.isfile(marker):
return
try: try:
with sqlite3.connect(db_path) as conn: with sqlite3.connect(db_path) as conn:
for table in ("embeddings", "max_seq_id"): for table in ("embeddings", "max_seq_id"):
@@ -269,6 +282,14 @@ def _fix_blob_seq_ids(palace_path: str) -> None:
conn.commit() conn.commit()
except Exception: except Exception:
logger.exception("Could not fix BLOB seq_ids in %s", db_path) logger.exception("Could not fix BLOB seq_ids in %s", db_path)
return
# Write marker whether or not rows needed migration — the palace is now
# confirmed to be in the INTEGER-seq_id state and future opens can skip the
# sqlite3.connect() entirely.
try:
Path(marker).touch()
except OSError:
logger.exception("Could not write migration marker %s", marker)
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
+66
View File
@@ -382,6 +382,72 @@ def test_fix_blob_seq_ids_noop_without_database(tmp_path):
_fix_blob_seq_ids(str(tmp_path)) # should not raise _fix_blob_seq_ids(str(tmp_path)) # should not raise
def test_fix_blob_seq_ids_writes_marker_after_blob_path(tmp_path):
"""The .blob_seq_ids_migrated marker is written after a successful BLOB → INTEGER conversion."""
from mempalace.backends.chroma import _BLOB_FIX_MARKER
db_path = tmp_path / "chroma.sqlite3"
conn = sqlite3.connect(str(db_path))
conn.execute("CREATE TABLE embeddings (rowid INTEGER PRIMARY KEY, seq_id)")
conn.execute("INSERT INTO embeddings (seq_id) VALUES (?)", ((42).to_bytes(8, "big"),))
conn.commit()
conn.close()
marker = tmp_path / _BLOB_FIX_MARKER
assert not marker.exists()
_fix_blob_seq_ids(str(tmp_path))
assert marker.is_file(), "marker must be written after a successful migration"
def test_fix_blob_seq_ids_writes_marker_when_already_integer(tmp_path):
"""The marker is written even when the migration is a no-op (already INTEGER).
The point of the marker is to skip the sqlite3 open on subsequent calls,
not to record that a conversion happened. So a clean palace gets the
marker on first run too — next ``_fix_blob_seq_ids`` call short-circuits
before touching the sqlite3 file.
"""
from mempalace.backends.chroma import _BLOB_FIX_MARKER
db_path = tmp_path / "chroma.sqlite3"
conn = sqlite3.connect(str(db_path))
conn.execute("CREATE TABLE embeddings (rowid INTEGER PRIMARY KEY, seq_id INTEGER)")
conn.execute("INSERT INTO embeddings (seq_id) VALUES (42)")
conn.commit()
conn.close()
marker = tmp_path / _BLOB_FIX_MARKER
assert not marker.exists()
_fix_blob_seq_ids(str(tmp_path))
assert marker.is_file(), "marker must be written even when no BLOBs found"
def test_fix_blob_seq_ids_skips_sqlite_when_marker_present(tmp_path):
"""When the marker exists, ``_fix_blob_seq_ids`` does not open sqlite3.
This is the load-bearing property of the marker — opening Python's
sqlite3 against a live ChromaDB 1.5.x WAL DB corrupts the next
PersistentClient call (#1090). Once a palace has been migrated, we
never want to open it again, even read-only.
"""
from unittest.mock import patch
from mempalace.backends.chroma import _BLOB_FIX_MARKER
# Pre-create the marker so the function should short-circuit.
db_path = tmp_path / "chroma.sqlite3"
db_path.write_bytes(b"sentinel") # presence required for the function to proceed
(tmp_path / _BLOB_FIX_MARKER).touch()
with patch("mempalace.backends.chroma.sqlite3.connect") as mock_connect:
_fix_blob_seq_ids(str(tmp_path))
mock_connect.assert_not_called()
# ── quarantine_stale_hnsw ───────────────────────────────────────────────── # ── quarantine_stale_hnsw ─────────────────────────────────────────────────