diff --git a/mempalace/backends/chroma.py b/mempalace/backends/chroma.py index 0ac7501..da01e4d 100644 --- a/mempalace/backends/chroma.py +++ b/mempalace/backends/chroma.py @@ -1,11 +1,47 @@ """ChromaDB-backed MemPalace collection adapter.""" +import logging import os +import sqlite3 import chromadb from .base import BaseCollection +logger = logging.getLogger(__name__) + + +def _fix_blob_seq_ids(palace_path: str): + """Fix ChromaDB 0.6.x -> 1.5.x migration bug: BLOB seq_ids -> INTEGER. + + ChromaDB 0.6.x stored seq_id as big-endian 8-byte BLOBs. ChromaDB 1.5.x + expects INTEGER. The auto-migration doesn't convert existing rows, causing + the Rust compactor to crash with "mismatched types; Rust type u64 (as SQL + type INTEGER) is not compatible with SQL type BLOB". + + Must run BEFORE PersistentClient is created (the compactor fires on init). + """ + db_path = os.path.join(palace_path, "chroma.sqlite3") + if not os.path.isfile(db_path): + return + try: + with sqlite3.connect(db_path) as conn: + for table in ("embeddings", "max_seq_id"): + try: + rows = conn.execute( + f"SELECT rowid, seq_id FROM {table} WHERE typeof(seq_id) = 'blob'" + ).fetchall() + except sqlite3.OperationalError: + continue + if not rows: + continue + updates = [(int.from_bytes(blob, byteorder="big"), rowid) for rowid, blob in rows] + conn.executemany(f"UPDATE {table} SET seq_id = ? WHERE rowid = ?", updates) + logger.info("Fixed %d BLOB seq_ids in %s", len(updates), table) + conn.commit() + except Exception: + logger.exception("Could not fix BLOB seq_ids in %s", db_path) + class ChromaCollection(BaseCollection): """Thin adapter over a ChromaDB collection.""" @@ -46,6 +82,7 @@ class ChromaBackend: except (OSError, NotImplementedError): pass + _fix_blob_seq_ids(palace_path) client = chromadb.PersistentClient(path=palace_path) if create: collection = client.get_or_create_collection(collection_name) diff --git a/tests/test_backends.py b/tests/test_backends.py index 4a97ef1..846134f 100644 --- a/tests/test_backends.py +++ b/tests/test_backends.py @@ -1,7 +1,9 @@ +import sqlite3 + import chromadb import pytest -from mempalace.backends.chroma import ChromaBackend, ChromaCollection +from mempalace.backends.chroma import ChromaBackend, ChromaCollection, _fix_blob_seq_ids class _FakeCollection: @@ -78,3 +80,49 @@ def test_chroma_backend_create_true_creates_directory_and_collection(tmp_path): client = chromadb.PersistentClient(path=str(palace_path)) client.get_collection("mempalace_drawers") + + +def test_fix_blob_seq_ids_converts_blobs_to_integers(tmp_path): + """Simulate a ChromaDB 0.6.x database with BLOB seq_ids and verify repair.""" + db_path = tmp_path / "chroma.sqlite3" + conn = sqlite3.connect(str(db_path)) + conn.execute("CREATE TABLE embeddings (rowid INTEGER PRIMARY KEY, seq_id)") + conn.execute("CREATE TABLE max_seq_id (rowid INTEGER PRIMARY KEY, seq_id)") + # Insert BLOB seq_ids like ChromaDB 0.6.x would + blob_42 = (42).to_bytes(8, byteorder="big") + blob_99 = (99).to_bytes(8, byteorder="big") + conn.execute("INSERT INTO embeddings (seq_id) VALUES (?)", (blob_42,)) + conn.execute("INSERT INTO max_seq_id (seq_id) VALUES (?)", (blob_99,)) + conn.commit() + conn.close() + + _fix_blob_seq_ids(str(tmp_path)) + + conn = sqlite3.connect(str(db_path)) + row = conn.execute("SELECT seq_id, typeof(seq_id) FROM embeddings").fetchone() + assert row == (42, "integer") + row = conn.execute("SELECT seq_id, typeof(seq_id) FROM max_seq_id").fetchone() + assert row == (99, "integer") + conn.close() + + +def test_fix_blob_seq_ids_noop_without_blobs(tmp_path): + """No error when seq_ids are already integers.""" + db_path = tmp_path / "chroma.sqlite3" + conn = sqlite3.connect(str(db_path)) + conn.execute("CREATE TABLE embeddings (rowid INTEGER PRIMARY KEY, seq_id INTEGER)") + conn.execute("INSERT INTO embeddings (seq_id) VALUES (42)") + conn.commit() + conn.close() + + _fix_blob_seq_ids(str(tmp_path)) + + conn = sqlite3.connect(str(db_path)) + row = conn.execute("SELECT seq_id, typeof(seq_id) FROM embeddings").fetchone() + assert row == (42, "integer") + conn.close() + + +def test_fix_blob_seq_ids_noop_without_database(tmp_path): + """No error when palace has no chroma.sqlite3.""" + _fix_blob_seq_ids(str(tmp_path)) # should not raise