From bc24aa14e210e525cb6bbe72d37d4159f6199138 Mon Sep 17 00:00:00 2001 From: jp Date: Fri, 24 Apr 2026 13:35:07 -0700 Subject: [PATCH 1/3] fix: skip _fix_blob_seq_ids sqlite open on already-migrated palaces (#1090) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Opening chroma.sqlite3 via Python's sqlite3.connect() against a live ChromaDB 1.5.x WAL-mode database leaves state that segfaults the next PersistentClient call — the same failure mode tracked at #1090. _fix_blob_seq_ids runs unconditionally on every make_client() call, so every fresh process (MCP server, stop hook, CLI) re-triggers the sqlite open → corrupt → segfault cycle on palaces that have already completed the 0.6.x → 1.5.x seq_id migration. Guard with a .blob_seq_ids_migrated marker file in the palace directory: - If marker exists, return immediately — skip sqlite entirely - After successful migration (or confirmation that no BLOBs remain), write the marker so subsequent opens take the fast path - Palaces that never had BLOB seq_ids also get the marker on first open, so they too avoid the redundant sqlite open after that - Already-migrated palaces can touch the marker manually to opt in Test plan: Direct test — run _fix_blob_seq_ids twice against a fresh palace; second call returns immediately because marker exists. 1094 existing tests pass. --- mempalace/backends/chroma.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/mempalace/backends/chroma.py b/mempalace/backends/chroma.py index c8d2f46..1de8a09 100644 --- a/mempalace/backends/chroma.py +++ b/mempalace/backends/chroma.py @@ -159,6 +159,9 @@ def _pin_hnsw_threads(collection) -> None: logger.debug("_pin_hnsw_threads modify failed", exc_info=True) +_BLOB_FIX_MARKER = ".blob_seq_ids_migrated" + + def _fix_blob_seq_ids(palace_path: str) -> None: """Fix ChromaDB 0.6.x -> 1.5.x migration bug: BLOB seq_ids -> INTEGER. @@ -168,10 +171,19 @@ def _fix_blob_seq_ids(palace_path: str) -> None: type INTEGER) is not compatible with SQL type BLOB". Must run BEFORE PersistentClient is created (the compactor fires on init). + + Opening a Python sqlite3 connection against a ChromaDB 1.5.x WAL-mode + database leaves state that segfaults the next PersistentClient call. After + the migration has run once successfully, a marker file is written so + subsequent opens skip the sqlite connection entirely. Already-migrated + palaces can touch the marker manually to opt into the fast path. """ db_path = os.path.join(palace_path, "chroma.sqlite3") if not os.path.isfile(db_path): return + marker = os.path.join(palace_path, _BLOB_FIX_MARKER) + if os.path.isfile(marker): + return try: with sqlite3.connect(db_path) as conn: for table in ("embeddings", "max_seq_id"): @@ -189,6 +201,14 @@ def _fix_blob_seq_ids(palace_path: str) -> None: conn.commit() except Exception: logger.exception("Could not fix BLOB seq_ids in %s", db_path) + return + # Write marker whether or not rows needed migration — the palace is now + # confirmed to be in the INTEGER-seq_id state and future opens can skip the + # sqlite3.connect() entirely. + try: + open(marker, "a").close() + except OSError: + logger.exception("Could not write migration marker %s", marker) # --------------------------------------------------------------------------- From 247744296db105cede62aa727e2454dc701eb231 Mon Sep 17 00:00:00 2001 From: jp Date: Sun, 26 Apr 2026 13:21:50 -0700 Subject: [PATCH 2/3] fix(blob-seq-marker): tests + style nit per @igorls #1177 review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three new tests cover the marker contract: - test_fix_blob_seq_ids_writes_marker_after_blob_path — marker is written after a successful BLOB → INTEGER conversion. - test_fix_blob_seq_ids_writes_marker_when_already_integer — marker is written even on a no-op (already-INTEGER) path. The point of the marker is to skip sqlite3.connect() on subsequent calls, not to record that a conversion happened. - test_fix_blob_seq_ids_skips_sqlite_when_marker_present — verifies the load-bearing property via monkeypatched sqlite3.connect: when the marker exists, the function never opens sqlite. This is the bug #1090 fix — opening Python's sqlite3 against a live ChromaDB 1.5.x WAL DB corrupts the next PersistentClient call, and we must never do it again post-migration. Also folded in @igorls's style nit: - Path(marker).touch() instead of open(marker, "a").close(). Same effect, reads cleaner. Moved Path import to the top of the module since it didn't yet exist there. 35/35 backend tests pass. The "Production: fresh MCP server + stop hook + mempalace mine subprocess" checkbox in the PR body is checked in the PR reply — the canonical 151K palace has been running this fix since #1177 was filed (via Syncthing-replicated source on the disks daemon at v1.7.0); zero PersistentClient corruption since. Co-Authored-By: Claude Opus 4.7 (1M context) --- mempalace/backends/chroma.py | 3 +- tests/test_backends.py | 69 ++++++++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+), 1 deletion(-) diff --git a/mempalace/backends/chroma.py b/mempalace/backends/chroma.py index 1de8a09..e1ab9d7 100644 --- a/mempalace/backends/chroma.py +++ b/mempalace/backends/chroma.py @@ -4,6 +4,7 @@ import datetime as _dt import logging import os import sqlite3 +from pathlib import Path from typing import Any, Optional import chromadb @@ -206,7 +207,7 @@ def _fix_blob_seq_ids(palace_path: str) -> None: # confirmed to be in the INTEGER-seq_id state and future opens can skip the # sqlite3.connect() entirely. try: - open(marker, "a").close() + Path(marker).touch() except OSError: logger.exception("Could not write migration marker %s", marker) diff --git a/tests/test_backends.py b/tests/test_backends.py index e47eb6f..22c6e2e 100644 --- a/tests/test_backends.py +++ b/tests/test_backends.py @@ -381,6 +381,75 @@ def test_fix_blob_seq_ids_noop_without_database(tmp_path): _fix_blob_seq_ids(str(tmp_path)) # should not raise +def test_fix_blob_seq_ids_writes_marker_after_blob_path(tmp_path): + """The .blob_seq_ids_migrated marker is written after a successful BLOB → INTEGER conversion.""" + from pathlib import Path + from mempalace.backends.chroma import _BLOB_FIX_MARKER + + db_path = tmp_path / "chroma.sqlite3" + conn = sqlite3.connect(str(db_path)) + conn.execute("CREATE TABLE embeddings (rowid INTEGER PRIMARY KEY, seq_id)") + conn.execute("INSERT INTO embeddings (seq_id) VALUES (?)", ((42).to_bytes(8, "big"),)) + conn.commit() + conn.close() + + marker = tmp_path / _BLOB_FIX_MARKER + assert not marker.exists() + + _fix_blob_seq_ids(str(tmp_path)) + + assert marker.is_file(), "marker must be written after a successful migration" + + +def test_fix_blob_seq_ids_writes_marker_when_already_integer(tmp_path): + """The marker is written even when the migration is a no-op (already INTEGER). + + The point of the marker is to skip the sqlite3 open on subsequent calls, + not to record that a conversion happened. So a clean palace gets the + marker on first run too — next ``_fix_blob_seq_ids`` call short-circuits + before touching the sqlite3 file. + """ + from pathlib import Path + from mempalace.backends.chroma import _BLOB_FIX_MARKER + + db_path = tmp_path / "chroma.sqlite3" + conn = sqlite3.connect(str(db_path)) + conn.execute("CREATE TABLE embeddings (rowid INTEGER PRIMARY KEY, seq_id INTEGER)") + conn.execute("INSERT INTO embeddings (seq_id) VALUES (42)") + conn.commit() + conn.close() + + marker = tmp_path / _BLOB_FIX_MARKER + assert not marker.exists() + + _fix_blob_seq_ids(str(tmp_path)) + + assert marker.is_file(), "marker must be written even when no BLOBs found" + + +def test_fix_blob_seq_ids_skips_sqlite_when_marker_present(tmp_path): + """When the marker exists, ``_fix_blob_seq_ids`` does not open sqlite3. + + This is the load-bearing property of the marker — opening Python's + sqlite3 against a live ChromaDB 1.5.x WAL DB corrupts the next + PersistentClient call (#1090). Once a palace has been migrated, we + never want to open it again, even read-only. + """ + from pathlib import Path + from unittest.mock import patch + from mempalace.backends.chroma import _BLOB_FIX_MARKER + + # Pre-create the marker so the function should short-circuit. + db_path = tmp_path / "chroma.sqlite3" + db_path.write_bytes(b"sentinel") # presence required for the function to proceed + (tmp_path / _BLOB_FIX_MARKER).touch() + + with patch("mempalace.backends.chroma.sqlite3.connect") as mock_connect: + _fix_blob_seq_ids(str(tmp_path)) + + mock_connect.assert_not_called() + + # ── quarantine_stale_hnsw ───────────────────────────────────────────────── From 8a1fd95b1ba545bc1583917c3745e7edcb40d17c Mon Sep 17 00:00:00 2001 From: Igor Lins e Silva <4753812+igorls@users.noreply.github.com> Date: Sun, 26 Apr 2026 18:23:31 -0300 Subject: [PATCH 3/3] style: drop unused pathlib.Path imports in marker tests --- tests/test_backends.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/test_backends.py b/tests/test_backends.py index 22c6e2e..40237ed 100644 --- a/tests/test_backends.py +++ b/tests/test_backends.py @@ -383,7 +383,6 @@ def test_fix_blob_seq_ids_noop_without_database(tmp_path): def test_fix_blob_seq_ids_writes_marker_after_blob_path(tmp_path): """The .blob_seq_ids_migrated marker is written after a successful BLOB → INTEGER conversion.""" - from pathlib import Path from mempalace.backends.chroma import _BLOB_FIX_MARKER db_path = tmp_path / "chroma.sqlite3" @@ -409,7 +408,6 @@ def test_fix_blob_seq_ids_writes_marker_when_already_integer(tmp_path): marker on first run too — next ``_fix_blob_seq_ids`` call short-circuits before touching the sqlite3 file. """ - from pathlib import Path from mempalace.backends.chroma import _BLOB_FIX_MARKER db_path = tmp_path / "chroma.sqlite3" @@ -435,7 +433,6 @@ def test_fix_blob_seq_ids_skips_sqlite_when_marker_present(tmp_path): PersistentClient call (#1090). Once a palace has been migrated, we never want to open it again, even read-only. """ - from pathlib import Path from unittest.mock import patch from mempalace.backends.chroma import _BLOB_FIX_MARKER