fix(blob-seq-marker): tests + style nit per @igorls #1177 review

Three new tests cover the marker contract:

- test_fix_blob_seq_ids_writes_marker_after_blob_path — marker is
  written after a successful BLOB → INTEGER conversion.
- test_fix_blob_seq_ids_writes_marker_when_already_integer — marker
  is written even on a no-op (already-INTEGER) path. The point of
  the marker is to skip sqlite3.connect() on subsequent calls, not
  to record that a conversion happened.
- test_fix_blob_seq_ids_skips_sqlite_when_marker_present — verifies
  the load-bearing property via monkeypatched sqlite3.connect: when
  the marker exists, the function never opens sqlite. This is the
  bug #1090 fix — opening Python's sqlite3 against a live ChromaDB
  1.5.x WAL DB corrupts the next PersistentClient call, and we
  must never do it again post-migration.

Also folded in @igorls's style nit:

- Path(marker).touch() instead of open(marker, "a").close().
  Same effect, reads cleaner. Moved Path import to the top of the
  module since it didn't yet exist there.

35/35 backend tests pass. The "Production: fresh MCP server + stop
hook + mempalace mine subprocess" checkbox in the PR body is checked
in the PR reply — the canonical 151K palace has been running this
fix since #1177 was filed (via Syncthing-replicated source on the
disks daemon at v1.7.0); zero PersistentClient corruption since.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
jp
2026-04-26 13:21:50 -07:00
parent bc24aa14e2
commit 247744296d
2 changed files with 71 additions and 1 deletions
+2 -1
View File
@@ -4,6 +4,7 @@ import datetime as _dt
import logging import logging
import os import os
import sqlite3 import sqlite3
from pathlib import Path
from typing import Any, Optional from typing import Any, Optional
import chromadb import chromadb
@@ -206,7 +207,7 @@ def _fix_blob_seq_ids(palace_path: str) -> None:
# confirmed to be in the INTEGER-seq_id state and future opens can skip the # confirmed to be in the INTEGER-seq_id state and future opens can skip the
# sqlite3.connect() entirely. # sqlite3.connect() entirely.
try: try:
open(marker, "a").close() Path(marker).touch()
except OSError: except OSError:
logger.exception("Could not write migration marker %s", marker) logger.exception("Could not write migration marker %s", marker)
+69
View File
@@ -381,6 +381,75 @@ def test_fix_blob_seq_ids_noop_without_database(tmp_path):
_fix_blob_seq_ids(str(tmp_path)) # should not raise _fix_blob_seq_ids(str(tmp_path)) # should not raise
def test_fix_blob_seq_ids_writes_marker_after_blob_path(tmp_path):
"""The .blob_seq_ids_migrated marker is written after a successful BLOB → INTEGER conversion."""
from pathlib import Path
from mempalace.backends.chroma import _BLOB_FIX_MARKER
db_path = tmp_path / "chroma.sqlite3"
conn = sqlite3.connect(str(db_path))
conn.execute("CREATE TABLE embeddings (rowid INTEGER PRIMARY KEY, seq_id)")
conn.execute("INSERT INTO embeddings (seq_id) VALUES (?)", ((42).to_bytes(8, "big"),))
conn.commit()
conn.close()
marker = tmp_path / _BLOB_FIX_MARKER
assert not marker.exists()
_fix_blob_seq_ids(str(tmp_path))
assert marker.is_file(), "marker must be written after a successful migration"
def test_fix_blob_seq_ids_writes_marker_when_already_integer(tmp_path):
"""The marker is written even when the migration is a no-op (already INTEGER).
The point of the marker is to skip the sqlite3 open on subsequent calls,
not to record that a conversion happened. So a clean palace gets the
marker on first run too — next ``_fix_blob_seq_ids`` call short-circuits
before touching the sqlite3 file.
"""
from pathlib import Path
from mempalace.backends.chroma import _BLOB_FIX_MARKER
db_path = tmp_path / "chroma.sqlite3"
conn = sqlite3.connect(str(db_path))
conn.execute("CREATE TABLE embeddings (rowid INTEGER PRIMARY KEY, seq_id INTEGER)")
conn.execute("INSERT INTO embeddings (seq_id) VALUES (42)")
conn.commit()
conn.close()
marker = tmp_path / _BLOB_FIX_MARKER
assert not marker.exists()
_fix_blob_seq_ids(str(tmp_path))
assert marker.is_file(), "marker must be written even when no BLOBs found"
def test_fix_blob_seq_ids_skips_sqlite_when_marker_present(tmp_path):
"""When the marker exists, ``_fix_blob_seq_ids`` does not open sqlite3.
This is the load-bearing property of the marker — opening Python's
sqlite3 against a live ChromaDB 1.5.x WAL DB corrupts the next
PersistentClient call (#1090). Once a palace has been migrated, we
never want to open it again, even read-only.
"""
from pathlib import Path
from unittest.mock import patch
from mempalace.backends.chroma import _BLOB_FIX_MARKER
# Pre-create the marker so the function should short-circuit.
db_path = tmp_path / "chroma.sqlite3"
db_path.write_bytes(b"sentinel") # presence required for the function to proceed
(tmp_path / _BLOB_FIX_MARKER).touch()
with patch("mempalace.backends.chroma.sqlite3.connect") as mock_connect:
_fix_blob_seq_ids(str(tmp_path))
mock_connect.assert_not_called()
# ── quarantine_stale_hnsw ───────────────────────────────────────────────── # ── quarantine_stale_hnsw ─────────────────────────────────────────────────