fix: narrow _fix_blob_seq_ids shim + add repair --mode max-seq-id
The BLOB-seq_id migration shim (PR #664) ran int.from_bytes(..., 'big') over every BLOB in max_seq_id, including chromadb 1.5.x's own native format (b'\x11\x11' + 6 ASCII digits). That conversion yields a ~1.23e18 integer that silently suppresses every subsequent embeddings_queue write for the affected segment (queue filter is seq_id > start), causing silent drawer-write drops after a 1.5.x upgrade. Two-part fix: 1. Shim narrowing (mempalace/backends/chroma.py) - Drop max_seq_id from the shim loop. chromadb owns that column's format; we don't reinterpret it. - Defense-in-depth: skip rows in embeddings whose seq_id BLOB has the sysdb-10 b'\x11\x11' prefix rather than misconvert. 2. Recovery command (mempalace/repair.py, mempalace/cli.py) - mempalace repair --mode max-seq-id [--segment <uuid>] [--from-sidecar <path>] [--dry-run] [--yes] [--no-backup] - Detects poisoned rows via threshold (seq_id > 2**53). - Default heuristic: MAX(embeddings.seq_id) over the collection owning the poisoned segment. Matches METADATA max exactly; VECTOR segments get a few seq_ids ahead (queue skips an already-indexed window — an acceptable loss vs. resetting to 0 and re-processing everything). - --from-sidecar copies clean values from a pre-corruption sqlite db. - Backs up chroma.sqlite3, closes chroma handles, atomic UPDATEs, post-repair verification that raises MaxSeqIdVerificationError if any row is still above threshold. Tests: 8 new in tests/test_repair.py (detection, heuristic, sidecar, dry-run, segment filter, no-op, backup, rollback-on-verify-failure). 3 new in tests/test_backends.py (max_seq_id untouched by shim, sysdb-10 prefix skipped in embeddings, legacy big-endian u64 BLOBs still convert). Full suite: 1103 passed.
This commit is contained in:
+66
-7
@@ -341,12 +341,9 @@ def test_fix_blob_seq_ids_converts_blobs_to_integers(tmp_path):
|
||||
db_path = tmp_path / "chroma.sqlite3"
|
||||
conn = sqlite3.connect(str(db_path))
|
||||
conn.execute("CREATE TABLE embeddings (rowid INTEGER PRIMARY KEY, seq_id)")
|
||||
conn.execute("CREATE TABLE max_seq_id (rowid INTEGER PRIMARY KEY, seq_id)")
|
||||
# Insert BLOB seq_ids like ChromaDB 0.6.x would
|
||||
# Insert BLOB seq_id like ChromaDB 0.6.x would
|
||||
blob_42 = (42).to_bytes(8, byteorder="big")
|
||||
blob_99 = (99).to_bytes(8, byteorder="big")
|
||||
conn.execute("INSERT INTO embeddings (seq_id) VALUES (?)", (blob_42,))
|
||||
conn.execute("INSERT INTO max_seq_id (seq_id) VALUES (?)", (blob_99,))
|
||||
conn.commit()
|
||||
conn.close()
|
||||
|
||||
@@ -355,8 +352,6 @@ def test_fix_blob_seq_ids_converts_blobs_to_integers(tmp_path):
|
||||
conn = sqlite3.connect(str(db_path))
|
||||
row = conn.execute("SELECT seq_id, typeof(seq_id) FROM embeddings").fetchone()
|
||||
assert row == (42, "integer")
|
||||
row = conn.execute("SELECT seq_id, typeof(seq_id) FROM max_seq_id").fetchone()
|
||||
assert row == (99, "integer")
|
||||
conn.close()
|
||||
|
||||
|
||||
@@ -382,6 +377,71 @@ def test_fix_blob_seq_ids_noop_without_database(tmp_path):
|
||||
_fix_blob_seq_ids(str(tmp_path)) # should not raise
|
||||
|
||||
|
||||
def test_fix_blob_seq_ids_does_not_touch_max_seq_id(tmp_path):
    """chromadb 1.5.x owns max_seq_id; the shim must not interpret its BLOBs.

    Regression guard for the 2026-04-20 incident: the old shim ran
    int.from_bytes(..., 'big') over chromadb 1.5.x's native
    b'\\x11\\x11' + ASCII-digit BLOB, producing a ~1.23e18 integer that
    silently suppressed every subsequent embeddings_queue write.
    """
    from contextlib import closing

    db_path = tmp_path / "chroma.sqlite3"
    # Written as prefix + digits so the two-byte marker is not misread as a
    # longer hex escape; the value is identical to b"\x11\x11502607".
    sysdb10_blob = b"\x11\x11" + b"502607"

    # closing() releases the handle even when a statement raises; sqlite3's
    # own context manager only scopes the transaction and never closes.
    with closing(sqlite3.connect(str(db_path))) as conn:
        conn.execute("CREATE TABLE embeddings (rowid INTEGER PRIMARY KEY, seq_id)")
        conn.execute("CREATE TABLE max_seq_id (rowid INTEGER PRIMARY KEY, seq_id)")
        conn.execute("INSERT INTO max_seq_id (seq_id) VALUES (?)", (sysdb10_blob,))
        conn.commit()

    _fix_blob_seq_ids(str(tmp_path))

    with closing(sqlite3.connect(str(db_path))) as conn:
        row = conn.execute("SELECT seq_id, typeof(seq_id) FROM max_seq_id").fetchone()

    # Asserted after the connection is closed so a failure cannot leak it.
    assert row == (sysdb10_blob, "blob")
|
||||
|
||||
|
||||
def test_fix_blob_seq_ids_skips_sysdb10_prefix_in_embeddings(tmp_path):
    """Defense-in-depth: sysdb-10 prefix in embeddings.seq_id is skipped."""
    from contextlib import closing

    db_path = tmp_path / "chroma.sqlite3"
    # Written as prefix + digits so the two-byte marker is not misread as a
    # longer hex escape; the value is identical to b"\x11\x11502607".
    sysdb10_blob = b"\x11\x11" + b"502607"

    # closing() releases the handle even when a statement raises; sqlite3's
    # own context manager only scopes the transaction and never closes.
    with closing(sqlite3.connect(str(db_path))) as conn:
        conn.execute("CREATE TABLE embeddings (rowid INTEGER PRIMARY KEY, seq_id)")
        conn.execute("INSERT INTO embeddings (seq_id) VALUES (?)", (sysdb10_blob,))
        conn.commit()

    _fix_blob_seq_ids(str(tmp_path))

    with closing(sqlite3.connect(str(db_path))) as conn:
        row = conn.execute("SELECT seq_id, typeof(seq_id) FROM embeddings").fetchone()

    # Still a BLOB — not converted to 1.23e18.
    assert row == (sysdb10_blob, "blob")
|
||||
|
||||
|
||||
def test_fix_blob_seq_ids_still_converts_legacy_blobs_in_embeddings(tmp_path):
    """Regression guard: pure big-endian u64 BLOBs still convert for genuine 0.6.x.

    A sysdb-10 prefixed BLOB is inserted between two legacy BLOBs to check
    that skipping it does not stop the rows around it from being converted.
    """
    from contextlib import closing

    db_path = tmp_path / "chroma.sqlite3"
    # Written as prefix + digits so the two-byte marker is not misread as a
    # longer hex escape; the value is identical to b"\x11\x11502607".
    sysdb10_blob = b"\x11\x11" + b"502607"
    seeded_blobs = [
        (42).to_bytes(8, "big"),
        sysdb10_blob,
        (7).to_bytes(8, "big"),
    ]

    # closing() releases the handle even when a statement raises; sqlite3's
    # own context manager only scopes the transaction and never closes.
    with closing(sqlite3.connect(str(db_path))) as conn:
        conn.execute("CREATE TABLE embeddings (rowid INTEGER PRIMARY KEY, seq_id)")
        # Inserted one at a time, in order, so rowids follow list order.
        for blob in seeded_blobs:
            conn.execute("INSERT INTO embeddings (seq_id) VALUES (?)", (blob,))
        conn.commit()

    _fix_blob_seq_ids(str(tmp_path))

    with closing(sqlite3.connect(str(db_path))) as conn:
        rows = conn.execute(
            "SELECT seq_id, typeof(seq_id) FROM embeddings ORDER BY rowid"
        ).fetchall()

    # One comparison so a failure shows every row, not just the first mismatch.
    assert rows == [
        (42, "integer"),
        (sysdb10_blob, "blob"),  # sysdb-10 row left alone
        (7, "integer"),
    ]
|
||||
|
||||
|
||||
def test_fix_blob_seq_ids_writes_marker_after_blob_path(tmp_path):
|
||||
"""The .blob_seq_ids_migrated marker is written after a successful BLOB → INTEGER conversion."""
|
||||
from mempalace.backends.chroma import _BLOB_FIX_MARKER
|
||||
@@ -447,7 +507,6 @@ def test_fix_blob_seq_ids_skips_sqlite_when_marker_present(tmp_path):
|
||||
|
||||
mock_connect.assert_not_called()
|
||||
|
||||
|
||||
# ── quarantine_stale_hnsw ─────────────────────────────────────────────────
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user