fix(repair): address PR #1310 review feedback
Five small hardening fixes for the from-sqlite rebuild path, all from mjc's review on #1310: - repair.py: drawers collection name now resolves from MempalaceConfig().collection_name via _drawers_collection_name() (closets stays fixed by design — AAAK index references drawer IDs by string). Lines up with the broader configured-collection work in #1312 so that PR can rebase cleanly on top. - repair.py: create_collection() moved inside the try block in _rebuild_one_collection so a Chroma "Collection already exists" failure surfaces as RebuildPartialError with archive_path, not an unstructured exception that strands the user without recovery instructions. - repair.py: rebuild_from_sqlite wraps backend lifetime in try/finally with backend.close() so PersistentClient handles to dest_palace are released on every exit path. The from-sqlite path post-dates #1285's lifecycle hardening of the legacy rebuild, so this needed its own cleanup. - cli.py: cmd_repair (from-sqlite mode) now exits non-zero when rebuild_from_sqlite returns {} (validation refusal sentinel), so unattended scripts/CI distinguish "invalid inputs" from a successful rebuild that legitimately found zero rows. - tests/test_repair.py: test_extract_via_sqlite_returns_all_rows_with_metadata now asserts every backing segment is scope='METADATA', locking in the segment-layout assumption against future regressions that point the JOIN at the VECTOR segment. New test coverage: - test_rebuild_from_sqlite_honors_configured_drawer_collection_name - test_cmd_repair_from_sqlite_validation_refusal_exits_nonzero - test_cmd_repair_from_sqlite_success_does_not_exit Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
committed by
Igor Lins e Silva
parent
cb6bfd5231
commit
d92c741084
+10
-1
@@ -699,7 +699,7 @@ def cmd_repair(args):
|
|||||||
return
|
return
|
||||||
|
|
||||||
try:
|
try:
|
||||||
rebuild_from_sqlite(
|
counts = rebuild_from_sqlite(
|
||||||
source_palace=source_path,
|
source_palace=source_path,
|
||||||
dest_palace=palace_path,
|
dest_palace=palace_path,
|
||||||
archive_existing_dest=archive_existing,
|
archive_existing_dest=archive_existing,
|
||||||
@@ -713,6 +713,15 @@ def cmd_repair(args):
|
|||||||
f"Failed in collection: {exc.failed_collection}"
|
f"Failed in collection: {exc.failed_collection}"
|
||||||
)
|
)
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
# An empty counts dict is rebuild_from_sqlite's documented signal
|
||||||
|
# for a validation refusal (missing source, existing dest,
|
||||||
|
# in-place without --archive-existing). The library already
|
||||||
|
# printed an actionable message; exit non-zero so unattended
|
||||||
|
# scripts/CI distinguish "invalid inputs" from a successful
|
||||||
|
# rebuild that legitimately found zero rows (which still returns
|
||||||
|
# a populated dict with 0-valued counts).
|
||||||
|
if not counts:
|
||||||
|
sys.exit(1)
|
||||||
return
|
return
|
||||||
|
|
||||||
db_path = os.path.join(palace_path, "chroma.sqlite3")
|
db_path = os.path.join(palace_path, "chroma.sqlite3")
|
||||||
|
|||||||
+90
-31
@@ -43,11 +43,46 @@ from .backends.chroma import ChromaBackend, hnsw_capacity_status
|
|||||||
|
|
||||||
COLLECTION_NAME = "mempalace_drawers"
|
COLLECTION_NAME = "mempalace_drawers"
|
||||||
|
|
||||||
# Collections rebuilt by ``rebuild_from_sqlite``. Order matters for the
|
# The closets collection (AAAK index layer) is intentionally fixed —
|
||||||
# upsert pass — drawers carry the bulk of the data, closets are the AAAK
|
# closets reference drawer IDs by string and live alongside drawers in the
|
||||||
# index layer and reference drawer IDs by string in their documents (no
|
# same palace; renaming the closets collection per-deployment would break
|
||||||
# foreign-key validation, so ordering is informational, not load-bearing).
|
# cross-palace AAAK lookups. Drawer collection name comes from config
|
||||||
RECOVERABLE_COLLECTIONS = ("mempalace_drawers", "mempalace_closets")
|
# (see ``_recoverable_collections``).
|
||||||
|
CLOSETS_COLLECTION_NAME = "mempalace_closets"
|
||||||
|
|
||||||
|
|
||||||
|
def _drawers_collection_name() -> str:
|
||||||
|
"""Resolve the drawers collection name from user config, falling back
|
||||||
|
to the module default ``COLLECTION_NAME`` if config is unreadable.
|
||||||
|
|
||||||
|
Recovery flows must honor ``MempalaceConfig().collection_name`` so a
|
||||||
|
user with a non-default drawer collection (e.g. multi-palace setups)
|
||||||
|
rebuilds the right rows. Closets remain fixed — see
|
||||||
|
``CLOSETS_COLLECTION_NAME``.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
from .config import MempalaceConfig
|
||||||
|
|
||||||
|
return MempalaceConfig().collection_name or COLLECTION_NAME
|
||||||
|
except Exception:
|
||||||
|
return COLLECTION_NAME
|
||||||
|
|
||||||
|
|
||||||
|
def _recoverable_collections() -> tuple[str, ...]:
|
||||||
|
"""Collections rebuilt by ``rebuild_from_sqlite``, in upsert order.
|
||||||
|
|
||||||
|
Drawers first (bulk data), then closets (AAAK index layer that
|
||||||
|
references drawer IDs by string in their documents — no
|
||||||
|
foreign-key validation, so ordering is informational, not
|
||||||
|
load-bearing).
|
||||||
|
"""
|
||||||
|
return (_drawers_collection_name(), CLOSETS_COLLECTION_NAME)
|
||||||
|
|
||||||
|
|
||||||
|
# Back-compat alias for callers that imported the constant. New code
|
||||||
|
# should call ``_recoverable_collections()`` so config changes are picked
|
||||||
|
# up at call time.
|
||||||
|
RECOVERABLE_COLLECTIONS = (COLLECTION_NAME, CLOSETS_COLLECTION_NAME)
|
||||||
|
|
||||||
|
|
||||||
def _get_palace_path():
|
def _get_palace_path():
|
||||||
@@ -487,12 +522,11 @@ def _rebuild_one_collection(
|
|||||||
caller can stop the loop and print recovery instructions instead of
|
caller can stop the loop and print recovery instructions instead of
|
||||||
silently shipping a partial palace.
|
silently shipping a partial palace.
|
||||||
"""
|
"""
|
||||||
col = backend.create_collection(dest_palace, collection_name)
|
|
||||||
|
|
||||||
ids: list[str] = []
|
ids: list[str] = []
|
||||||
docs: list[str] = []
|
docs: list[str] = []
|
||||||
metas: list[dict] = []
|
metas: list[dict] = []
|
||||||
upserted = 0
|
upserted = 0
|
||||||
|
col = None
|
||||||
|
|
||||||
def _flush() -> int:
|
def _flush() -> int:
|
||||||
nonlocal upserted
|
nonlocal upserted
|
||||||
@@ -507,6 +541,14 @@ def _rebuild_one_collection(
|
|||||||
return upserted
|
return upserted
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
# ``create_collection`` lives inside the try so a Chroma-side
|
||||||
|
# "Collection already exists" failure (which can happen when the
|
||||||
|
# process-wide System cache still holds a pre-archive schema) is
|
||||||
|
# reported as a structured ``RebuildPartialError`` carrying
|
||||||
|
# ``archive_path`` — instead of an unstructured exception that
|
||||||
|
# strands the user without recovery instructions.
|
||||||
|
col = backend.create_collection(dest_palace, collection_name)
|
||||||
|
|
||||||
for emb_id, doc, meta in extract_via_sqlite(source_palace, collection_name):
|
for emb_id, doc, meta in extract_via_sqlite(source_palace, collection_name):
|
||||||
ids.append(emb_id)
|
ids.append(emb_id)
|
||||||
docs.append(doc or "")
|
docs.append(doc or "")
|
||||||
@@ -664,8 +706,14 @@ def rebuild_from_sqlite(
|
|||||||
|
|
||||||
Returns a ``{collection_name: row_count}`` dict so callers (CLI,
|
Returns a ``{collection_name: row_count}`` dict so callers (CLI,
|
||||||
tests) can verify the per-collection rebuild count without parsing
|
tests) can verify the per-collection rebuild count without parsing
|
||||||
stdout. Returns ``{}`` on validation failures (missing source,
|
stdout. A successful rebuild always returns a dict with one key per
|
||||||
refusing to overwrite). Raises :class:`RebuildPartialError` if a
|
recoverable collection (values may be ``0`` when a collection is
|
||||||
|
legitimately empty in the source). The empty dict ``{}`` is reserved
|
||||||
|
for validation refusals (missing source DB, refusing to overwrite an
|
||||||
|
existing dest, in-place mode without ``archive_existing_dest``); CLI
|
||||||
|
callers should treat ``{}`` as an error and exit non-zero so CI and
|
||||||
|
scripts can distinguish "invalid inputs" from "successful recovery
|
||||||
|
that found zero rows." Raises :class:`RebuildPartialError` if a
|
||||||
chromadb upsert fails partway through; the dest palace is left in
|
chromadb upsert fails partway through; the dest palace is left in
|
||||||
place so the user can inspect what landed, and the in-place archive
|
place so the user can inspect what landed, and the in-place archive
|
||||||
(when applicable) is reported in the error so the user can re-run
|
(when applicable) is reported in the error so the user can re-run
|
||||||
@@ -765,31 +813,42 @@ def rebuild_from_sqlite(
|
|||||||
|
|
||||||
os.makedirs(dest_palace, exist_ok=True)
|
os.makedirs(dest_palace, exist_ok=True)
|
||||||
|
|
||||||
|
# Backend lifetime is wrapped in try/finally so the dest palace's
|
||||||
|
# PersistentClient handle (opened lazily inside ``create_collection``
|
||||||
|
# / ``get_collection``) is released on every exit path: success,
|
||||||
|
# ``RebuildPartialError``, or any unexpected exception. Without this,
|
||||||
|
# a long-running process that calls ``rebuild_from_sqlite`` would
|
||||||
|
# leak SQLite/HNSW file handles into Chroma's ``SharedSystemClient``
|
||||||
|
# cache, surfacing later as "Collection already exists" on the next
|
||||||
|
# in-place rebuild or as a Windows file-lock failure on cleanup
|
||||||
|
# (cf. #1285's lifecycle hardening for the legacy rebuild path).
|
||||||
backend = ChromaBackend()
|
backend = ChromaBackend()
|
||||||
counts: dict[str, int] = {}
|
counts: dict[str, int] = {}
|
||||||
|
try:
|
||||||
|
for cname in _recoverable_collections():
|
||||||
|
print(f"\n [{cname}]")
|
||||||
|
upserted = _rebuild_one_collection(
|
||||||
|
backend=backend,
|
||||||
|
source_palace=source_palace,
|
||||||
|
dest_palace=dest_palace,
|
||||||
|
collection_name=cname,
|
||||||
|
batch_size=batch_size,
|
||||||
|
archive_path=archive_path,
|
||||||
|
counts_so_far=counts,
|
||||||
|
)
|
||||||
|
counts[cname] = upserted
|
||||||
|
if upserted == 0:
|
||||||
|
print(f" no rows found for {cname} in source palace")
|
||||||
|
else:
|
||||||
|
print(f" done: {upserted} rows in {cname}")
|
||||||
|
|
||||||
for cname in RECOVERABLE_COLLECTIONS:
|
print(f"\n Rebuild complete. {sum(counts.values())} total rows.")
|
||||||
print(f"\n [{cname}]")
|
if archive_path is not None:
|
||||||
upserted = _rebuild_one_collection(
|
print(f" Original palace archived at: {archive_path}")
|
||||||
backend=backend,
|
print(f"{'=' * 55}\n")
|
||||||
source_palace=source_palace,
|
return counts
|
||||||
dest_palace=dest_palace,
|
finally:
|
||||||
collection_name=cname,
|
backend.close()
|
||||||
batch_size=batch_size,
|
|
||||||
archive_path=archive_path,
|
|
||||||
counts_so_far=counts,
|
|
||||||
)
|
|
||||||
counts[cname] = upserted
|
|
||||||
if upserted == 0:
|
|
||||||
print(f" no rows found for {cname} in source palace")
|
|
||||||
else:
|
|
||||||
print(f" done: {upserted} rows in {cname}")
|
|
||||||
|
|
||||||
print(f"\n Rebuild complete. {sum(counts.values())} total rows.")
|
|
||||||
if archive_path is not None:
|
|
||||||
print(f" Original palace archived at: {archive_path}")
|
|
||||||
print(f"{'=' * 55}\n")
|
|
||||||
return counts
|
|
||||||
|
|
||||||
|
|
||||||
def status(palace_path=None) -> dict:
|
def status(palace_path=None) -> dict:
|
||||||
|
|||||||
@@ -1097,3 +1097,64 @@ def test_reconfigure_stdio_is_noop_off_windows():
|
|||||||
_reconfigure_stdio_utf8_on_windows()
|
_reconfigure_stdio_utf8_on_windows()
|
||||||
|
|
||||||
assert stdin.reconfigure_calls == []
|
assert stdin.reconfigure_calls == []
|
||||||
|
|
||||||
|
|
||||||
|
# ── cmd_repair: from-sqlite mode exit codes ──────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
@patch("mempalace.cli.MempalaceConfig")
|
||||||
|
def test_cmd_repair_from_sqlite_validation_refusal_exits_nonzero(mock_config_cls, tmp_path, capsys):
|
||||||
|
"""When ``rebuild_from_sqlite`` returns ``{}`` for a validation
|
||||||
|
refusal (missing source DB, in-place without --archive-existing,
|
||||||
|
refusing to overwrite an existing dest), the CLI must surface a
|
||||||
|
non-zero exit so unattended scripts and CI distinguish "invalid
|
||||||
|
inputs" from "successful recovery that found zero rows."
|
||||||
|
|
||||||
|
Catches: a regression where the CLI treats the validation-refusal
|
||||||
|
sentinel as success, leaving CI green on a no-op repair that should
|
||||||
|
have alerted an operator.
|
||||||
|
"""
|
||||||
|
palace_dir = tmp_path / "palace"
|
||||||
|
palace_dir.mkdir()
|
||||||
|
mock_config_cls.return_value.palace_path = str(palace_dir)
|
||||||
|
|
||||||
|
args = argparse.Namespace(
|
||||||
|
palace=str(palace_dir),
|
||||||
|
mode="from-sqlite",
|
||||||
|
source=None,
|
||||||
|
archive_existing=False,
|
||||||
|
yes=True,
|
||||||
|
)
|
||||||
|
with patch("mempalace.repair.rebuild_from_sqlite", return_value={}):
|
||||||
|
with pytest.raises(SystemExit) as excinfo:
|
||||||
|
cmd_repair(args)
|
||||||
|
assert excinfo.value.code == 1
|
||||||
|
|
||||||
|
|
||||||
|
@patch("mempalace.cli.MempalaceConfig")
|
||||||
|
def test_cmd_repair_from_sqlite_success_does_not_exit(mock_config_cls, tmp_path):
|
||||||
|
"""A successful from-sqlite rebuild — even one that finds zero rows
|
||||||
|
in a legitimately empty source palace — must NOT call ``sys.exit``.
|
||||||
|
A populated counts dict (with ``0`` values) is the success signal;
|
||||||
|
only the empty dict ``{}`` is reserved for validation refusal.
|
||||||
|
|
||||||
|
Catches: a regression where ``if not counts`` is replaced by
|
||||||
|
``if not sum(counts.values())`` or similar, conflating "empty source"
|
||||||
|
with "validation refused" and breaking idempotent recovery scripts.
|
||||||
|
"""
|
||||||
|
palace_dir = tmp_path / "palace"
|
||||||
|
palace_dir.mkdir()
|
||||||
|
mock_config_cls.return_value.palace_path = str(palace_dir)
|
||||||
|
|
||||||
|
args = argparse.Namespace(
|
||||||
|
palace=str(palace_dir),
|
||||||
|
mode="from-sqlite",
|
||||||
|
source=None,
|
||||||
|
archive_existing=False,
|
||||||
|
yes=True,
|
||||||
|
)
|
||||||
|
# Zero rows but per-collection keys present → success, no exit.
|
||||||
|
fake_counts = {"mempalace_drawers": 0, "mempalace_closets": 0}
|
||||||
|
with patch("mempalace.repair.rebuild_from_sqlite", return_value=fake_counts):
|
||||||
|
# Should return cleanly; no SystemExit raised.
|
||||||
|
cmd_repair(args)
|
||||||
|
|||||||
@@ -719,6 +719,13 @@ def test_extract_via_sqlite_returns_all_rows_with_metadata(tmp_path):
|
|||||||
Catches: anyone who breaks the segments/embeddings/embedding_metadata
|
Catches: anyone who breaks the segments/embeddings/embedding_metadata
|
||||||
JOIN, swaps the metadata vs vector segment, or changes how the
|
JOIN, swaps the metadata vs vector segment, or changes how the
|
||||||
document is stored under the ``chroma:document`` key.
|
document is stored under the ``chroma:document`` key.
|
||||||
|
|
||||||
|
Also asserts every embedding row underlying the extraction lives in
|
||||||
|
a ``segments.scope = 'METADATA'`` segment. Document + metadata rows
|
||||||
|
are stored under METADATA in Chroma's segment layout while HNSW
|
||||||
|
files live under ``VECTOR``; locking that assumption in here means a
|
||||||
|
future refactor that accidentally points the JOIN at ``VECTOR``
|
||||||
|
fails this test instead of silently regressing the recovery path.
|
||||||
"""
|
"""
|
||||||
rows = [
|
rows = [
|
||||||
(f"drawer_{i:03d}", f"document body {i}", {"wing": "test_wing", "room": f"r{i % 3}"})
|
(f"drawer_{i:03d}", f"document body {i}", {"wing": "test_wing", "room": f"r{i % 3}"})
|
||||||
@@ -736,6 +743,35 @@ def test_extract_via_sqlite_returns_all_rows_with_metadata(tmp_path):
|
|||||||
assert got_doc == doc, f"document mangled for {emb_id}"
|
assert got_doc == doc, f"document mangled for {emb_id}"
|
||||||
assert got_meta == meta, f"metadata mangled for {emb_id}: {got_meta!r}"
|
assert got_meta == meta, f"metadata mangled for {emb_id}: {got_meta!r}"
|
||||||
|
|
||||||
|
# Lock the segment-scope assumption directly against Chroma's on-disk
|
||||||
|
# layout so a future change that points the extraction JOIN at the
|
||||||
|
# VECTOR segment cannot pass this test. Query each extracted row's
|
||||||
|
# backing segment scope via the same SQLite tables ``extract_via_sqlite``
|
||||||
|
# reads from.
|
||||||
|
sqlite_path = os.path.join(str(tmp_path), "chroma.sqlite3")
|
||||||
|
conn = sqlite3.connect(f"file:{sqlite_path}?mode=ro", uri=True)
|
||||||
|
try:
|
||||||
|
scopes = {
|
||||||
|
scope
|
||||||
|
for (scope,) in conn.execute(
|
||||||
|
"""
|
||||||
|
SELECT DISTINCT s.scope
|
||||||
|
FROM embeddings e
|
||||||
|
JOIN segments s ON e.segment_id = s.id
|
||||||
|
JOIN collections c ON s.collection = c.id
|
||||||
|
WHERE c.name = ? AND e.embedding_id IN ({})
|
||||||
|
""".format(",".join("?" * len(extracted))),
|
||||||
|
("mempalace_drawers", *(emb_id for emb_id, _, _ in extracted)),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
finally:
|
||||||
|
conn.close()
|
||||||
|
assert scopes == {"METADATA"}, (
|
||||||
|
f"extraction is reading from segments scoped {scopes!r}; only "
|
||||||
|
"'METADATA' should back the document/metadata rows. If Chroma's "
|
||||||
|
"segment layout changed, update extract_via_sqlite's WHERE clause."
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def test_extract_via_sqlite_preserves_typed_metadata(tmp_path):
|
def test_extract_via_sqlite_preserves_typed_metadata(tmp_path):
|
||||||
"""Chromadb stores int / float / bool / string in distinct typed
|
"""Chromadb stores int / float / bool / string in distinct typed
|
||||||
@@ -973,3 +1009,56 @@ def test_rebuild_from_sqlite_raises_on_upsert_failure(tmp_path, monkeypatch):
|
|||||||
assert err.archive_path is not None
|
assert err.archive_path is not None
|
||||||
assert os.path.isfile(os.path.join(err.archive_path, "chroma.sqlite3"))
|
assert os.path.isfile(os.path.join(err.archive_path, "chroma.sqlite3"))
|
||||||
assert err.dest_palace == os.path.abspath(str(palace))
|
assert err.dest_palace == os.path.abspath(str(palace))
|
||||||
|
|
||||||
|
|
||||||
|
def test_rebuild_from_sqlite_honors_configured_drawer_collection_name(tmp_path, monkeypatch):
|
||||||
|
"""A user with a non-default drawers collection name (set via
|
||||||
|
``MempalaceConfig().collection_name``) must have THAT collection
|
||||||
|
rebuilt — not the hardcoded ``mempalace_drawers``.
|
||||||
|
|
||||||
|
Catches: a regression where the recovery path silently rebuilds the
|
||||||
|
default-name collection on a custom-named palace, leaving the user's
|
||||||
|
actual data unrebuilt while reporting "rebuild complete." This is
|
||||||
|
the failure mode reviewer mjc flagged on PR #1310 as needing to line
|
||||||
|
up with the configured-collection-name work in #1312. Closets stay
|
||||||
|
fixed (``mempalace_closets``) by design — the AAAK index references
|
||||||
|
drawer IDs by string and is not per-deployment configurable.
|
||||||
|
|
||||||
|
Strategy: monkeypatch the lazy resolver so the test is hermetic and
|
||||||
|
does not depend on the global config file or env state.
|
||||||
|
"""
|
||||||
|
from mempalace.backends.chroma import ChromaBackend
|
||||||
|
|
||||||
|
custom_drawers = "custom_drawers_xyz"
|
||||||
|
monkeypatch.setattr(repair, "_drawers_collection_name", lambda: custom_drawers)
|
||||||
|
|
||||||
|
source = tmp_path / "source"
|
||||||
|
dest = tmp_path / "dest"
|
||||||
|
|
||||||
|
drawer_rows = [(f"d{i}", f"body {i}", {"wing": "alpha"}) for i in range(3)]
|
||||||
|
closet_rows = [("closet_a", "abbrev →d0", {"wing": "alpha"})]
|
||||||
|
_seed_palace(source, custom_drawers, drawer_rows)
|
||||||
|
_seed_palace(source, "mempalace_closets", closet_rows)
|
||||||
|
|
||||||
|
counts = repair.rebuild_from_sqlite(str(source), str(dest))
|
||||||
|
|
||||||
|
# Rebuilt under the custom name, not under the default "mempalace_drawers".
|
||||||
|
assert counts == {custom_drawers: 3, "mempalace_closets": 1}
|
||||||
|
|
||||||
|
backend = ChromaBackend()
|
||||||
|
rebuilt_drawers = backend.get_collection(str(dest), custom_drawers)
|
||||||
|
assert rebuilt_drawers.count() == 3
|
||||||
|
|
||||||
|
# Default-name collection must NOT exist in dest — proves we did not
|
||||||
|
# silently fall back to the hardcoded name during rebuild.
|
||||||
|
try:
|
||||||
|
rebuilt_default = backend.get_collection(str(dest), "mempalace_drawers")
|
||||||
|
# If get_collection returns without raising, count() should be 0
|
||||||
|
# (chromadb may auto-create on get with some EFs); a non-zero
|
||||||
|
# count would mean we wrote rows to the wrong collection.
|
||||||
|
assert rebuilt_default.count() == 0, (
|
||||||
|
"rebuild leaked rows into the default-name collection on a "
|
||||||
|
"custom-name palace — recovery wrote to the wrong collection."
|
||||||
|
)
|
||||||
|
except Exception:
|
||||||
|
pass # Expected: collection wasn't created.
|
||||||
|
|||||||
Reference in New Issue
Block a user