diff --git a/mempalace/backends/chroma.py b/mempalace/backends/chroma.py index 01ac627..d9b99a4 100644 --- a/mempalace/backends/chroma.py +++ b/mempalace/backends/chroma.py @@ -993,7 +993,7 @@ class ChromaBackend(BaseBackend): ) if cached is None or inode_changed or mtime_changed or mtime_appeared: - _fix_blob_seq_ids(palace_path) + ChromaBackend._prepare_palace_for_open(palace_path) cached = chromadb.PersistentClient(path=palace_path) self._clients[palace_path] = cached # Re-stat after the client constructor runs: chromadb creates @@ -1028,6 +1028,31 @@ class ChromaBackend(BaseBackend): # safety property; locking would add cost without correctness gain. _quarantined_paths: set[str] = set() + @staticmethod + def _prepare_palace_for_open(palace_path: str) -> None: + """Run the pre-open safety pass shared by :meth:`make_client` and + :meth:`_client`. + + Two steps, both required before constructing a ``PersistentClient``: + + 1. ``_fix_blob_seq_ids`` — repairs the BLOB seq_id quirk that bites + certain chromadb migrations. + 2. ``quarantine_stale_hnsw`` — gated by :attr:`_quarantined_paths` so + it fires once per palace per process. This is the SIGSEGV + prevention path for stale HNSW segments (see #1121, #1132, #1263); + wiring it through this helper means CLI mining, search, repair, + and status all benefit, not just the legacy ``make_client`` + callers. + + Idempotent: safe to call from any code path that is about to open or + re-open a palace. The ``_quarantined_paths`` gate prevents thrash on + hot paths (e.g. ``_client()`` is called on every backend operation). + """ + _fix_blob_seq_ids(palace_path) + if palace_path not in ChromaBackend._quarantined_paths: + quarantine_stale_hnsw(palace_path) + ChromaBackend._quarantined_paths.add(palace_path) + @staticmethod def make_client(palace_path: str): """Create a fresh ``PersistentClient`` (fixes BLOB seq_ids first). @@ -1040,10 +1065,7 @@ class ChromaBackend(BaseBackend): :attr:`_quarantined_paths` for the rationale (cold-start protection vs. runtime thrash on steady-write daemons). """ - _fix_blob_seq_ids(palace_path) - if palace_path not in ChromaBackend._quarantined_paths: - quarantine_stale_hnsw(palace_path) - ChromaBackend._quarantined_paths.add(palace_path) + ChromaBackend._prepare_palace_for_open(palace_path) return chromadb.PersistentClient(path=palace_path) @staticmethod diff --git a/tests/test_backends.py b/tests/test_backends.py index 5efa71b..4ddfe12 100644 --- a/tests/test_backends.py +++ b/tests/test_backends.py @@ -764,6 +764,67 @@ def test_make_client_quarantines_each_palace_independently(tmp_path, monkeypatch assert calls == [palace_a, palace_b] +# ── _client() cold-start gate (#1121, #1132, #1263) ────────────────────── + + +def test_client_quarantines_corrupt_segment_on_first_open(tmp_path, monkeypatch): + """The instance ``_client()`` path must run ``quarantine_stale_hnsw`` + on first open, mirroring the ``make_client()`` static helper. Before + PR #1173's wiring was extended here, CLI mining / search / repair / + status all skipped the quarantine pass and would SIGSEGV on a stale + HNSW segment (#1121, #1132, #1263).""" + now = 1_700_000_000.0 + palace, seg = _make_palace_with_segment( + tmp_path, + hnsw_mtime=now - 7200, + sqlite_mtime=now, + meta_bytes=_CORRUPT_META, + ) + + monkeypatch.setattr(ChromaBackend, "_quarantined_paths", set()) + + backend = ChromaBackend() + try: + backend._client(str(palace)) + finally: + backend.close() + + assert not seg.exists(), "_client() should have quarantined the corrupt segment" + drift_dirs = [p for p in palace.iterdir() if ".drift-" in p.name] + assert len(drift_dirs) == 1 + + +def test_client_quarantines_only_on_first_call_per_palace(tmp_path, monkeypatch): + """Repeated ``_client()`` calls for the same palace re-run quarantine + at most once — the ``_quarantined_paths`` gate prevents runtime + thrash on hot paths (``_client()`` is hit on every backend op).""" + palace_path = str(tmp_path / "palace") + os.makedirs(palace_path, exist_ok=True) + (Path(palace_path) / "chroma.sqlite3").write_text("") + + monkeypatch.setattr(ChromaBackend, "_quarantined_paths", set()) + + calls: list[str] = [] + + def _spy(path, stale_seconds=300.0): + calls.append(path) + return [] + + monkeypatch.setattr("mempalace.backends.chroma.quarantine_stale_hnsw", _spy) + + backend = ChromaBackend() + try: + backend._client(palace_path) + backend._client(palace_path) + backend._client(palace_path) + finally: + backend.close() + + assert ( + calls == [palace_path] + ), "quarantine_stale_hnsw should fire once per palace per process from _client(), not on every call" + + # ── _pin_hnsw_threads (per-process retrofit, separate from this PR's gate) ──