diff --git a/mempalace/backends/chroma.py b/mempalace/backends/chroma.py index 034c28b..d43b884 100644 --- a/mempalace/backends/chroma.py +++ b/mempalace/backends/chroma.py @@ -72,6 +72,12 @@ def _segment_appears_healthy(seg_dir: str) -> bool: can execute arbitrary code, and the byte-sniff is sufficient to distinguish a complete write from truncation, zero-fill, or partial-flush corruption. + + Assumes pickle protocol >= 2 (``0x80`` PROTO marker). Matches what + chromadb writes today; if a future chromadb version emits protocol + 0/1 segments, this check would start returning False on healthy + files and quarantine_stale_hnsw would conservatively rename them + out of the way (lazy rebuild on next open recovers). """ meta_path = os.path.join(seg_dir, "index_metadata.pickle") if not os.path.isfile(meta_path): @@ -621,6 +627,14 @@ class ChromaBackend(BaseBackend): # Real runtime drift is still handled — palace-daemon's ``_auto_repair`` # calls :func:`quarantine_stale_hnsw` directly on observed HNSW errors, # which bypasses this gate. + # + # Thread-safety: this set is mutated without a lock. Two concurrent + # ``make_client()`` calls for the same palace can both pass the + # membership check and both invoke ``quarantine_stale_hnsw``. That's + # safe because the function is idempotent (mtime check + timestamped + # rename of distinct directories), so the worst-case race produces + # one redundant rename attempt that no-ops. Idempotency is the + # safety property; locking would add cost without correctness gain. _quarantined_paths: set[str] = set() @staticmethod diff --git a/tests/test_backends.py b/tests/test_backends.py index ec4012a..da4f4d8 100644 --- a/tests/test_backends.py +++ b/tests/test_backends.py @@ -578,3 +578,52 @@ def test_make_client_quarantines_each_palace_independently(tmp_path, monkeypatch assert calls == [palace_a, palace_b] +# ── _pin_hnsw_threads (per-process retrofit, separate from this PR's gate) ── + + +def test_pin_hnsw_threads_retrofits_legacy_collection(tmp_path): + """Legacy collections (created without num_threads) get the retrofit applied.""" + palace_path = tmp_path / "legacy-palace" + palace_path.mkdir() + + client = chromadb.PersistentClient(path=str(palace_path)) + col = client.create_collection( + "mempalace_drawers", + metadata={"hnsw:space": "cosine"}, # no num_threads — legacy + ) + assert col.configuration_json.get("hnsw", {}).get("num_threads") is None + + _pin_hnsw_threads(col) + + assert col.configuration_json["hnsw"]["num_threads"] == 1 + + +def test_pin_hnsw_threads_swallows_all_errors(): + """Retrofit never raises even when collection.modify explodes.""" + + class _ExplodingCollection: + def modify(self, *args, **kwargs): + raise RuntimeError("boom") + + _pin_hnsw_threads(_ExplodingCollection()) # must not raise + + +def test_get_collection_applies_retrofit_on_existing_palace(tmp_path): + """ChromaBackend.get_collection(create=False) applies the retrofit.""" + palace_path = tmp_path / "palace" + palace_path.mkdir() + + # Simulate a legacy palace: create collection without num_threads + bootstrap_client = chromadb.PersistentClient(path=str(palace_path)) + bootstrap_client.create_collection("mempalace_drawers", metadata={"hnsw:space": "cosine"}) + del bootstrap_client # drop reference so a fresh client reopens cleanly + + wrapper = ChromaBackend().get_collection( + str(palace_path), + collection_name="mempalace_drawers", + create=False, + ) + + assert wrapper._collection.configuration_json["hnsw"]["num_threads"] == 1 + +