fix(search): BM25 hybrid rerank, legacy-metric warning, invariant tests
Three tightly-coupled search-quality fixes for v3.3.3: 1. CLI `mempalace search` now routes through the same `_hybrid_rank` the MCP path already used. Drawers whose text contains every query term but embed as file-tree noise (directory listings, diffs, log fragments) were scoring cosine distance >= 1.0 — the display formula `max(0, 1 - dist)` then floored every result to `Match: 0.0`, with no way for the user to tell a lexical match from a total miss. BM25 catches these cleanly; the display surfaces both `cosine=` and `bm25=` so users see which component is firing. 2. Legacy-palace distance-metric warning. Palaces created before `hnsw:space=cosine` was consistently set silently use ChromaDB's default L2 metric, which breaks the cosine-similarity formula (L2 distances routinely exceed 1.0 on normalized 384-dim vectors). The search path now detects this at query time and prints a one-line notice pointing at `mempalace repair`. Only fires for legacy palaces; new palaces already set cosine correctly. 3. Invariant tests pinning `hnsw:space=cosine` on every collection- creation path — legacy `get_or_create_collection`, legacy `create_collection`, RFC 001 `get_collection(create=True)`, the public `palace.get_collection`, and a round-trip through reopen. Locks down the correctness that new-user palaces already have so a future refactor can't silently regress it. Also adds a `metadata` property to `ChromaCollection` so callers can read the underlying hnsw:space without reaching into `_collection`. Tests: - New regression: simulate three candidates at distance 1.5 (cosine=0), one containing query terms — must rank first with non-zero bm25. - New: legacy metric (empty or non-cosine) produces stderr warning. - New: correctly-configured palace produces no warning. - New: all five creation paths pin cosine metadata. All existing tests still pass.
This commit is contained in:
@@ -0,0 +1,82 @@
|
||||
"""Invariant tests: every ChromaDB collection-creation path must set
|
||||
``hnsw:space=cosine``.
|
||||
|
||||
Reason: ChromaDB's default HNSW distance is L2 (Euclidean). Under L2,
|
||||
the searcher's ``max(0, 1 - distance)`` similarity formula systematically
|
||||
floors to 0 because L2 distances on normalized 384-dim vectors routinely
|
||||
exceed 1.0 — users then see flat ``Match: 0.0`` across every result and
|
||||
have no signal that their palace is broken.
|
||||
|
||||
This test file locks the invariant so a future refactor that drops the
|
||||
``metadata={"hnsw:space": "cosine"}`` parameter from any creation path
|
||||
gets caught at test time rather than silently degrading search quality.
|
||||
"""
|
||||
|
||||
import tempfile
|
||||
|
||||
from mempalace.backends.chroma import ChromaBackend
|
||||
from mempalace.palace import get_collection
|
||||
|
||||
|
||||
EXPECTED_METRIC = "cosine"
|
||||
|
||||
|
||||
def _assert_cosine(col, where: str) -> None:
|
||||
meta = col.metadata if hasattr(col, "metadata") else col._collection.metadata
|
||||
assert isinstance(meta, dict), f"{where}: expected metadata dict, got {meta!r}"
|
||||
assert meta.get("hnsw:space") == EXPECTED_METRIC, (
|
||||
f"{where}: expected hnsw:space={EXPECTED_METRIC!r}, got {meta!r}. "
|
||||
"A collection without cosine metric will silently break the "
|
||||
"similarity formula used by the searcher."
|
||||
)
|
||||
|
||||
|
||||
def test_legacy_get_or_create_collection_sets_cosine(tmp_path):
|
||||
backend = ChromaBackend()
|
||||
col = backend.get_or_create_collection(str(tmp_path), "mempalace_drawers")
|
||||
_assert_cosine(col, "legacy get_or_create_collection")
|
||||
|
||||
|
||||
def test_legacy_create_collection_sets_cosine(tmp_path):
|
||||
backend = ChromaBackend()
|
||||
col = backend.create_collection(str(tmp_path), "mempalace_drawers")
|
||||
_assert_cosine(col, "legacy create_collection")
|
||||
|
||||
|
||||
def test_new_get_collection_with_create_sets_cosine(tmp_path):
|
||||
"""RFC 001 typed surface — ``get_collection(..., create=True)`` is the
|
||||
path the miner + init flow take. Must also set cosine."""
|
||||
backend = ChromaBackend()
|
||||
col = backend.get_collection(str(tmp_path), "mempalace_drawers", create=True)
|
||||
_assert_cosine(col, "get_collection(create=True)")
|
||||
|
||||
|
||||
def test_palace_module_get_collection_sets_cosine(tmp_path):
|
||||
"""The public ``mempalace.palace.get_collection`` is what most callers
|
||||
use. Must produce cosine palaces."""
|
||||
col = get_collection(str(tmp_path), "mempalace_drawers", create=True)
|
||||
_assert_cosine(col, "palace.get_collection(create=True)")
|
||||
|
||||
|
||||
def test_reopening_cosine_palace_preserves_metric(tmp_path):
|
||||
"""Opening a previously-created cosine palace (create=False) must
|
||||
still expose the cosine metadata — catches any regression where
|
||||
reopening drops or overwrites metadata."""
|
||||
backend = ChromaBackend()
|
||||
backend.create_collection(str(tmp_path), "mempalace_drawers")
|
||||
# Fresh backend simulates a process restart
|
||||
backend2 = ChromaBackend()
|
||||
col = backend2.get_collection(str(tmp_path), "mempalace_drawers", create=False)
|
||||
_assert_cosine(col, "re-opened palace")
|
||||
|
||||
|
||||
def test_fresh_palace_via_full_stack_gets_cosine():
|
||||
"""End-to-end: build a palace with the public API the way a new user
|
||||
would, confirm the resulting collection uses cosine distance."""
|
||||
with tempfile.TemporaryDirectory() as tmp:
|
||||
col = get_collection(tmp, "mempalace_drawers", create=True)
|
||||
_assert_cosine(col, "full-stack new palace")
|
||||
|
||||
# And the closets collection too
|
||||
closets = get_collection(tmp, "mempalace_closets", create=True)
|
||||
_assert_cosine(closets, "full-stack new closets")
|
||||
@@ -173,6 +173,85 @@ class TestSearchCLI:
|
||||
# Should have output with at least one result block
|
||||
assert "[1]" in captured.out
|
||||
|
||||
def test_search_applies_bm25_hybrid_rerank(self, capsys):
|
||||
"""CLI search must call the same hybrid rerank that the MCP path uses.
|
||||
|
||||
Regression for a bug where the CLI only consulted ChromaDB cosine
|
||||
distance: a drawer whose body contained every query term still
|
||||
scored zero similarity if its embedding happened to be far from
|
||||
the query (e.g. the drawer was a shell-output fragment that
|
||||
embeds as "file tree noise"). Hybrid rerank fixes this by
|
||||
combining BM25 with cosine — lexical matches rise above pure
|
||||
vector noise.
|
||||
|
||||
Simulates: three candidates, all with distance >= 1.0 (cosine = 0);
|
||||
candidate 2 contains every query term. After the fix, candidate 2
|
||||
should rank first and display a non-zero bm25 score.
|
||||
"""
|
||||
mock_col = MagicMock()
|
||||
mock_col.metadata = {"hnsw:space": "cosine"}
|
||||
mock_col.query.return_value = {
|
||||
"documents": [
|
||||
[
|
||||
"unrelated directory listing -rw-rw-r-- file.txt",
|
||||
"foo bar baz is a multi-word phrase",
|
||||
"another unrelated chunk about colors",
|
||||
]
|
||||
],
|
||||
"metadatas": [
|
||||
[
|
||||
{"source_file": "a.md", "wing": "w", "room": "r"},
|
||||
{"source_file": "b.md", "wing": "w", "room": "r"},
|
||||
{"source_file": "c.md", "wing": "w", "room": "r"},
|
||||
]
|
||||
],
|
||||
"distances": [[1.5, 1.5, 1.5]],
|
||||
}
|
||||
with patch("mempalace.searcher.get_collection", return_value=mock_col):
|
||||
search("foo bar baz", "/fake/path")
|
||||
captured = capsys.readouterr()
|
||||
first_block, _, _ = captured.out.partition("[2]")
|
||||
# Lexical match must rank first
|
||||
assert (
|
||||
"b.md" in first_block
|
||||
), f"expected lexical match 'b.md' at rank 1, got:\n{captured.out}"
|
||||
# Non-zero bm25 reported
|
||||
assert "bm25=" in first_block
|
||||
assert "bm25=0.0" not in first_block
|
||||
# Cosine still reported for transparency
|
||||
assert "cosine=" in first_block
|
||||
|
||||
def test_search_warns_when_palace_uses_wrong_distance_metric(self, capsys):
|
||||
"""Legacy palaces created without `hnsw:space=cosine` silently
|
||||
use L2, which breaks similarity interpretation. CLI must warn
|
||||
the user and point them at `mempalace repair` rather than
|
||||
pretending the `Match` scores are meaningful."""
|
||||
mock_col = MagicMock()
|
||||
mock_col.metadata = {} # legacy: no hnsw:space set
|
||||
mock_col.query.return_value = {
|
||||
"documents": [["some drawer content"]],
|
||||
"metadatas": [[{"source_file": "a.md", "wing": "w", "room": "r"}]],
|
||||
"distances": [[1.2]],
|
||||
}
|
||||
with patch("mempalace.searcher.get_collection", return_value=mock_col):
|
||||
search("anything", "/fake/path")
|
||||
captured = capsys.readouterr()
|
||||
assert "mempalace repair" in captured.err
|
||||
assert "cosine" in captured.err.lower()
|
||||
|
||||
def test_search_does_not_warn_when_palace_is_correctly_configured(self, capsys):
|
||||
mock_col = MagicMock()
|
||||
mock_col.metadata = {"hnsw:space": "cosine"}
|
||||
mock_col.query.return_value = {
|
||||
"documents": [["some drawer content"]],
|
||||
"metadatas": [[{"source_file": "a.md", "wing": "w", "room": "r"}]],
|
||||
"distances": [[0.3]],
|
||||
}
|
||||
with patch("mempalace.searcher.get_collection", return_value=mock_col):
|
||||
search("anything", "/fake/path")
|
||||
captured = capsys.readouterr()
|
||||
assert "mempalace repair" not in captured.err
|
||||
|
||||
def test_search_handles_none_metadata_without_crash(self, palace_path, capsys):
|
||||
"""ChromaDB can return `None` entries in the metadatas list when a
|
||||
drawer has no metadata. The CLI print path must not crash on them
|
||||
|
||||
Reference in New Issue
Block a user