Files
mempalace/tests/test_searcher.py
T
Igor Lins e Silva 133dfbfb41 fix(search): BM25 hybrid rerank, legacy-metric warning, invariant tests
Three tightly-coupled search-quality fixes for v3.3.3:

1. CLI `mempalace search` now routes through the same `_hybrid_rank`
   the MCP path already used. Drawers whose text contains every query
   term but embed as file-tree noise (directory listings, diffs, log
   fragments) were scoring cosine distance >= 1.0 — the display formula
   `max(0, 1 - dist)` then floored every result to `Match: 0.0`, with
   no way for the user to tell a lexical match from a total miss. BM25
   catches these cleanly; the display surfaces both `cosine=` and
   `bm25=` so users see which component is firing.

2. Legacy-palace distance-metric warning. Palaces created before
   `hnsw:space=cosine` was consistently set silently use ChromaDB's
   default L2 metric, which breaks the cosine-similarity formula (L2
   distances routinely exceed 1.0 on normalized 384-dim vectors). The
   search path now detects this at query time and prints a one-line
   notice pointing at `mempalace repair`. Only fires for legacy
   palaces; new palaces already set cosine correctly.

3. Invariant tests pinning `hnsw:space=cosine` on every collection-
   creation path — legacy `get_or_create_collection`, legacy
   `create_collection`, RFC 001 `get_collection(create=True)`, the
   public `palace.get_collection`, and a round-trip through reopen.
   Locks down the correctness that new-user palaces already have so a
   future refactor can't silently regress it.

Also adds a `metadata` property to `ChromaCollection` so callers can
read the underlying hnsw:space without reaching into `_collection`.

Tests:
- New regression: simulate three candidates at distance 1.5 (cosine=0),
  one containing query terms — must rank first with non-zero bm25.
- New: legacy metric (empty or non-cosine) produces stderr warning.
- New: correctly-configured palace produces no warning.
- New: all five creation paths pin cosine metadata.

All existing tests still pass.
2026-04-25 00:39:37 -03:00

273 lines
12 KiB
Python

"""
test_searcher.py -- Tests for both search() (CLI) and search_memories() (API).
Uses the real ChromaDB fixtures from conftest.py for integration tests,
plus mock-based tests for error paths.
"""
from unittest.mock import MagicMock, patch
import pytest
from mempalace.searcher import SearchError, search, search_memories
# ── search_memories (API) ──────────────────────────────────────────────
class TestSearchMemories:
def test_basic_search(self, palace_path, seeded_collection):
result = search_memories("JWT authentication", palace_path)
assert "results" in result
assert len(result["results"]) > 0
assert result["query"] == "JWT authentication"
def test_wing_filter(self, palace_path, seeded_collection):
result = search_memories("planning", palace_path, wing="notes")
assert all(r["wing"] == "notes" for r in result["results"])
def test_room_filter(self, palace_path, seeded_collection):
result = search_memories("database", palace_path, room="backend")
assert all(r["room"] == "backend" for r in result["results"])
def test_wing_and_room_filter(self, palace_path, seeded_collection):
result = search_memories("code", palace_path, wing="project", room="frontend")
assert all(r["wing"] == "project" and r["room"] == "frontend" for r in result["results"])
def test_n_results_limit(self, palace_path, seeded_collection):
result = search_memories("code", palace_path, n_results=2)
assert len(result["results"]) <= 2
def test_no_palace_returns_error(self, tmp_path):
result = search_memories("anything", str(tmp_path / "missing"))
assert "error" in result
def test_result_fields(self, palace_path, seeded_collection):
result = search_memories("authentication", palace_path)
hit = result["results"][0]
assert "text" in hit
assert "wing" in hit
assert "room" in hit
assert "source_file" in hit
assert "similarity" in hit
assert isinstance(hit["similarity"], float)
assert "created_at" in hit
def test_created_at_contains_filed_at(self, palace_path, seeded_collection):
"""created_at surfaces the filed_at metadata from the drawer."""
result = search_memories("JWT authentication", palace_path)
hit = result["results"][0]
assert hit["created_at"] == "2026-01-01T00:00:00"
def test_created_at_fallback_when_filed_at_missing(self):
"""created_at defaults to 'unknown' when filed_at is absent."""
mock_col = MagicMock()
mock_col.query.return_value = {
"ids": [["drawer_no_date"]],
"documents": [["Some text without a date"]],
"metadatas": [[{"wing": "project", "room": "backend", "source_file": "x.py"}]],
"distances": [[0.1]],
}
with patch("mempalace.searcher.get_collection", return_value=mock_col):
result = search_memories("test", "/fake/path")
hit = result["results"][0]
assert hit["created_at"] == "unknown"
def test_search_memories_query_error(self):
"""search_memories returns error dict when query raises."""
mock_col = MagicMock()
mock_col.query.side_effect = RuntimeError("query failed")
with patch("mempalace.searcher.get_collection", return_value=mock_col):
result = search_memories("test", "/fake/path")
assert "error" in result
assert "query failed" in result["error"]
def test_search_memories_filters_in_result(self, palace_path, seeded_collection):
result = search_memories("test", palace_path, wing="project", room="backend")
assert result["filters"]["wing"] == "project"
assert result["filters"]["room"] == "backend"
def test_search_memories_handles_none_metadata(self):
"""API path: `None` entries in the drawer results' metadatas list must
fall back to the sentinel strings (wing/room 'unknown', source '?')
rather than raising `AttributeError: 'NoneType' object has no
attribute 'get'` while the rest of the result set renders."""
mock_col = MagicMock()
mock_col.query.return_value = {
"documents": [["first doc", "second doc"]],
"metadatas": [[{"source_file": "a.md", "wing": "w", "room": "r"}, None]],
"distances": [[0.1, 0.2]],
"ids": [["d1", "d2"]],
}
def mock_get_collection(path, create=False):
# First call: drawers. Second call: closets — raise so hybrid
# degrades to pure drawer search (the catch block covers it).
if not hasattr(mock_get_collection, "_called"):
mock_get_collection._called = True
return mock_col
raise RuntimeError("no closets")
with patch("mempalace.searcher.get_collection", side_effect=mock_get_collection):
result = search_memories("anything", "/fake/path")
assert "results" in result
assert len(result["results"]) == 2
# The None-metadata hit renders with sentinel values, not a crash.
none_hit = result["results"][1]
assert none_hit["text"] == "second doc"
assert none_hit["wing"] == "unknown"
assert none_hit["room"] == "unknown"
# ── search() (CLI print function) ─────────────────────────────────────
class TestSearchCLI:
def test_search_prints_results(self, palace_path, seeded_collection, capsys):
search("JWT authentication", palace_path)
captured = capsys.readouterr()
assert "JWT" in captured.out or "authentication" in captured.out
def test_search_with_wing_filter(self, palace_path, seeded_collection, capsys):
search("planning", palace_path, wing="notes")
captured = capsys.readouterr()
assert "Results for" in captured.out
def test_search_with_room_filter(self, palace_path, seeded_collection, capsys):
search("database", palace_path, room="backend")
captured = capsys.readouterr()
assert "Room:" in captured.out
def test_search_with_wing_and_room(self, palace_path, seeded_collection, capsys):
search("code", palace_path, wing="project", room="frontend")
captured = capsys.readouterr()
assert "Wing:" in captured.out
assert "Room:" in captured.out
def test_search_no_palace_raises(self, tmp_path):
with pytest.raises(SearchError, match="No palace found"):
search("anything", str(tmp_path / "missing"))
def test_search_no_results(self, palace_path, collection, capsys):
"""Empty collection returns no results message."""
# collection is empty (no seeded data)
result = search("xyzzy_nonexistent_query", palace_path, n_results=1)
captured = capsys.readouterr()
# Either prints "No results" or returns None
assert result is None or "No results" in captured.out
def test_search_query_error_raises(self):
"""search raises SearchError when query fails."""
mock_col = MagicMock()
mock_col.query.side_effect = RuntimeError("boom")
with patch("mempalace.searcher.get_collection", return_value=mock_col):
with pytest.raises(SearchError, match="Search error"):
search("test", "/fake/path")
def test_search_n_results(self, palace_path, seeded_collection, capsys):
search("code", palace_path, n_results=1)
captured = capsys.readouterr()
# Should have output with at least one result block
assert "[1]" in captured.out
def test_search_applies_bm25_hybrid_rerank(self, capsys):
"""CLI search must call the same hybrid rerank that the MCP path uses.
Regression for a bug where the CLI only consulted ChromaDB cosine
distance: a drawer whose body contained every query term still
scored zero similarity if its embedding happened to be far from
the query (e.g. the drawer was a shell-output fragment that
embeds as "file tree noise"). Hybrid rerank fixes this by
combining BM25 with cosine — lexical matches rise above pure
vector noise.
Simulates: three candidates, all with distance >= 1.0 (cosine = 0);
candidate 2 contains every query term. After the fix, candidate 2
should rank first and display a non-zero bm25 score.
"""
mock_col = MagicMock()
mock_col.metadata = {"hnsw:space": "cosine"}
mock_col.query.return_value = {
"documents": [
[
"unrelated directory listing -rw-rw-r-- file.txt",
"foo bar baz is a multi-word phrase",
"another unrelated chunk about colors",
]
],
"metadatas": [
[
{"source_file": "a.md", "wing": "w", "room": "r"},
{"source_file": "b.md", "wing": "w", "room": "r"},
{"source_file": "c.md", "wing": "w", "room": "r"},
]
],
"distances": [[1.5, 1.5, 1.5]],
}
with patch("mempalace.searcher.get_collection", return_value=mock_col):
search("foo bar baz", "/fake/path")
captured = capsys.readouterr()
first_block, _, _ = captured.out.partition("[2]")
# Lexical match must rank first
assert (
"b.md" in first_block
), f"expected lexical match 'b.md' at rank 1, got:\n{captured.out}"
# Non-zero bm25 reported
assert "bm25=" in first_block
assert "bm25=0.0" not in first_block
# Cosine still reported for transparency
assert "cosine=" in first_block
def test_search_warns_when_palace_uses_wrong_distance_metric(self, capsys):
"""Legacy palaces created without `hnsw:space=cosine` silently
use L2, which breaks similarity interpretation. CLI must warn
the user and point them at `mempalace repair` rather than
pretending the `Match` scores are meaningful."""
mock_col = MagicMock()
mock_col.metadata = {} # legacy: no hnsw:space set
mock_col.query.return_value = {
"documents": [["some drawer content"]],
"metadatas": [[{"source_file": "a.md", "wing": "w", "room": "r"}]],
"distances": [[1.2]],
}
with patch("mempalace.searcher.get_collection", return_value=mock_col):
search("anything", "/fake/path")
captured = capsys.readouterr()
assert "mempalace repair" in captured.err
assert "cosine" in captured.err.lower()
def test_search_does_not_warn_when_palace_is_correctly_configured(self, capsys):
mock_col = MagicMock()
mock_col.metadata = {"hnsw:space": "cosine"}
mock_col.query.return_value = {
"documents": [["some drawer content"]],
"metadatas": [[{"source_file": "a.md", "wing": "w", "room": "r"}]],
"distances": [[0.3]],
}
with patch("mempalace.searcher.get_collection", return_value=mock_col):
search("anything", "/fake/path")
captured = capsys.readouterr()
assert "mempalace repair" not in captured.err
def test_search_handles_none_metadata_without_crash(self, palace_path, capsys):
"""ChromaDB can return `None` entries in the metadatas list when a
drawer has no metadata. The CLI print path must not crash on them
mid-render — it used to raise `AttributeError: 'NoneType' object has
no attribute 'get'` after printing earlier results."""
mock_col = MagicMock()
mock_col.query.return_value = {
"documents": [["first doc", "second doc"]],
"metadatas": [[{"source_file": "a.md", "wing": "w", "room": "r"}, None]],
"distances": [[0.1, 0.2]],
}
with patch("mempalace.searcher.get_collection", return_value=mock_col):
search("anything", "/fake/path")
captured = capsys.readouterr()
assert "[1]" in captured.out
assert "[2]" in captured.out
# Second result renders with fallback '?' values instead of crashing
assert "second doc" in captured.out