Files
mempalace/tests/test_searcher.py
T
jp 7690574dde fix(searcher): guard API path + closet loop against None metadata too
Per Copilot review on the CLI-only PR (#999): search_memories() has the
same vulnerability in two additional spots, since ChromaDB can return
None entries in the inner metadatas list for either the drawer query or
the closets query. Without guards, the API path crashes with:

    AttributeError: 'NoneType' object has no attribute 'get'

at either `cmeta.get("source_file", "")` in the closet boost lookup or
`meta.get("source_file", "") or ""` in the drawer scoring loop.

Applies the matching `meta = meta or {}` / `cmeta = cmeta or {}`
guard at both sites and adds an API-path regression test that mocks a
drawer query result with a None metadata entry and asserts both hits
render — the None-metadata hit with the existing `"unknown"` sentinel
values the scoring loop already writes for missing keys.

Verified both the new API test and the existing CLI test fail without
the guards (AttributeError) and pass with them.
2026-04-18 10:37:05 -07:00

194 lines
8.6 KiB
Python

"""
test_searcher.py -- Tests for both search() (CLI) and search_memories() (API).
Uses the real ChromaDB fixtures from conftest.py for integration tests,
plus mock-based tests for error paths.
"""
from unittest.mock import MagicMock, patch
import pytest
from mempalace.searcher import SearchError, search, search_memories
# ── search_memories (API) ──────────────────────────────────────────────
class TestSearchMemories:
    """Tests for search_memories(), the dict-returning API entry point.

    Tests that take ``palace_path`` / ``seeded_collection`` run against the
    fixtures supplied by conftest.py; the remaining tests patch
    ``mempalace.searcher.get_collection`` with a mock collection.
    """

    def test_basic_search(self, palace_path, seeded_collection):
        """A plain query returns at least one result and echoes the query."""
        response = search_memories("JWT authentication", palace_path)

        assert "results" in response
        assert len(response["results"]) > 0
        assert response["query"] == "JWT authentication"

    def test_wing_filter(self, palace_path, seeded_collection):
        """Every hit returned for wing="notes" carries that wing."""
        response = search_memories("planning", palace_path, wing="notes")

        for entry in response["results"]:
            assert entry["wing"] == "notes"

    def test_room_filter(self, palace_path, seeded_collection):
        """Every hit returned for room="backend" carries that room."""
        response = search_memories("database", palace_path, room="backend")

        for entry in response["results"]:
            assert entry["room"] == "backend"

    def test_wing_and_room_filter(self, palace_path, seeded_collection):
        """Combined wing + room filters both hold for every hit."""
        response = search_memories(
            "code", palace_path, wing="project", room="frontend"
        )

        for entry in response["results"]:
            assert entry["wing"] == "project"
            assert entry["room"] == "frontend"

    def test_n_results_limit(self, palace_path, seeded_collection):
        """No more than n_results hits come back."""
        hits = search_memories("code", palace_path, n_results=2)["results"]

        assert len(hits) <= 2

    def test_no_palace_returns_error(self, tmp_path):
        """A palace path that does not exist yields an "error" key."""
        missing_palace = tmp_path / "missing"

        response = search_memories("anything", str(missing_palace))

        assert "error" in response

    def test_result_fields(self, palace_path, seeded_collection):
        """The first hit exposes the expected keys and a float similarity."""
        top_hit = search_memories("authentication", palace_path)["results"][0]

        expected_keys = (
            "text",
            "wing",
            "room",
            "source_file",
            "similarity",
            "created_at",
        )
        for key in expected_keys:
            assert key in top_hit
        assert isinstance(top_hit["similarity"], float)

    def test_created_at_contains_filed_at(self, palace_path, seeded_collection):
        """created_at surfaces the filed_at metadata from the drawer."""
        response = search_memories("JWT authentication", palace_path)

        top_hit = response["results"][0]
        assert top_hit["created_at"] == "2026-01-01T00:00:00"

    def test_created_at_fallback_when_filed_at_missing(self):
        """created_at defaults to 'unknown' when filed_at is absent."""
        # The single drawer's metadata deliberately has no "filed_at" key.
        drawers = MagicMock()
        drawers.query.return_value = {
            "ids": [["drawer_no_date"]],
            "documents": [["Some text without a date"]],
            "metadatas": [
                [{"wing": "project", "room": "backend", "source_file": "x.py"}]
            ],
            "distances": [[0.1]],
        }

        with patch("mempalace.searcher.get_collection", return_value=drawers):
            response = search_memories("test", "/fake/path")

        assert response["results"][0]["created_at"] == "unknown"

    def test_search_memories_query_error(self):
        """search_memories returns an error dict when the query raises."""
        failing = MagicMock()
        failing.query.side_effect = RuntimeError("query failed")

        with patch("mempalace.searcher.get_collection", return_value=failing):
            response = search_memories("test", "/fake/path")

        assert "error" in response
        assert "query failed" in response["error"]

    def test_search_memories_filters_in_result(self, palace_path, seeded_collection):
        """The wing/room filters passed in are reported under "filters"."""
        response = search_memories(
            "test", palace_path, wing="project", room="backend"
        )

        applied = response["filters"]
        assert applied["wing"] == "project"
        assert applied["room"] == "backend"

    def test_search_memories_handles_none_metadata(self):
        """API path: a `None` entry in the drawer results' metadatas list
        must not raise `AttributeError: 'NoneType' object has no attribute
        'get'`; both hits are returned and the metadata-less one reports
        wing/room as 'unknown'."""
        drawers = MagicMock()
        drawers.query.return_value = {
            "documents": [["first doc", "second doc"]],
            "metadatas": [
                [{"source_file": "a.md", "wing": "w", "room": "r"}, None]
            ],
            "distances": [[0.1, 0.2]],
            "ids": [["d1", "d2"]],
        }

        # The first lookup (drawers) succeeds; every later lookup (closets)
        # raises so the hybrid search degrades to a pure drawer search.
        pending = [drawers]

        def fake_get_collection(path, create=False):
            if pending:
                return pending.pop()
            raise RuntimeError("no closets")

        with patch(
            "mempalace.searcher.get_collection", side_effect=fake_get_collection
        ):
            response = search_memories("anything", "/fake/path")

        assert "results" in response
        hits = response["results"]
        assert len(hits) == 2

        # The None-metadata hit renders with sentinel values, not a crash.
        bare_hit = hits[1]
        assert bare_hit["text"] == "second doc"
        assert bare_hit["wing"] == "unknown"
        assert bare_hit["room"] == "unknown"
# ── search() (CLI print function) ─────────────────────────────────────
class TestSearchCLI:
    """Tests for search(), the CLI print function.

    Output is inspected through pytest's ``capsys`` capture; failure paths
    are expected to raise ``SearchError``.
    """

    def test_search_prints_results(self, palace_path, seeded_collection, capsys):
        """Printed output mentions at least one of the query terms."""
        search("JWT authentication", palace_path)

        printed = capsys.readouterr().out
        assert any(term in printed for term in ("JWT", "authentication"))

    def test_search_with_wing_filter(self, palace_path, seeded_collection, capsys):
        """A wing-filtered search still prints the results header."""
        search("planning", palace_path, wing="notes")

        printed = capsys.readouterr().out
        assert "Results for" in printed

    def test_search_with_room_filter(self, palace_path, seeded_collection, capsys):
        """A room-filtered search prints a "Room:" line."""
        search("database", palace_path, room="backend")

        printed = capsys.readouterr().out
        assert "Room:" in printed

    def test_search_with_wing_and_room(self, palace_path, seeded_collection, capsys):
        """Filtering by wing and room prints both labels."""
        search("code", palace_path, wing="project", room="frontend")

        printed = capsys.readouterr().out
        for label in ("Wing:", "Room:"):
            assert label in printed

    def test_search_no_palace_raises(self, tmp_path):
        """A missing palace directory raises SearchError."""
        missing_palace = str(tmp_path / "missing")

        with pytest.raises(SearchError, match="No palace found"):
            search("anything", missing_palace)

    def test_search_no_results(self, palace_path, collection, capsys):
        """Empty collection returns no results message."""
        # `collection` is the unseeded fixture, so nothing can match.
        outcome = search("xyzzy_nonexistent_query", palace_path, n_results=1)

        printed = capsys.readouterr().out
        # Either prints "No results" or returns None
        assert outcome is None or "No results" in printed

    def test_search_query_error_raises(self):
        """search raises SearchError when the underlying query fails."""
        broken = MagicMock()
        broken.query.side_effect = RuntimeError("boom")
        patcher = patch("mempalace.searcher.get_collection", return_value=broken)

        with patcher, pytest.raises(SearchError, match="Search error"):
            search("test", "/fake/path")

    def test_search_n_results(self, palace_path, seeded_collection, capsys):
        """With n_results=1 the output contains the first result block."""
        search("code", palace_path, n_results=1)

        printed = capsys.readouterr().out
        # Should have output with at least one result block
        assert "[1]" in printed

    def test_search_handles_none_metadata_without_crash(self, palace_path, capsys):
        """ChromaDB can return `None` entries in the metadatas list when a
        drawer has no metadata. The CLI print path must not crash on them
        mid-render — it used to raise `AttributeError: 'NoneType' object has
        no attribute 'get'` after printing earlier results."""
        drawers = MagicMock()
        drawers.query.return_value = {
            "documents": [["first doc", "second doc"]],
            "metadatas": [
                [{"source_file": "a.md", "wing": "w", "room": "r"}, None]
            ],
            "distances": [[0.1, 0.2]],
        }

        with patch("mempalace.searcher.get_collection", return_value=drawers):
            search("anything", "/fake/path")

        printed = capsys.readouterr().out
        # Both result blocks are printed, and the metadata-less second
        # document's text still appears instead of the render crashing.
        for marker in ("[1]", "[2]", "second doc"):
            assert marker in printed