fix(searcher): guard API path + closet loop against None metadata too

Per Copilot review on the CLI-only PR (#999): search_memories() has the
same vulnerability in two additional spots, since ChromaDB can return
None entries in the inner metadatas list for either the drawer query or
the closets query. Without guards, the API path crashes with:

    AttributeError: 'NoneType' object has no attribute 'get'

at either \`cmeta.get("source_file", "")\` in the closet boost lookup or
\`meta.get("source_file", "") or ""\` in the drawer scoring loop.

Applies the matching \`meta = meta or {}\` / \`cmeta = cmeta or {}\`
guard at both sites and adds an API-path regression test that mocks a
drawer query result with a None metadata entry and asserts both hits
render — the None-metadata hit with the existing \`"unknown"\` sentinel
values the scoring loop already writes for missing keys.

Verified both the new API test and the existing CLI test fail without
the guards (AttributeError) and pass with them.
This commit is contained in:
jp
2026-04-18 10:37:05 -07:00
parent feba7e8043
commit 7690574dde
2 changed files with 33 additions and 0 deletions
+2
View File
@@ -373,6 +373,7 @@ def search_memories(
_first_or_empty(closet_results, "distances"),
)
):
cmeta = cmeta or {}
source = cmeta.get("source_file", "")
if source and source not in closet_boost_by_source:
closet_boost_by_source[source] = (rank, cdist, cdoc[:200])
@@ -395,6 +396,7 @@ def search_memories(
if max_distance > 0.0 and dist > max_distance:
continue
meta = meta or {}
source = meta.get("source_file", "") or ""
boost = 0.0
matched_via = "drawer"
+31
View File
@@ -89,6 +89,37 @@ class TestSearchMemories:
assert result["filters"]["wing"] == "project"
assert result["filters"]["room"] == "backend"
def test_search_memories_handles_none_metadata(self):
"""API path: `None` entries in the drawer results' metadatas list must
fall back to the sentinel strings (wing/room 'unknown', source '?')
rather than raising `AttributeError: 'NoneType' object has no
attribute 'get'` while the rest of the result set renders."""
mock_col = MagicMock()
mock_col.query.return_value = {
"documents": [["first doc", "second doc"]],
"metadatas": [[{"source_file": "a.md", "wing": "w", "room": "r"}, None]],
"distances": [[0.1, 0.2]],
"ids": [["d1", "d2"]],
}
def mock_get_collection(path, create=False):
# First call: drawers. Second call: closets — raise so hybrid
# degrades to pure drawer search (the catch block covers it).
if not hasattr(mock_get_collection, "_called"):
mock_get_collection._called = True
return mock_col
raise RuntimeError("no closets")
with patch("mempalace.searcher.get_collection", side_effect=mock_get_collection):
result = search_memories("anything", "/fake/path")
assert "results" in result
assert len(result["results"]) == 2
# The None-metadata hit renders with sentinel values, not a crash.
none_hit = result["results"][1]
assert none_hit["text"] == "second doc"
assert none_hit["wing"] == "unknown"
assert none_hit["room"] == "unknown"
# ── search() (CLI print function) ─────────────────────────────────────