fix(searcher): guard API path + closet loop against None metadata too
Per Copilot review on the CLI-only PR (#999): search_memories() has the same vulnerability in two additional spots, since ChromaDB can return None entries in the inner metadatas list for either the drawer query or the closets query. Without guards, the API path crashes with: AttributeError: 'NoneType' object has no attribute 'get' at either \`cmeta.get("source_file", "")\` in the closet boost lookup or \`meta.get("source_file", "") or ""\` in the drawer scoring loop. Applies the matching \`meta = meta or {}\` / \`cmeta = cmeta or {}\` guard at both sites and adds an API-path regression test that mocks a drawer query result with a None metadata entry and asserts both hits render — the None-metadata hit with the existing \`"unknown"\` sentinel values the scoring loop already writes for missing keys. Verified both the new API test and the existing CLI test fail without the guards (AttributeError) and pass with them.
This commit is contained in:
@@ -373,6 +373,7 @@ def search_memories(
|
|||||||
_first_or_empty(closet_results, "distances"),
|
_first_or_empty(closet_results, "distances"),
|
||||||
)
|
)
|
||||||
):
|
):
|
||||||
|
cmeta = cmeta or {}
|
||||||
source = cmeta.get("source_file", "")
|
source = cmeta.get("source_file", "")
|
||||||
if source and source not in closet_boost_by_source:
|
if source and source not in closet_boost_by_source:
|
||||||
closet_boost_by_source[source] = (rank, cdist, cdoc[:200])
|
closet_boost_by_source[source] = (rank, cdist, cdoc[:200])
|
||||||
@@ -395,6 +396,7 @@ def search_memories(
|
|||||||
if max_distance > 0.0 and dist > max_distance:
|
if max_distance > 0.0 and dist > max_distance:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
meta = meta or {}
|
||||||
source = meta.get("source_file", "") or ""
|
source = meta.get("source_file", "") or ""
|
||||||
boost = 0.0
|
boost = 0.0
|
||||||
matched_via = "drawer"
|
matched_via = "drawer"
|
||||||
|
|||||||
@@ -89,6 +89,37 @@ class TestSearchMemories:
|
|||||||
assert result["filters"]["wing"] == "project"
|
assert result["filters"]["wing"] == "project"
|
||||||
assert result["filters"]["room"] == "backend"
|
assert result["filters"]["room"] == "backend"
|
||||||
|
|
||||||
|
def test_search_memories_handles_none_metadata(self):
|
||||||
|
"""API path: `None` entries in the drawer results' metadatas list must
|
||||||
|
fall back to the sentinel strings (wing/room 'unknown', source '?')
|
||||||
|
rather than raising `AttributeError: 'NoneType' object has no
|
||||||
|
attribute 'get'` while the rest of the result set renders."""
|
||||||
|
mock_col = MagicMock()
|
||||||
|
mock_col.query.return_value = {
|
||||||
|
"documents": [["first doc", "second doc"]],
|
||||||
|
"metadatas": [[{"source_file": "a.md", "wing": "w", "room": "r"}, None]],
|
||||||
|
"distances": [[0.1, 0.2]],
|
||||||
|
"ids": [["d1", "d2"]],
|
||||||
|
}
|
||||||
|
|
||||||
|
def mock_get_collection(path, create=False):
|
||||||
|
# First call: drawers. Second call: closets — raise so hybrid
|
||||||
|
# degrades to pure drawer search (the catch block covers it).
|
||||||
|
if not hasattr(mock_get_collection, "_called"):
|
||||||
|
mock_get_collection._called = True
|
||||||
|
return mock_col
|
||||||
|
raise RuntimeError("no closets")
|
||||||
|
|
||||||
|
with patch("mempalace.searcher.get_collection", side_effect=mock_get_collection):
|
||||||
|
result = search_memories("anything", "/fake/path")
|
||||||
|
assert "results" in result
|
||||||
|
assert len(result["results"]) == 2
|
||||||
|
# The None-metadata hit renders with sentinel values, not a crash.
|
||||||
|
none_hit = result["results"][1]
|
||||||
|
assert none_hit["text"] == "second doc"
|
||||||
|
assert none_hit["wing"] == "unknown"
|
||||||
|
assert none_hit["room"] == "unknown"
|
||||||
|
|
||||||
|
|
||||||
# ── search() (CLI print function) ─────────────────────────────────────
|
# ── search() (CLI print function) ─────────────────────────────────────
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user