From 7690574dde9b804040939a9958fd7ce3a57383de Mon Sep 17 00:00:00 2001 From: jp Date: Sat, 18 Apr 2026 10:37:05 -0700 Subject: [PATCH] fix(searcher): guard API path + closet loop against None metadata too MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per Copilot review on the CLI-only PR (#999): search_memories() has the same vulnerability in two additional spots, since ChromaDB can return None entries in the inner metadatas list for either the drawer query or the closets query. Without guards, the API path crashes with: AttributeError: 'NoneType' object has no attribute 'get' at either \`cmeta.get("source_file", "")\` in the closet boost lookup or \`meta.get("source_file", "") or ""\` in the drawer scoring loop. Applies the matching \`meta = meta or {}\` / \`cmeta = cmeta or {}\` guard at both sites and adds an API-path regression test that mocks a drawer query result with a None metadata entry and asserts both hits render — the None-metadata hit with the existing \`"unknown"\` sentinel values the scoring loop already writes for missing keys. Verified both the new API test and the existing CLI test fail without the guards (AttributeError) and pass with them. --- mempalace/searcher.py | 2 ++ tests/test_searcher.py | 31 +++++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/mempalace/searcher.py b/mempalace/searcher.py index ef951e2..c9b5ac3 100644 --- a/mempalace/searcher.py +++ b/mempalace/searcher.py @@ -373,6 +373,7 @@ def search_memories( _first_or_empty(closet_results, "distances"), ) ): + cmeta = cmeta or {} source = cmeta.get("source_file", "") if source and source not in closet_boost_by_source: closet_boost_by_source[source] = (rank, cdist, cdoc[:200]) @@ -395,6 +396,7 @@ def search_memories( if max_distance > 0.0 and dist > max_distance: continue + meta = meta or {} source = meta.get("source_file", "") or "" boost = 0.0 matched_via = "drawer" diff --git a/tests/test_searcher.py b/tests/test_searcher.py index 127e95f..3ccfb2d 100644 --- a/tests/test_searcher.py +++ b/tests/test_searcher.py @@ -89,6 +89,37 @@ class TestSearchMemories: assert result["filters"]["wing"] == "project" assert result["filters"]["room"] == "backend" + def test_search_memories_handles_none_metadata(self): + """API path: `None` entries in the drawer results' metadatas list must + fall back to the sentinel strings (wing/room 'unknown', source '?') + rather than raising `AttributeError: 'NoneType' object has no + attribute 'get'` while the rest of the result set renders.""" + mock_col = MagicMock() + mock_col.query.return_value = { + "documents": [["first doc", "second doc"]], + "metadatas": [[{"source_file": "a.md", "wing": "w", "room": "r"}, None]], + "distances": [[0.1, 0.2]], + "ids": [["d1", "d2"]], + } + + def mock_get_collection(path, create=False): + # First call: drawers. Second call: closets — raise so hybrid + # degrades to pure drawer search (the catch block covers it). + if not hasattr(mock_get_collection, "_called"): + mock_get_collection._called = True + return mock_col + raise RuntimeError("no closets") + + with patch("mempalace.searcher.get_collection", side_effect=mock_get_collection): + result = search_memories("anything", "/fake/path") + assert "results" in result + assert len(result["results"]) == 2 + # The None-metadata hit renders with sentinel values, not a crash. + none_hit = result["results"][1] + assert none_hit["text"] == "second doc" + assert none_hit["wing"] == "unknown" + assert none_hit["room"] == "unknown" + # ── search() (CLI print function) ─────────────────────────────────────