fix(searcher): guard API path + closet loop against None metadata too

Per Copilot review on the CLI-only PR (#999): search_memories() has the same vulnerability in two additional spots, since ChromaDB can return None entries in the inner metadatas list for either the drawer query or the closets query. Without guards, the API path crashes with: AttributeError: 'NoneType' object has no attribute 'get' at either \`cmeta.get("source_file", "")\` in the closet boost lookup or \`meta.get("source_file", "") or ""\` in the drawer scoring loop. Applies the matching \`meta = meta or {}\` / \`cmeta = cmeta or {}\` guard at both sites and adds an API-path regression test that mocks a drawer query result with a None metadata entry and asserts both hits render — the None-metadata hit with the existing \`"unknown"\` sentinel values the scoring loop already writes for missing keys. Verified both the new API test and the existing CLI test fail without the guards (AttributeError) and pass with them.
2026-04-18 10:37:05 -07:00
parent feba7e8043
commit 7690574dde
2 changed files with 33 additions and 0 deletions
@@ -373,6 +373,7 @@ def search_memories(
                _first_or_empty(closet_results, "distances"),
            )
        ):
            cmeta = cmeta or {}
            source = cmeta.get("source_file", "")
            if source and source not in closet_boost_by_source:
                closet_boost_by_source[source] = (rank, cdist, cdoc[:200])
@@ -395,6 +396,7 @@ def search_memories(
        if max_distance > 0.0 and dist > max_distance:
            continue
        meta = meta or {}
        source = meta.get("source_file", "") or ""
        boost = 0.0
        matched_via = "drawer"
@@ -89,6 +89,37 @@ class TestSearchMemories:
        assert result["filters"]["wing"] == "project"
        assert result["filters"]["room"] == "backend"
    def test_search_memories_handles_none_metadata(self):
        """API path: `None` entries in the drawer results' metadatas list must
        fall back to the sentinel strings (wing/room 'unknown', source '?')
        rather than raising `AttributeError: 'NoneType' object has no
        attribute 'get'` while the rest of the result set renders."""
        mock_col = MagicMock()
        mock_col.query.return_value = {
            "documents": [["first doc", "second doc"]],
            "metadatas": [[{"source_file": "a.md", "wing": "w", "room": "r"}, None]],
            "distances": [[0.1, 0.2]],
            "ids": [["d1", "d2"]],
        }
        def mock_get_collection(path, create=False):
            # First call: drawers. Second call: closets — raise so hybrid
            # degrades to pure drawer search (the catch block covers it).
            if not hasattr(mock_get_collection, "_called"):
                mock_get_collection._called = True
                return mock_col
            raise RuntimeError("no closets")
        with patch("mempalace.searcher.get_collection", side_effect=mock_get_collection):
            result = search_memories("anything", "/fake/path")
        assert "results" in result
        assert len(result["results"]) == 2
        # The None-metadata hit renders with sentinel values, not a crash.
        none_hit = result["results"][1]
        assert none_hit["text"] == "second doc"
        assert none_hit["wing"] == "unknown"
        assert none_hit["room"] == "unknown"
 # ── search() (CLI print function) ─────────────────────────────────────