Merge pull request #999 from jphein/fix/searcher-none-metadata

fix(searcher): guard against None metadata in CLI print path
This commit is contained in:
Ben Sigman
2026-04-18 13:41:52 -07:00
committed by GitHub
6 changed files with 120 additions and 0 deletions
+4
View File
@@ -315,6 +315,7 @@ def tool_status():
try:
all_meta = _get_cached_metadata(col)
for m in all_meta:
m = m or {}
w = m.get("wing", "unknown")
r = m.get("room", "unknown")
wings[w] = wings.get(w, 0) + 1
@@ -368,6 +369,7 @@ def tool_list_wings():
try:
all_meta = _get_cached_metadata(col)
for m in all_meta:
m = m or {}
w = m.get("wing", "unknown")
wings[w] = wings.get(w, 0) + 1
except Exception as e:
@@ -391,6 +393,7 @@ def tool_list_rooms(wing: str = None):
where = {"wing": wing} if wing else None
all_meta = _fetch_all_metadata(col, where=where)
for m in all_meta:
m = m or {}
r = m.get("room", "unknown")
rooms[r] = rooms.get(r, 0) + 1
except Exception as e:
@@ -409,6 +412,7 @@ def tool_get_taxonomy():
try:
all_meta = _get_cached_metadata(col)
for m in all_meta:
m = m or {}
w = m.get("wing", "unknown")
r = m.get("room", "unknown")
if w not in taxonomy:
+1
View File
@@ -854,6 +854,7 @@ def status(palace_path: str):
wing_rooms = defaultdict(lambda: defaultdict(int))
for m in metas:
m = m or {}
wing_rooms[m.get("wing", "?")][m.get("room", "?")] += 1
print(f"\n{'=' * 55}")
+3
View File
@@ -283,6 +283,7 @@ def search(query: str, palace_path: str, wing: str = None, room: str = None, n_r
for i, (doc, meta, dist) in enumerate(zip(docs, metas, dists), 1):
similarity = round(max(0.0, 1 - dist), 3)
meta = meta or {}
source = Path(meta.get("source_file", "?")).name
wing_name = meta.get("wing", "?")
room_name = meta.get("room", "?")
@@ -372,6 +373,7 @@ def search_memories(
_first_or_empty(closet_results, "distances"),
)
):
cmeta = cmeta or {}
source = cmeta.get("source_file", "")
if source and source not in closet_boost_by_source:
closet_boost_by_source[source] = (rank, cdist, cdoc[:200])
@@ -394,6 +396,7 @@ def search_memories(
if max_distance > 0.0 and dist > max_distance:
continue
meta = meta or {}
source = meta.get("source_file", "") or ""
boost = 0.0
matched_via = "drawer"
+32
View File
@@ -250,6 +250,38 @@ class TestReadTools:
assert "project" in result["wings"]
assert "notes" in result["wings"]
def test_status_handles_none_metadata_without_partial(
self, monkeypatch, config, palace_path, kg
):
"""tool_status must not crash or go partial when the metadata cache
returns a ``None`` entry — palaces can contain drawers with no
metadata (older mining paths, third-party writes). Before the guard,
``m.get("wing")`` raised AttributeError mid-tally and the result
carried ``"error"`` + ``"partial": True`` even though the data was
perfectly fetchable."""
from unittest.mock import patch as _patch
_patch_mcp_server(monkeypatch, config, kg)
from mempalace.mcp_server import tool_status
# Inject a metadata cache where one entry is None
with _patch("mempalace.mcp_server._get_collection") as mock_get_col:
fake_col = type("C", (), {"count": lambda self: 2})()
mock_get_col.return_value = fake_col
with _patch(
"mempalace.mcp_server._get_cached_metadata",
return_value=[{"wing": "proj", "room": "r"}, None],
):
result = tool_status()
# The None-metadata drawer falls under 'unknown/unknown' — no crash,
# no partial flag.
assert "error" not in result
assert result.get("partial") is not True
assert result["total_drawers"] == 2
assert result["wings"].get("proj") == 1
assert result["wings"].get("unknown") == 1
def test_list_wings(self, monkeypatch, config, palace_path, seeded_collection, kg):
_patch_mcp_server(monkeypatch, config, kg)
from mempalace.mcp_server import tool_list_wings
+30
View File
@@ -343,6 +343,36 @@ def test_status_missing_palace_does_not_create_empty_collection(tmp_path, capsys
assert not palace_path.exists()
def test_status_handles_none_metadata_without_crash(tmp_path, capsys):
"""status must not crash when col.get returns a None entry in metadatas.
Palaces can contain drawers whose metadata was never set (older mining
paths, drawers written by third-party tools). Before the guard, status
crashed mid-tally with ``AttributeError: 'NoneType' object has no
attribute 'get'`` at the wing/room histogram line."""
from unittest.mock import patch
class FakeCol:
def count(self):
return 2
def get(self, *args, **kwargs):
return {
"ids": ["a", "b"],
"documents": ["doc a", "doc b"],
"metadatas": [{"wing": "proj", "room": "r"}, None],
}
with patch("mempalace.miner.get_collection", return_value=FakeCol()):
status(str(tmp_path))
out = capsys.readouterr().out
# No crash; the None-metadata row is counted under the ?/? fallback
# alongside the real wing=proj row.
assert "WING: ?" in out
assert "WING: proj" in out
# ── normalize_version schema gate ───────────────────────────────────────
#
# When the normalization pipeline changes shape (e.g., strip_noise lands),
+50
View File
@@ -89,6 +89,37 @@ class TestSearchMemories:
assert result["filters"]["wing"] == "project"
assert result["filters"]["room"] == "backend"
def test_search_memories_handles_none_metadata(self):
"""API path: `None` entries in the drawer results' metadatas list must
fall back to the sentinel strings (wing/room 'unknown', source '?')
rather than raising `AttributeError: 'NoneType' object has no
attribute 'get'` while the rest of the result set renders."""
mock_col = MagicMock()
mock_col.query.return_value = {
"documents": [["first doc", "second doc"]],
"metadatas": [[{"source_file": "a.md", "wing": "w", "room": "r"}, None]],
"distances": [[0.1, 0.2]],
"ids": [["d1", "d2"]],
}
def mock_get_collection(path, create=False):
# First call: drawers. Second call: closets — raise so hybrid
# degrades to pure drawer search (the catch block covers it).
if not hasattr(mock_get_collection, "_called"):
mock_get_collection._called = True
return mock_col
raise RuntimeError("no closets")
with patch("mempalace.searcher.get_collection", side_effect=mock_get_collection):
result = search_memories("anything", "/fake/path")
assert "results" in result
assert len(result["results"]) == 2
# The None-metadata hit renders with sentinel values, not a crash.
none_hit = result["results"][1]
assert none_hit["text"] == "second doc"
assert none_hit["wing"] == "unknown"
assert none_hit["room"] == "unknown"
# ── search() (CLI print function) ─────────────────────────────────────
@@ -141,3 +172,22 @@ class TestSearchCLI:
captured = capsys.readouterr()
# Should have output with at least one result block
assert "[1]" in captured.out
def test_search_handles_none_metadata_without_crash(self, palace_path, capsys):
"""ChromaDB can return `None` entries in the metadatas list when a
drawer has no metadata. The CLI print path must not crash on them
mid-render — it used to raise `AttributeError: 'NoneType' object has
no attribute 'get'` after printing earlier results."""
mock_col = MagicMock()
mock_col.query.return_value = {
"documents": [["first doc", "second doc"]],
"metadatas": [[{"source_file": "a.md", "wing": "w", "room": "r"}, None]],
"distances": [[0.1, 0.2]],
}
with patch("mempalace.searcher.get_collection", return_value=mock_col):
search("anything", "/fake/path")
captured = capsys.readouterr()
assert "[1]" in captured.out
assert "[2]" in captured.out
# Second result renders with fallback '?' values instead of crashing
assert "second doc" in captured.out