From 4949aab68b71f540514b3a7aad45828fa6366f5a Mon Sep 17 00:00:00 2001 From: eldar702 Date: Sun, 19 Apr 2026 11:13:50 +0300 Subject: [PATCH] fix: guard None metadata/doc in tool_check_duplicate and Layer1/Layer2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Chroma 1.5.x can return ``None`` inside the ``metadatas`` / ``documents`` lists of a query/get result for partially-flushed rows. The codebase already has a systemic None-guard pattern (merged #999, #1013, #1019) but three call sites were still unguarded: * ``mcp_server.tool_check_duplicate`` (``mcp_server.py:487-488``) — ``meta = results["metadatas"][0][i]`` followed by ``meta.get(...)`` raises ``AttributeError: 'NoneType' object has no attribute 'get'``. The broad ``except Exception`` wrapper (line 504) swallows it and returns an uninformative ``"Duplicate check failed"``. * ``layers.Layer1.generate`` (``layers.py:126``) — iterates ``zip(docs, metas)`` and calls ``meta.get(key)`` in the importance loop. A single None metadata blows up the entire wake-up render. * ``layers.Layer2.retrieve`` (``layers.py:224``) — same pattern, same crash path for the on-demand render. Apply the same ``meta = meta or {}`` / ``doc = doc or ""`` idiom used by the merged guards in the search path. The change is a two-line guard at each of the three call sites, with no behaviour change on well-formed results. Tests added: * ``test_check_duplicate_handles_none_metadata`` — mocks the collection query to return ``None`` for one metadata and document, asserts the call does not crash and the sentinel-rendered entry has wing/room "?" and empty content. * ``test_layer1_handles_none_metadata`` / ``_handles_none_document`` * ``test_layer2_handles_none_metadata`` Relationship to other PRs: * **#1019** guarded ``searcher.py`` loops. This PR extends the same guard to the three call sites #1019 did not touch. * **#979** fixed ``tool_check_duplicate`` negative similarity but left the None-metadata path unguarded. 
* Does not overlap **#1013** (``Layer3.search_raw``) or **#999**. --- mempalace/layers.py | 4 +++ mempalace/mcp_server.py | 6 ++-- tests/test_layers.py | 69 ++++++++++++++++++++++++++++++++++++++++ tests/test_mcp_server.py | 36 +++++++++++++++++++++ 4 files changed, 113 insertions(+), 2 deletions(-) diff --git a/mempalace/layers.py b/mempalace/layers.py index a0f9b6d..b20c656 100644 --- a/mempalace/layers.py +++ b/mempalace/layers.py @@ -124,6 +124,8 @@ class Layer1: # Score each drawer: prefer high importance, recent filing scored = [] for doc, meta in zip(docs, metas): + meta = meta or {} + doc = doc or "" importance = 3 # Try multiple metadata keys that might carry weight info for key in ("importance", "emotional_weight", "weight"): @@ -222,6 +224,8 @@ class Layer2: lines = [f"## L2 — ON-DEMAND ({len(docs)} drawers)"] for doc, meta in zip(docs[:n_results], metas[:n_results]): + meta = meta or {} + doc = doc or "" room_name = meta.get("room", "?") source = Path(meta.get("source_file", "")).name if meta.get("source_file") else "" snippet = doc.strip().replace("\n", " ") diff --git a/mempalace/mcp_server.py b/mempalace/mcp_server.py index 06355c4..ae1eb71 100644 --- a/mempalace/mcp_server.py +++ b/mempalace/mcp_server.py @@ -484,8 +484,10 @@ def tool_check_duplicate(content: str, threshold: float = 0.9): dist = results["distances"][0][i] similarity = round(1 - dist, 3) if similarity >= threshold: - meta = results["metadatas"][0][i] - doc = results["documents"][0][i] + # Chroma 1.5.x can return None for partially-flushed rows; + # coerce to empty sentinels so downstream .get() is safe. 
+ meta = results["metadatas"][0][i] or {} + doc = results["documents"][0][i] or "" duplicates.append( { "id": drawer_id, diff --git a/tests/test_layers.py b/tests/test_layers.py index 575183f..d4c54ce 100644 --- a/tests/test_layers.py +++ b/tests/test_layers.py @@ -655,3 +655,72 @@ def test_memory_stack_status_with_palace(tmp_path): assert result["total_drawers"] == 42 assert result["L0_identity"]["exists"] is True + + +# ── Layer1 / Layer2 None-metadata guards ─────────────────────────────── +# +# Chroma 1.5.x can return ``None`` inside the ``metadatas`` / ``documents`` +# lists for partially-flushed rows. The Layer1.generate() and +# Layer2.retrieve() loops previously called ``meta.get(...)`` without +# coercing, raising ``AttributeError: 'NoneType' object has no attribute +# 'get'`` and blowing up the whole wake-up render. These tests guard that +# the loops tolerate the None entries and render the rest of the result. + + +def test_layer1_handles_none_metadata(): + """Layer1.generate tolerates None entries in the metadatas list.""" + docs = ["important memory", "another memory"] + metas = [{"room": "decisions", "source_file": "a.txt"}, None] + mock_col = _mock_chromadb_for_layer(docs, metas) + + with ( + patch("mempalace.layers.MempalaceConfig") as mock_cfg, + patch("mempalace.layers._get_collection", return_value=mock_col), + ): + mock_cfg.return_value.palace_path = "/fake" + layer = Layer1(palace_path="/fake") + # Should not raise AttributeError on the None entry. 
+ result = layer.generate() + + assert "ESSENTIAL STORY" in result + assert "important memory" in result + + +def test_layer1_handles_none_document(): + """Layer1.generate tolerates None entries in the documents list.""" + docs = ["first doc", None] + metas = [ + {"room": "r", "source_file": "a.txt"}, + {"room": "r", "source_file": "b.txt"}, + ] + mock_col = _mock_chromadb_for_layer(docs, metas) + + with ( + patch("mempalace.layers.MempalaceConfig") as mock_cfg, + patch("mempalace.layers._get_collection", return_value=mock_col), + ): + mock_cfg.return_value.palace_path = "/fake" + layer = Layer1(palace_path="/fake") + result = layer.generate() + + assert result # Render succeeded despite the None document. + + +def test_layer2_handles_none_metadata(): + """Layer2.retrieve tolerates None entries in the metadatas list.""" + mock_col = MagicMock() + mock_col.get.return_value = { + "documents": ["first doc", "second doc"], + "metadatas": [{"room": "r", "source_file": "a.txt"}, None], + } + + with ( + patch("mempalace.layers.MempalaceConfig") as mock_cfg, + patch("mempalace.layers._get_collection", return_value=mock_col), + ): + mock_cfg.return_value.palace_path = "/fake" + layer = Layer2(palace_path="/fake") + # Should not raise AttributeError on the None entry. + result = layer.retrieve() + + assert "L2 — ON-DEMAND" in result diff --git a/tests/test_mcp_server.py b/tests/test_mcp_server.py index 899e6a7..e376f43 100644 --- a/tests/test_mcp_server.py +++ b/tests/test_mcp_server.py @@ -9,6 +9,7 @@ via monkeypatch to avoid touching real data. 
from datetime import datetime import json import sys +from unittest.mock import MagicMock import pytest @@ -495,6 +496,41 @@ class TestWriteTools: result = tool_delete_drawer("nonexistent_drawer") assert result["success"] is False + def test_check_duplicate_handles_none_metadata(self, monkeypatch, config, kg): + """tool_check_duplicate must tolerate None entries in the result lists + that ChromaDB 1.5.x returns for partially-flushed rows. + + Previously ``meta = results["metadatas"][0][i]`` was unguarded and + raised ``AttributeError: 'NoneType' object has no attribute 'get'`` + the moment the first matching drawer came back with None metadata — + surfacing to the MCP client as the uninformative + ``"Duplicate check failed"`` because the broad ``except Exception`` + wrapper swallows the real cause. + """ + _patch_mcp_server(monkeypatch, config, kg) + from mempalace import mcp_server + + mock_col = MagicMock() + mock_col.query.return_value = { + "ids": [["d1", "d2"]], + "distances": [[0.05, 0.05]], + "metadatas": [[{"wing": "w", "room": "r"}, None]], + "documents": [["first doc", None]], + } + monkeypatch.setattr(mcp_server, "_get_collection", lambda: mock_col) + + result = mcp_server.tool_check_duplicate("any content", threshold=0.5) + + # Both entries land in matches (above threshold), None ones rendered + # with sentinel values rather than crashing the whole response. + assert result.get("is_duplicate") is True + assert len(result["matches"]) == 2 + # The None-metadata entry falls back to sentinels. + none_entry = result["matches"][1] + assert none_entry["wing"] == "?" + assert none_entry["room"] == "?" + assert none_entry["content"] == "" + def test_check_duplicate(self, monkeypatch, config, palace_path, seeded_collection, kg): _patch_mcp_server(monkeypatch, config, kg) from mempalace.mcp_server import tool_check_duplicate