fix: guard None metadata/doc in tool_check_duplicate and Layer1/Layer2

Chroma 1.5.x can return ``None`` inside the ``metadatas`` / ``documents``
lists of a query/get result for partially-flushed rows. The codebase
already has a systemic None-guard pattern (merged #999, #1013, #1019)
but three call sites were still unguarded:

* ``mcp_server.tool_check_duplicate`` (``mcp_server.py:487-488``) —
  ``meta = results["metadatas"][0][i]`` followed by ``meta.get(...)``
  raises ``AttributeError: 'NoneType' object has no attribute 'get'``.
  The broad ``except Exception`` wrapper (line 504) swallows it and
  returns an uninformative ``"Duplicate check failed"``.

* ``layers.Layer1.generate`` (``layers.py:126``) — iterates
  ``zip(docs, metas)`` and calls ``meta.get(key)`` in the importance
  loop. A single None metadata blows up the entire wake-up render.

* ``layers.Layer2.retrieve`` (``layers.py:224``) — same pattern, same
  crash path for the on-demand render.

Apply the same ``meta = meta or {}`` / ``doc = doc or ""`` idiom used
by the merged guards in the search path. Three-line additions, no
behaviour change on well-formed results.

Tests added:

* ``test_check_duplicate_handles_none_metadata`` — mocks the collection
  query to return ``None`` for one metadata and document, asserts the
  call does not crash and the sentinel-rendered entry has wing/room "?"
  and empty content.
* ``test_layer1_handles_none_metadata`` / ``_handles_none_document``
* ``test_layer2_handles_none_metadata``

Relationship to other open PRs:

* **#1019** guarded ``searcher.py`` loops. This PR extends the same
  guard to the three call sites #1019 did not touch.
* **#979** fixed ``tool_check_duplicate`` negative similarity but left
  the None-metadata path unguarded.
* Does not overlap **#1013** (``Layer3.search_raw``) or **#999**.
This commit is contained in:
eldar702
2026-04-19 11:13:50 +03:00
parent 109d7f267c
commit 4949aab68b
4 changed files with 113 additions and 2 deletions
+4
View File
@@ -124,6 +124,8 @@ class Layer1:
# Score each drawer: prefer high importance, recent filing # Score each drawer: prefer high importance, recent filing
scored = [] scored = []
for doc, meta in zip(docs, metas): for doc, meta in zip(docs, metas):
meta = meta or {}
doc = doc or ""
importance = 3 importance = 3
# Try multiple metadata keys that might carry weight info # Try multiple metadata keys that might carry weight info
for key in ("importance", "emotional_weight", "weight"): for key in ("importance", "emotional_weight", "weight"):
@@ -222,6 +224,8 @@ class Layer2:
lines = [f"## L2 — ON-DEMAND ({len(docs)} drawers)"] lines = [f"## L2 — ON-DEMAND ({len(docs)} drawers)"]
for doc, meta in zip(docs[:n_results], metas[:n_results]): for doc, meta in zip(docs[:n_results], metas[:n_results]):
meta = meta or {}
doc = doc or ""
room_name = meta.get("room", "?") room_name = meta.get("room", "?")
source = Path(meta.get("source_file", "")).name if meta.get("source_file") else "" source = Path(meta.get("source_file", "")).name if meta.get("source_file") else ""
snippet = doc.strip().replace("\n", " ") snippet = doc.strip().replace("\n", " ")
+4 -2
View File
@@ -484,8 +484,10 @@ def tool_check_duplicate(content: str, threshold: float = 0.9):
dist = results["distances"][0][i] dist = results["distances"][0][i]
similarity = round(1 - dist, 3) similarity = round(1 - dist, 3)
if similarity >= threshold: if similarity >= threshold:
meta = results["metadatas"][0][i] # Chroma 1.5.x can return None for partially-flushed rows;
doc = results["documents"][0][i] # coerce to empty sentinels so downstream .get() is safe.
meta = results["metadatas"][0][i] or {}
doc = results["documents"][0][i] or ""
duplicates.append( duplicates.append(
{ {
"id": drawer_id, "id": drawer_id,
+69
View File
@@ -655,3 +655,72 @@ def test_memory_stack_status_with_palace(tmp_path):
assert result["total_drawers"] == 42 assert result["total_drawers"] == 42
assert result["L0_identity"]["exists"] is True assert result["L0_identity"]["exists"] is True
# ── Layer1 / Layer2 None-metadata guards ───────────────────────────────
#
# Chroma 1.5.x can return ``None`` inside the ``metadatas`` / ``documents``
# lists for partially-flushed rows. The Layer1.generate() and
# Layer2.retrieve() loops previously called ``meta.get(...)`` without
# coercing, raising ``AttributeError: 'NoneType' object has no attribute
# 'get'`` and blowing up the whole wake-up render. These tests guard that
# the loops tolerate the None entries and render the rest of the result.
def test_layer1_handles_none_metadata():
"""Layer1.generate tolerates None entries in the metadatas list."""
docs = ["important memory", "another memory"]
metas = [{"room": "decisions", "source_file": "a.txt"}, None]
mock_col = _mock_chromadb_for_layer(docs, metas)
with (
patch("mempalace.layers.MempalaceConfig") as mock_cfg,
patch("mempalace.layers._get_collection", return_value=mock_col),
):
mock_cfg.return_value.palace_path = "/fake"
layer = Layer1(palace_path="/fake")
# Should not raise AttributeError on the None entry.
result = layer.generate()
assert "ESSENTIAL STORY" in result
assert "important memory" in result
def test_layer1_handles_none_document():
"""Layer1.generate tolerates None entries in the documents list."""
docs = ["first doc", None]
metas = [
{"room": "r", "source_file": "a.txt"},
{"room": "r", "source_file": "b.txt"},
]
mock_col = _mock_chromadb_for_layer(docs, metas)
with (
patch("mempalace.layers.MempalaceConfig") as mock_cfg,
patch("mempalace.layers._get_collection", return_value=mock_col),
):
mock_cfg.return_value.palace_path = "/fake"
layer = Layer1(palace_path="/fake")
result = layer.generate()
assert result # Render succeeded despite the None document.
def test_layer2_handles_none_metadata():
"""Layer2.retrieve tolerates None entries in the metadatas list."""
mock_col = MagicMock()
mock_col.get.return_value = {
"documents": ["first doc", "second doc"],
"metadatas": [{"room": "r", "source_file": "a.txt"}, None],
}
with (
patch("mempalace.layers.MempalaceConfig") as mock_cfg,
patch("mempalace.layers._get_collection", return_value=mock_col),
):
mock_cfg.return_value.palace_path = "/fake"
layer = Layer2(palace_path="/fake")
# Should not raise AttributeError on the None entry.
result = layer.retrieve()
assert "L2 — ON-DEMAND" in result
+36
View File
@@ -9,6 +9,7 @@ via monkeypatch to avoid touching real data.
from datetime import datetime from datetime import datetime
import json import json
import sys import sys
from unittest.mock import MagicMock
import pytest import pytest
@@ -495,6 +496,41 @@ class TestWriteTools:
result = tool_delete_drawer("nonexistent_drawer") result = tool_delete_drawer("nonexistent_drawer")
assert result["success"] is False assert result["success"] is False
def test_check_duplicate_handles_none_metadata(self, monkeypatch, config, kg):
"""tool_check_duplicate must tolerate None entries in the result lists
that ChromaDB 1.5.x returns for partially-flushed rows.
Previously ``meta = results["metadatas"][0][i]`` was unguarded and
raised ``AttributeError: 'NoneType' object has no attribute 'get'``
the moment the first matching drawer came back with None metadata —
surfacing to the MCP client as the uninformative
``"Duplicate check failed"`` because the broad ``except Exception``
wrapper swallows the real cause.
"""
_patch_mcp_server(monkeypatch, config, kg)
from mempalace import mcp_server
mock_col = MagicMock()
mock_col.query.return_value = {
"ids": [["d1", "d2"]],
"distances": [[0.05, 0.05]],
"metadatas": [[{"wing": "w", "room": "r"}, None]],
"documents": [["first doc", None]],
}
monkeypatch.setattr(mcp_server, "_get_collection", lambda: mock_col)
result = mcp_server.tool_check_duplicate("any content", threshold=0.5)
# Both entries land in matches (above threshold), None ones rendered
# with sentinel values rather than crashing the whole response.
assert result.get("is_duplicate") is True
assert len(result["matches"]) == 2
# The None-metadata entry falls back to sentinels.
none_entry = result["matches"][1]
assert none_entry["wing"] == "?"
assert none_entry["room"] == "?"
assert none_entry["content"] == ""
def test_check_duplicate(self, monkeypatch, config, palace_path, seeded_collection, kg): def test_check_duplicate(self, monkeypatch, config, palace_path, seeded_collection, kg):
_patch_mcp_server(monkeypatch, config, kg) _patch_mcp_server(monkeypatch, config, kg)
from mempalace.mcp_server import tool_check_duplicate from mempalace.mcp_server import tool_check_duplicate