Merge pull request #1030 from eldar702/fix/none-metadata-residual-guards

fix: guard None metadata/doc in tool_check_duplicate and Layer1/Layer2
This commit is contained in:
Igor Lins e Silva
2026-05-06 01:51:24 -03:00
committed by GitHub
4 changed files with 113 additions and 2 deletions
+4
View File
@@ -124,6 +124,8 @@ class Layer1:
# Score each drawer: prefer high importance, recent filing # Score each drawer: prefer high importance, recent filing
scored = [] scored = []
for doc, meta in zip(docs, metas): for doc, meta in zip(docs, metas):
meta = meta or {}
doc = doc or ""
importance = 3 importance = 3
# Try multiple metadata keys that might carry weight info # Try multiple metadata keys that might carry weight info
for key in ("importance", "emotional_weight", "weight"): for key in ("importance", "emotional_weight", "weight"):
@@ -222,6 +224,8 @@ class Layer2:
lines = [f"## L2 — ON-DEMAND ({len(docs)} drawers)"] lines = [f"## L2 — ON-DEMAND ({len(docs)} drawers)"]
for doc, meta in zip(docs[:n_results], metas[:n_results]): for doc, meta in zip(docs[:n_results], metas[:n_results]):
meta = meta or {}
doc = doc or ""
room_name = meta.get("room", "?") room_name = meta.get("room", "?")
source = Path(meta.get("source_file", "")).name if meta.get("source_file") else "" source = Path(meta.get("source_file", "")).name if meta.get("source_file") else ""
snippet = doc.strip().replace("\n", " ") snippet = doc.strip().replace("\n", " ")
+4 -2
View File
@@ -742,8 +742,10 @@ def tool_check_duplicate(content: str, threshold: float = 0.9):
dist = results["distances"][0][i] dist = results["distances"][0][i]
similarity = round(1 - dist, 3) similarity = round(1 - dist, 3)
if similarity >= threshold: if similarity >= threshold:
meta = results["metadatas"][0][i] # Chroma 1.5.x can return None for partially-flushed rows;
doc = results["documents"][0][i] # coerce to empty sentinels so downstream .get() is safe.
meta = results["metadatas"][0][i] or {}
doc = results["documents"][0][i] or ""
duplicates.append( duplicates.append(
{ {
"id": drawer_id, "id": drawer_id,
+69
View File
@@ -655,3 +655,72 @@ def test_memory_stack_status_with_palace(tmp_path):
assert result["total_drawers"] == 42 assert result["total_drawers"] == 42
assert result["L0_identity"]["exists"] is True assert result["L0_identity"]["exists"] is True
# ── Layer1 / Layer2 None-metadata guards ───────────────────────────────
#
# Chroma 1.5.x can return ``None`` inside the ``metadatas`` / ``documents``
# lists for partially-flushed rows. The Layer1.generate() and
# Layer2.retrieve() loops previously called ``meta.get(...)`` without
# coercing, raising ``AttributeError: 'NoneType' object has no attribute
# 'get'`` and blowing up the whole wake-up render. These tests guard that
# the loops tolerate the None entries and render the rest of the result.
def test_layer1_handles_none_metadata():
"""Layer1.generate tolerates None entries in the metadatas list."""
docs = ["important memory", "another memory"]
metas = [{"room": "decisions", "source_file": "a.txt"}, None]
mock_col = _mock_chromadb_for_layer(docs, metas)
with (
patch("mempalace.layers.MempalaceConfig") as mock_cfg,
patch("mempalace.layers._get_collection", return_value=mock_col),
):
mock_cfg.return_value.palace_path = "/fake"
layer = Layer1(palace_path="/fake")
# Should not raise AttributeError on the None entry.
result = layer.generate()
assert "ESSENTIAL STORY" in result
assert "important memory" in result
def test_layer1_handles_none_document():
"""Layer1.generate tolerates None entries in the documents list."""
docs = ["first doc", None]
metas = [
{"room": "r", "source_file": "a.txt"},
{"room": "r", "source_file": "b.txt"},
]
mock_col = _mock_chromadb_for_layer(docs, metas)
with (
patch("mempalace.layers.MempalaceConfig") as mock_cfg,
patch("mempalace.layers._get_collection", return_value=mock_col),
):
mock_cfg.return_value.palace_path = "/fake"
layer = Layer1(palace_path="/fake")
result = layer.generate()
assert result # Render succeeded despite the None document.
def test_layer2_handles_none_metadata():
"""Layer2.retrieve tolerates None entries in the metadatas list."""
mock_col = MagicMock()
mock_col.get.return_value = {
"documents": ["first doc", "second doc"],
"metadatas": [{"room": "r", "source_file": "a.txt"}, None],
}
with (
patch("mempalace.layers.MempalaceConfig") as mock_cfg,
patch("mempalace.layers._get_collection", return_value=mock_col),
):
mock_cfg.return_value.palace_path = "/fake"
layer = Layer2(palace_path="/fake")
# Should not raise AttributeError on the None entry.
result = layer.retrieve()
assert "L2 — ON-DEMAND" in result
+36
View File
@@ -10,6 +10,7 @@ from datetime import datetime
import json import json
import os import os
import sys import sys
from unittest.mock import MagicMock
import pytest import pytest
@@ -496,6 +497,41 @@ class TestWriteTools:
result = tool_delete_drawer("nonexistent_drawer") result = tool_delete_drawer("nonexistent_drawer")
assert result["success"] is False assert result["success"] is False
def test_check_duplicate_handles_none_metadata(self, monkeypatch, config, kg):
"""tool_check_duplicate must tolerate None entries in the result lists
that ChromaDB 1.5.x returns for partially-flushed rows.
Previously ``meta = results["metadatas"][0][i]`` was unguarded and
raised ``AttributeError: 'NoneType' object has no attribute 'get'``
the moment the first matching drawer came back with None metadata —
surfacing to the MCP client as the uninformative
``"Duplicate check failed"`` because the broad ``except Exception``
wrapper swallows the real cause.
"""
_patch_mcp_server(monkeypatch, config, kg)
from mempalace import mcp_server
mock_col = MagicMock()
mock_col.query.return_value = {
"ids": [["d1", "d2"]],
"distances": [[0.05, 0.05]],
"metadatas": [[{"wing": "w", "room": "r"}, None]],
"documents": [["first doc", None]],
}
monkeypatch.setattr(mcp_server, "_get_collection", lambda: mock_col)
result = mcp_server.tool_check_duplicate("any content", threshold=0.5)
# Both entries land in matches (above threshold), None ones rendered
# with sentinel values rather than crashing the whole response.
assert result.get("is_duplicate") is True
assert len(result["matches"]) == 2
# The None-metadata entry falls back to sentinels.
none_entry = result["matches"][1]
assert none_entry["wing"] == "?"
assert none_entry["room"] == "?"
assert none_entry["content"] == ""
def test_check_duplicate(self, monkeypatch, config, palace_path, seeded_collection, kg): def test_check_duplicate(self, monkeypatch, config, palace_path, seeded_collection, kg):
_patch_mcp_server(monkeypatch, config, kg) _patch_mcp_server(monkeypatch, config, kg)
from mempalace.mcp_server import tool_check_duplicate from mempalace.mcp_server import tool_check_duplicate