fix(mcp): drain KG cache on tool_reconnect

tool_reconnect cleared ChromaDB caches but left _kg_by_path entries
intact. After an external replacement of knowledge_graph.sqlite3 the
server kept serving the old open sqlite3.Connection, returning stale
results.

Now iterate _kg_by_path under _kg_cache_lock, call close() best-effort,
and clear the dict so the next tool call reopens the KG from disk.
Two new tests in TestKGLazyCache verify cache invalidation and that a
failing close() does not block the clear.
This commit is contained in:
mvalentsev
2026-05-02 18:00:36 +05:00
parent 19f8a4ff68
commit 0a62658051
2 changed files with 54 additions and 2 deletions
+12 -2
View File
@@ -1418,10 +1418,11 @@ def tool_memories_filed_away():
def tool_reconnect(): def tool_reconnect():
"""Force the MCP server to drop the cached ChromaDB collection and reconnect. """Force the MCP server to drop cached ChromaDB + KnowledgeGraph state.
Use after external scripts or CLI commands modify the palace database Use after external scripts or CLI commands modify the palace database
directly, which can leave the in-memory HNSW index stale. or replace ``knowledge_graph.sqlite3`` directly, which can leave the
in-memory HNSW index stale or pin a closed-on-disk SQLite connection.
""" """
global \ global \
_client_cache, \ _client_cache, \
@@ -1439,6 +1440,15 @@ def tool_reconnect():
# still applies after the reconnect. # still applies after the reconnect.
_vector_disabled = False _vector_disabled = False
_vector_disabled_reason = "" _vector_disabled_reason = ""
# Drain the per-path KnowledgeGraph cache so a replaced sqlite file is
# reopened on the next tool call rather than served from a stale handle.
with _kg_cache_lock:
for kg in _kg_by_path.values():
try:
kg.close()
except Exception:
pass
_kg_by_path.clear()
try: try:
col = _get_collection() col = _get_collection()
if col is None: if col is None:
+42
View File
@@ -1253,3 +1253,45 @@ class TestKGLazyCache:
ids = {id(kg) for kg in results} ids = {id(kg) for kg in results}
assert len(ids) == 1, f"expected 1 unique instance, got {len(ids)}" assert len(ids) == 1, f"expected 1 unique instance, got {len(ids)}"
assert len(mcp_server._kg_by_path) == 1 assert len(mcp_server._kg_by_path) == 1
def test_tool_reconnect_drains_kg_cache(self, monkeypatch):
"""``tool_reconnect`` must close cached KG instances and clear the dict.
Without this, an external replacement of ``knowledge_graph.sqlite3``
leaves the server pinned to a stale ``sqlite3.Connection``.
"""
from mempalace import mcp_server
class _FakeKG:
def __init__(self):
self.closed = False
def close(self):
self.closed = True
fake_a = _FakeKG()
fake_b = _FakeKG()
monkeypatch.setattr(mcp_server, "_kg_by_path", {"/a": fake_a, "/b": fake_b})
# Bypass real ChromaDB so the test isolates KG-cache behaviour.
monkeypatch.setattr(mcp_server, "_get_collection", lambda: None)
mcp_server.tool_reconnect()
assert fake_a.closed is True
assert fake_b.closed is True
assert mcp_server._kg_by_path == {}
def test_tool_reconnect_swallows_kg_close_errors(self, monkeypatch):
"""A failing ``close()`` on one cached KG must not block cache clearing."""
from mempalace import mcp_server
class _BoomKG:
def close(self):
raise RuntimeError("boom")
monkeypatch.setattr(mcp_server, "_kg_by_path", {"/a": _BoomKG()})
monkeypatch.setattr(mcp_server, "_get_collection", lambda: None)
mcp_server.tool_reconnect()
assert mcp_server._kg_by_path == {}