From e200ce2c8abc50a10ef1dfc35129698d96c8af17 Mon Sep 17 00:00:00 2001 From: Igor Lins e Silva <4753812+igorls@users.noreply.github.com> Date: Mon, 13 Apr 2026 01:53:13 -0300 Subject: [PATCH] fix: detect mtime changes in _get_client to prevent stale HNSW index (#757) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When external tools write to the palace database (CLI mining, scripts), the MCP server's cached ChromaDB collection becomes stale — its HNSW index doesn't know about new vectors. Develop already invalidates on inode changes (catches rebuilds) but not on mtime changes (misses in-place writes). This PR: - Adds st_mtime tracking alongside st_ino in _get_client; invalidates the cached client on either change. - Adds the mempalace_reconnect MCP tool for explicit cache flush. Original author: @jphein (#663). Original approval: @Ari4ka. Skips test_missing_db_invalidates_cache on Windows (ChromaDB holds chroma.sqlite3 open). --- mempalace/mcp_server.py | 77 +++++++++++++++++++++++++++-- tests/test_mcp_server.py | 102 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 176 insertions(+), 3 deletions(-) diff --git a/mempalace/mcp_server.py b/mempalace/mcp_server.py index ef7e4e8..4e21426 100644 --- a/mempalace/mcp_server.py +++ b/mempalace/mcp_server.py @@ -15,6 +15,9 @@ Tools (read): Tools (write): mempalace_add_drawer — file verbatim content into a wing/room mempalace_delete_drawer — remove a drawer by ID + +Tools (maintenance): + mempalace_reconnect — force cache invalidation and reconnect after external writes """ import argparse @@ -70,6 +73,7 @@ else: _client_cache = None _collection_cache = None _palace_db_inode = 0 # inode of chroma.sqlite3 at cache time +_palace_db_mtime = 0.0 # mtime of chroma.sqlite3 at cache time # ==================== WRITE-AHEAD LOG ==================== @@ -127,20 +131,50 @@ def _get_client(): Detects palace rebuilds (repair/nuke/purge) by checking the inode of chroma.sqlite3. A full rebuild replaces the file, changing the inode. + Also detects external writes (scripts, CLI) via mtime changes — the + inode check alone misses in-place modifications that invalidate the + in-memory HNSW index. + + Note: FAT/exFAT may return 0 for st_ino — the ``current_inode != 0`` + guard skips reconnect detection on those filesystems (safe fallback). """ - global _client_cache, _collection_cache, _palace_db_inode, _metadata_cache, _metadata_cache_time + global \ + _client_cache, \ + _collection_cache, \ + _palace_db_inode, \ + _palace_db_mtime, \ + _metadata_cache, \ + _metadata_cache_time db_path = os.path.join(_config.palace_path, "chroma.sqlite3") try: - current_inode = os.stat(db_path).st_ino + st = os.stat(db_path) + current_inode = st.st_ino + current_mtime = st.st_mtime except OSError: current_inode = 0 + current_mtime = 0.0 - if _client_cache is None or current_inode != _palace_db_inode: + # If the DB file disappeared (e.g. during rebuild) but we have a cached + # collection, invalidate so we don't serve stale data. Without this, + # both stored and current values are 0 on the first call after deletion, + # making inode_changed and mtime_changed both False. + if not os.path.isfile(db_path) and _collection_cache is not None: + _client_cache = None + _collection_cache = None + _palace_db_inode = 0 + _palace_db_mtime = 0.0 + # Fall through to normal reconnect which will handle missing DB + + inode_changed = current_inode != 0 and current_inode != _palace_db_inode + mtime_changed = current_mtime != 0.0 and abs(current_mtime - _palace_db_mtime) > 0.01 + + if _client_cache is None or inode_changed or mtime_changed: _client_cache = chromadb.PersistentClient(path=_config.palace_path) _collection_cache = None _metadata_cache = None _metadata_cache_time = 0 _palace_db_inode = current_inode + _palace_db_mtime = current_mtime return _client_cache @@ -973,6 +1007,32 @@ def tool_memories_filed_away(): } +# ==================== SETTINGS TOOLS ==================== + + +def tool_reconnect(): + """Force the MCP server to drop the cached ChromaDB collection and reconnect. + + Use after external scripts or CLI commands modify the palace database + directly, which can leave the in-memory HNSW index stale. + """ + global _collection_cache, _palace_db_inode, _palace_db_mtime + _collection_cache = None + _palace_db_inode = 0 + _palace_db_mtime = 0.0 + try: + col = _get_collection() + if col is None: + return { + "success": False, + "message": "No palace found after reconnect", + "drawers": 0, + } + return {"success": True, "message": "Reconnected to palace", "drawers": col.count()} + except Exception as e: + return {"success": False, "error": str(e)} + + # ==================== MCP PROTOCOL ==================== TOOLS = { @@ -1321,6 +1381,17 @@ TOOLS = { "input_schema": {"type": "object", "properties": {}}, "handler": tool_memories_filed_away, }, + "mempalace_reconnect": { + "description": ( + "Force reconnect to the palace database. Use after external scripts or CLI commands" + " modified the palace directly, which can leave the in-memory HNSW index stale." + ), + "input_schema": { + "type": "object", + "properties": {}, + }, + "handler": tool_reconnect, + }, } diff --git a/tests/test_mcp_server.py b/tests/test_mcp_server.py index 933dd68..4cc8b4a 100644 --- a/tests/test_mcp_server.py +++ b/tests/test_mcp_server.py @@ -7,6 +7,7 @@ via monkeypatch to avoid touching real data. """ import json +import sys import pytest @@ -641,3 +642,104 @@ class TestDiaryTools: r = tool_diary_read(agent_name="Nobody") assert r["entries"] == [] + + +# ── Cache Invalidation (inode/mtime) ────────────────────────────────── + + +class TestCacheInvalidation: + """Tests for _get_collection inode/mtime cache invalidation logic.""" + + def test_mtime_change_invalidates_cache(self, monkeypatch, config, palace_path, kg): + """When mtime changes, the cached collection should be replaced.""" + _patch_mcp_server(monkeypatch, config, kg) + from mempalace import mcp_server + + # Create a real collection so _get_collection succeeds + _client, _col = _get_collection(palace_path, create=True) + del _client + + # Prime the cache + col1 = mcp_server._get_collection() + assert col1 is not None + + # Simulate an external write changing the mtime + old_mtime = mcp_server._palace_db_mtime + monkeypatch.setattr(mcp_server, "_palace_db_mtime", old_mtime - 10.0) + + # _get_collection should detect the mtime drift and reconnect + col2 = mcp_server._get_collection() + assert col2 is not None + + def test_inode_change_invalidates_cache(self, monkeypatch, config, palace_path, kg): + """When inode changes (file replaced), the cached collection should be replaced.""" + _patch_mcp_server(monkeypatch, config, kg) + from mempalace import mcp_server + + _client, _col = _get_collection(palace_path, create=True) + del _client + + # Prime the cache + col1 = mcp_server._get_collection() + assert col1 is not None + + # Simulate a rebuild that changes the inode + monkeypatch.setattr(mcp_server, "_palace_db_inode", 99999) + + col2 = mcp_server._get_collection() + assert col2 is not None + + @pytest.mark.skipif( + sys.platform == "win32", + reason="Windows holds chroma.sqlite3 open while the client is cached, blocking os.remove", + ) + def test_missing_db_invalidates_cache(self, monkeypatch, config, palace_path, kg): + """When chroma.sqlite3 disappears, a cached collection should be invalidated.""" + _patch_mcp_server(monkeypatch, config, kg) + import os + from mempalace import mcp_server + + _client, _col = _get_collection(palace_path, create=True) + del _client + + # Prime the cache + col1 = mcp_server._get_collection() + assert col1 is not None + assert mcp_server._collection_cache is not None + + # Delete the DB file to simulate a rebuild in progress + db_file = os.path.join(palace_path, "chroma.sqlite3") + if os.path.isfile(db_file): + os.remove(db_file) + + # Cache should be invalidated; _get_collection returns None + # because the backend can't open a missing DB without create=True + mcp_server._get_collection() + # The key assertion: the old cached collection was dropped + assert mcp_server._palace_db_inode == 0 + assert mcp_server._palace_db_mtime == 0.0 + + def test_reconnect_reports_failure_when_no_palace(self, monkeypatch, config, kg): + """tool_reconnect should report failure when no collection is available.""" + _patch_mcp_server(monkeypatch, config, kg) + from mempalace import mcp_server + + # Make _get_collection always return None + monkeypatch.setattr(mcp_server, "_get_collection", lambda create=False: None) + + result = mcp_server.tool_reconnect() + assert result["success"] is False + assert "No palace found" in result["message"] + assert result["drawers"] == 0 + + def test_reconnect_reports_success(self, monkeypatch, config, palace_path, kg): + """tool_reconnect should report success with drawer count.""" + _patch_mcp_server(monkeypatch, config, kg) + _client, _col = _get_collection(palace_path, create=True) + del _client + from mempalace import mcp_server + + result = mcp_server.tool_reconnect() + assert result["success"] is True + assert "Reconnected" in result["message"] + assert isinstance(result["drawers"], int)