fix: detect mtime changes in _get_client to prevent stale HNSW index (#757)
When external tools write to the palace database (CLI mining, scripts), the MCP server's cached ChromaDB collection becomes stale — its HNSW index doesn't know about new vectors. Develop already invalidates on inode changes (catches rebuilds) but not on mtime changes (misses in-place writes). This PR: - Adds st_mtime tracking alongside st_ino in _get_client; invalidates the cached client on either change. - Adds the mempalace_reconnect MCP tool for explicit cache flush. Original author: @jphein (#663). Original approval: @Ari4ka. Skips test_missing_db_invalidates_cache on Windows (ChromaDB holds chroma.sqlite3 open).
This commit is contained in:
committed by
GitHub
parent
39e1651af3
commit
e200ce2c8a
+74
-3
@@ -15,6 +15,9 @@ Tools (read):
|
|||||||
Tools (write):
|
Tools (write):
|
||||||
mempalace_add_drawer — file verbatim content into a wing/room
|
mempalace_add_drawer — file verbatim content into a wing/room
|
||||||
mempalace_delete_drawer — remove a drawer by ID
|
mempalace_delete_drawer — remove a drawer by ID
|
||||||
|
|
||||||
|
Tools (maintenance):
|
||||||
|
mempalace_reconnect — force cache invalidation and reconnect after external writes
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
@@ -70,6 +73,7 @@ else:
|
|||||||
_client_cache = None
|
_client_cache = None
|
||||||
_collection_cache = None
|
_collection_cache = None
|
||||||
_palace_db_inode = 0 # inode of chroma.sqlite3 at cache time
|
_palace_db_inode = 0 # inode of chroma.sqlite3 at cache time
|
||||||
|
_palace_db_mtime = 0.0 # mtime of chroma.sqlite3 at cache time
|
||||||
|
|
||||||
|
|
||||||
# ==================== WRITE-AHEAD LOG ====================
|
# ==================== WRITE-AHEAD LOG ====================
|
||||||
@@ -127,20 +131,50 @@ def _get_client():
|
|||||||
|
|
||||||
Detects palace rebuilds (repair/nuke/purge) by checking the inode of
|
Detects palace rebuilds (repair/nuke/purge) by checking the inode of
|
||||||
chroma.sqlite3. A full rebuild replaces the file, changing the inode.
|
chroma.sqlite3. A full rebuild replaces the file, changing the inode.
|
||||||
|
Also detects external writes (scripts, CLI) via mtime changes — the
|
||||||
|
inode check alone misses in-place modifications that invalidate the
|
||||||
|
in-memory HNSW index.
|
||||||
|
|
||||||
|
Note: FAT/exFAT may return 0 for st_ino — the ``current_inode != 0``
|
||||||
|
guard skips reconnect detection on those filesystems (safe fallback).
|
||||||
"""
|
"""
|
||||||
global _client_cache, _collection_cache, _palace_db_inode, _metadata_cache, _metadata_cache_time
|
global \
|
||||||
|
_client_cache, \
|
||||||
|
_collection_cache, \
|
||||||
|
_palace_db_inode, \
|
||||||
|
_palace_db_mtime, \
|
||||||
|
_metadata_cache, \
|
||||||
|
_metadata_cache_time
|
||||||
db_path = os.path.join(_config.palace_path, "chroma.sqlite3")
|
db_path = os.path.join(_config.palace_path, "chroma.sqlite3")
|
||||||
try:
|
try:
|
||||||
current_inode = os.stat(db_path).st_ino
|
st = os.stat(db_path)
|
||||||
|
current_inode = st.st_ino
|
||||||
|
current_mtime = st.st_mtime
|
||||||
except OSError:
|
except OSError:
|
||||||
current_inode = 0
|
current_inode = 0
|
||||||
|
current_mtime = 0.0
|
||||||
|
|
||||||
if _client_cache is None or current_inode != _palace_db_inode:
|
# If the DB file disappeared (e.g. during rebuild) but we have a cached
|
||||||
|
# collection, invalidate so we don't serve stale data. Without this,
|
||||||
|
# both stored and current values are 0 on the first call after deletion,
|
||||||
|
# making inode_changed and mtime_changed both False.
|
||||||
|
if not os.path.isfile(db_path) and _collection_cache is not None:
|
||||||
|
_client_cache = None
|
||||||
|
_collection_cache = None
|
||||||
|
_palace_db_inode = 0
|
||||||
|
_palace_db_mtime = 0.0
|
||||||
|
# Fall through to normal reconnect which will handle missing DB
|
||||||
|
|
||||||
|
inode_changed = current_inode != 0 and current_inode != _palace_db_inode
|
||||||
|
mtime_changed = current_mtime != 0.0 and abs(current_mtime - _palace_db_mtime) > 0.01
|
||||||
|
|
||||||
|
if _client_cache is None or inode_changed or mtime_changed:
|
||||||
_client_cache = chromadb.PersistentClient(path=_config.palace_path)
|
_client_cache = chromadb.PersistentClient(path=_config.palace_path)
|
||||||
_collection_cache = None
|
_collection_cache = None
|
||||||
_metadata_cache = None
|
_metadata_cache = None
|
||||||
_metadata_cache_time = 0
|
_metadata_cache_time = 0
|
||||||
_palace_db_inode = current_inode
|
_palace_db_inode = current_inode
|
||||||
|
_palace_db_mtime = current_mtime
|
||||||
return _client_cache
|
return _client_cache
|
||||||
|
|
||||||
|
|
||||||
@@ -973,6 +1007,32 @@ def tool_memories_filed_away():
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# ==================== SETTINGS TOOLS ====================
|
||||||
|
|
||||||
|
|
||||||
|
def tool_reconnect():
|
||||||
|
"""Force the MCP server to drop the cached ChromaDB collection and reconnect.
|
||||||
|
|
||||||
|
Use after external scripts or CLI commands modify the palace database
|
||||||
|
directly, which can leave the in-memory HNSW index stale.
|
||||||
|
"""
|
||||||
|
global _collection_cache, _palace_db_inode, _palace_db_mtime
|
||||||
|
_collection_cache = None
|
||||||
|
_palace_db_inode = 0
|
||||||
|
_palace_db_mtime = 0.0
|
||||||
|
try:
|
||||||
|
col = _get_collection()
|
||||||
|
if col is None:
|
||||||
|
return {
|
||||||
|
"success": False,
|
||||||
|
"message": "No palace found after reconnect",
|
||||||
|
"drawers": 0,
|
||||||
|
}
|
||||||
|
return {"success": True, "message": "Reconnected to palace", "drawers": col.count()}
|
||||||
|
except Exception as e:
|
||||||
|
return {"success": False, "error": str(e)}
|
||||||
|
|
||||||
|
|
||||||
# ==================== MCP PROTOCOL ====================
|
# ==================== MCP PROTOCOL ====================
|
||||||
|
|
||||||
TOOLS = {
|
TOOLS = {
|
||||||
@@ -1321,6 +1381,17 @@ TOOLS = {
|
|||||||
"input_schema": {"type": "object", "properties": {}},
|
"input_schema": {"type": "object", "properties": {}},
|
||||||
"handler": tool_memories_filed_away,
|
"handler": tool_memories_filed_away,
|
||||||
},
|
},
|
||||||
|
"mempalace_reconnect": {
|
||||||
|
"description": (
|
||||||
|
"Force reconnect to the palace database. Use after external scripts or CLI commands"
|
||||||
|
" modified the palace directly, which can leave the in-memory HNSW index stale."
|
||||||
|
),
|
||||||
|
"input_schema": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {},
|
||||||
|
},
|
||||||
|
"handler": tool_reconnect,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ via monkeypatch to avoid touching real data.
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
import json
|
import json
|
||||||
|
import sys
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
@@ -641,3 +642,104 @@ class TestDiaryTools:
|
|||||||
|
|
||||||
r = tool_diary_read(agent_name="Nobody")
|
r = tool_diary_read(agent_name="Nobody")
|
||||||
assert r["entries"] == []
|
assert r["entries"] == []
|
||||||
|
|
||||||
|
|
||||||
|
# ── Cache Invalidation (inode/mtime) ──────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
class TestCacheInvalidation:
|
||||||
|
"""Tests for _get_collection inode/mtime cache invalidation logic."""
|
||||||
|
|
||||||
|
def test_mtime_change_invalidates_cache(self, monkeypatch, config, palace_path, kg):
|
||||||
|
"""When mtime changes, the cached collection should be replaced."""
|
||||||
|
_patch_mcp_server(monkeypatch, config, kg)
|
||||||
|
from mempalace import mcp_server
|
||||||
|
|
||||||
|
# Create a real collection so _get_collection succeeds
|
||||||
|
_client, _col = _get_collection(palace_path, create=True)
|
||||||
|
del _client
|
||||||
|
|
||||||
|
# Prime the cache
|
||||||
|
col1 = mcp_server._get_collection()
|
||||||
|
assert col1 is not None
|
||||||
|
|
||||||
|
# Simulate an external write changing the mtime
|
||||||
|
old_mtime = mcp_server._palace_db_mtime
|
||||||
|
monkeypatch.setattr(mcp_server, "_palace_db_mtime", old_mtime - 10.0)
|
||||||
|
|
||||||
|
# _get_collection should detect the mtime drift and reconnect
|
||||||
|
col2 = mcp_server._get_collection()
|
||||||
|
assert col2 is not None
|
||||||
|
|
||||||
|
def test_inode_change_invalidates_cache(self, monkeypatch, config, palace_path, kg):
|
||||||
|
"""When inode changes (file replaced), the cached collection should be replaced."""
|
||||||
|
_patch_mcp_server(monkeypatch, config, kg)
|
||||||
|
from mempalace import mcp_server
|
||||||
|
|
||||||
|
_client, _col = _get_collection(palace_path, create=True)
|
||||||
|
del _client
|
||||||
|
|
||||||
|
# Prime the cache
|
||||||
|
col1 = mcp_server._get_collection()
|
||||||
|
assert col1 is not None
|
||||||
|
|
||||||
|
# Simulate a rebuild that changes the inode
|
||||||
|
monkeypatch.setattr(mcp_server, "_palace_db_inode", 99999)
|
||||||
|
|
||||||
|
col2 = mcp_server._get_collection()
|
||||||
|
assert col2 is not None
|
||||||
|
|
||||||
|
@pytest.mark.skipif(
|
||||||
|
sys.platform == "win32",
|
||||||
|
reason="Windows holds chroma.sqlite3 open while the client is cached, blocking os.remove",
|
||||||
|
)
|
||||||
|
def test_missing_db_invalidates_cache(self, monkeypatch, config, palace_path, kg):
|
||||||
|
"""When chroma.sqlite3 disappears, a cached collection should be invalidated."""
|
||||||
|
_patch_mcp_server(monkeypatch, config, kg)
|
||||||
|
import os
|
||||||
|
from mempalace import mcp_server
|
||||||
|
|
||||||
|
_client, _col = _get_collection(palace_path, create=True)
|
||||||
|
del _client
|
||||||
|
|
||||||
|
# Prime the cache
|
||||||
|
col1 = mcp_server._get_collection()
|
||||||
|
assert col1 is not None
|
||||||
|
assert mcp_server._collection_cache is not None
|
||||||
|
|
||||||
|
# Delete the DB file to simulate a rebuild in progress
|
||||||
|
db_file = os.path.join(palace_path, "chroma.sqlite3")
|
||||||
|
if os.path.isfile(db_file):
|
||||||
|
os.remove(db_file)
|
||||||
|
|
||||||
|
# Cache should be invalidated; _get_collection returns None
|
||||||
|
# because the backend can't open a missing DB without create=True
|
||||||
|
mcp_server._get_collection()
|
||||||
|
# The key assertion: the old cached collection was dropped
|
||||||
|
assert mcp_server._palace_db_inode == 0
|
||||||
|
assert mcp_server._palace_db_mtime == 0.0
|
||||||
|
|
||||||
|
def test_reconnect_reports_failure_when_no_palace(self, monkeypatch, config, kg):
|
||||||
|
"""tool_reconnect should report failure when no collection is available."""
|
||||||
|
_patch_mcp_server(monkeypatch, config, kg)
|
||||||
|
from mempalace import mcp_server
|
||||||
|
|
||||||
|
# Make _get_collection always return None
|
||||||
|
monkeypatch.setattr(mcp_server, "_get_collection", lambda create=False: None)
|
||||||
|
|
||||||
|
result = mcp_server.tool_reconnect()
|
||||||
|
assert result["success"] is False
|
||||||
|
assert "No palace found" in result["message"]
|
||||||
|
assert result["drawers"] == 0
|
||||||
|
|
||||||
|
def test_reconnect_reports_success(self, monkeypatch, config, palace_path, kg):
|
||||||
|
"""tool_reconnect should report success with drawer count."""
|
||||||
|
_patch_mcp_server(monkeypatch, config, kg)
|
||||||
|
_client, _col = _get_collection(palace_path, create=True)
|
||||||
|
del _client
|
||||||
|
from mempalace import mcp_server
|
||||||
|
|
||||||
|
result = mcp_server.tool_reconnect()
|
||||||
|
assert result["success"] is True
|
||||||
|
assert "Reconnected" in result["message"]
|
||||||
|
assert isinstance(result["drawers"], int)
|
||||||
|
|||||||
Reference in New Issue
Block a user