fix(repair): detect HNSW capacity divergence and fall back to BM25 (#1222)

When chromadb's HNSW segment freezes at a stale max_elements while
sqlite keeps accumulating embeddings, the next chromadb open segfaults
the MCP server on every tool call. Adds a pure-filesystem capacity probe
(zero chromadb interaction), a `mempalace repair-status` read-only
health check, and a BM25-only sqlite fallback so the palace stays
reachable even when vector search is unavailable.

* `hnsw_capacity_status` reads sqlite + index_metadata.pickle directly
  via a tight-allowlist unpickler — no hnswlib import, no segment load.
* MCP server runs the probe at startup and after every reconnect; sets
  `_vector_disabled` and routes search to the sqlite FTS5 + BM25 path.
* `tool_status` and `tool_reconnect` surface the fallback state.
* Threshold tuned for chromadb 1.5.x async-flush lag (2× sync_threshold).
This commit is contained in:
Igor Lins e Silva
2026-04-26 19:54:00 -03:00
parent 899a5ec4c6
commit 0d349c3d86
6 changed files with 988 additions and 7 deletions
+115 -3
View File
@@ -57,7 +57,12 @@ from .config import ( # noqa: E402
sanitize_content,
)
from .version import __version__ # noqa: E402
from .backends.chroma import ChromaBackend, ChromaCollection, _pin_hnsw_threads # noqa: E402
from .backends.chroma import ( # noqa: E402
ChromaBackend,
ChromaCollection,
_pin_hnsw_threads,
hnsw_capacity_status,
)
from .query_sanitizer import sanitize_query # noqa: E402
from .searcher import search_memories # noqa: E402
from .palace_graph import ( # noqa: E402
@@ -108,6 +113,52 @@ _collection_cache = None
_palace_db_inode = 0 # inode of chroma.sqlite3 at cache time
_palace_db_mtime = 0.0 # mtime of chroma.sqlite3 at cache time
# ── Vector-search disabled flag (#1222) ──────────────────────────────────
# Set when ``hnsw_capacity_status`` reports a divergence between sqlite
# and the HNSW segment large enough that chromadb would segfault on
# segment load. While this is set, vector-shaped tools (``search``,
# ``check_duplicate``) route to the sqlite-only BM25 fallback in
# :func:`mempalace.searcher._bm25_only_via_sqlite`. Cleared after a
# successful repair via :func:`tool_reconnect` (which re-runs the probe).
#
# All three names are written by ``_refresh_vector_disabled_flag``.
# True while the last probe reported ``diverged``.
_vector_disabled = False
# The probe's ``message`` while disabled; empty string otherwise.
_vector_disabled_reason = ""
# Most recent probe result, or None if no probe has succeeded yet.
_vector_capacity_status: dict | None = None
def _refresh_vector_disabled_flag() -> None:
    """Re-run the HNSW capacity probe and update the module-level flags.

    Called from :func:`_get_client` whenever the client cache is rebuilt
    (first open or palace replacement) and once from :func:`main` at
    startup. Cheap — pure sqlite + pickle read, no chromadb interaction.

    On a usable probe result the result is stored in
    ``_vector_capacity_status`` and ``_vector_disabled`` /
    ``_vector_disabled_reason`` are set from its ``"diverged"`` and
    ``"message"`` entries. A log line is emitted only when the disabled
    state actually flips.

    Never raises: a probe that crashes would defeat the point. If the
    probe raises, or returns something that is not a dict, the failure is
    logged at debug level and the previous flag state is left untouched.
    """
    global _vector_disabled, _vector_disabled_reason, _vector_capacity_status

    try:
        info = hnsw_capacity_status(_config.palace_path, "mempalace_drawers")
    except Exception:
        logger.debug("HNSW capacity probe raised", exc_info=True)
        return

    if not isinstance(info, dict):
        # Uphold the "never raises" contract: ``info.get`` below would
        # blow up on None or any other non-dict result, taking down the
        # caller (``_get_client`` / ``main``) with it.
        logger.debug("HNSW capacity probe returned a non-dict result: %r", info)
        return

    _vector_capacity_status = info

    if info.get("diverged"):
        # Warn only on the enabled -> disabled transition to avoid
        # repeating the message on every client rebuild.
        if not _vector_disabled:
            logger.warning(
                "HNSW capacity divergence detected (%s) — routing search to "
                "BM25-only sqlite fallback. Run `mempalace repair rebuild` to restore "
                "vector search.",
                info.get("message", "unknown"),
            )
        _vector_disabled = True
        _vector_disabled_reason = info.get("message", "")
    else:
        # Likewise, announce recovery only on the disabled -> enabled flip.
        if _vector_disabled:
            logger.info(
                "HNSW capacity within tolerance (%s) — vector search re-enabled",
                info.get("message", ""),
            )
        _vector_disabled = False
        _vector_disabled_reason = ""
# ==================== WRITE-AHEAD LOG ====================
# Every write operation is logged to a JSONL file before execution.
@@ -202,6 +253,11 @@ def _get_client():
mtime_changed = current_mtime != 0.0 and abs(current_mtime - _palace_db_mtime) > 0.01
if _client_cache is None or inode_changed or mtime_changed:
# Run the HNSW capacity probe BEFORE chromadb opens the segment —
# if the index is severely undersized, segment load can segfault
# the whole MCP server (#1222). The probe is pure sqlite +
# metadata-pickle read; never touches the HNSW binary files.
_refresh_vector_disabled_flag()
_client_cache = ChromaBackend.make_client(_config.palace_path)
_collection_cache = None
_metadata_cache = None
@@ -322,6 +378,17 @@ def tool_status():
"protocol": PALACE_PROTOCOL,
"aaak_dialect": AAAK_SPEC,
}
if _vector_disabled:
# Surface the #1222 fallback state so the AI knows search results
# are BM25-only and recommends the operator run repair.
result["vector_disabled"] = True
result["vector_disabled_reason"] = _vector_disabled_reason
if _vector_capacity_status:
result["hnsw_capacity"] = {
"sqlite_count": _vector_capacity_status.get("sqlite_count"),
"hnsw_count": _vector_capacity_status.get("hnsw_count"),
"divergence": _vector_capacity_status.get("divergence"),
}
try:
all_meta = _get_cached_metadata(col)
for m in all_meta:
@@ -456,6 +523,9 @@ def tool_search(
dist = (1.0 - min_similarity) if min_similarity is not None else max_distance
# Mitigate system prompt contamination (Issue #333)
sanitized = sanitize_query(query)
# Ensure the capacity probe has been run at least once before we
# decide which path to take — _get_client triggers it on first open.
_get_client()
result = search_memories(
sanitized["clean_query"],
palace_path=_config.palace_path,
@@ -463,7 +533,11 @@ def tool_search(
room=room,
n_results=limit,
max_distance=dist,
vector_disabled=_vector_disabled,
)
if _vector_disabled:
result["vector_disabled"] = True
result["vector_disabled_reason"] = _vector_disabled_reason
# Attach sanitizer metadata for transparency
if sanitized["was_sanitized"]:
result["query_sanitized"] = True
@@ -482,6 +556,21 @@ def tool_check_duplicate(content: str, threshold: float = 0.9):
col = _get_collection()
if not col:
return _no_palace()
if _vector_disabled:
# Without a usable HNSW we can't compute cosine similarity for
# near-duplicate detection. Report the limitation rather than
# silently returning "not a duplicate" — false negatives here
# would let the AI re-file content the palace already holds.
return {
"is_duplicate": False,
"matches": [],
"vector_disabled": True,
"vector_disabled_reason": _vector_disabled_reason,
"hint": (
"duplicate detection requires vector search; run "
"`mempalace repair rebuild` to restore"
),
}
try:
results = col.query(
query_texts=[content],
@@ -1150,10 +1239,22 @@ def tool_reconnect():
Use after external scripts or CLI commands modify the palace database
directly, which can leave the in-memory HNSW index stale.
"""
global _collection_cache, _palace_db_inode, _palace_db_mtime
global \
_client_cache, \
_collection_cache, \
_palace_db_inode, \
_palace_db_mtime, \
_vector_disabled, \
_vector_disabled_reason
_client_cache = None
_collection_cache = None
_palace_db_inode = 0
_palace_db_mtime = 0.0
# Clear the flag optimistically. Resetting _client_cache above is what
# makes the next _get_client call re-run the probe, and
# _refresh_vector_disabled_flag will re-set the flag if the divergence
# still applies after the reconnect.
_vector_disabled = False
_vector_disabled_reason = ""
try:
col = _get_collection()
if col is None:
@@ -1161,8 +1262,15 @@ def tool_reconnect():
"success": False,
"message": "No palace found after reconnect",
"drawers": 0,
"vector_disabled": _vector_disabled,
}
return {"success": True, "message": "Reconnected to palace", "drawers": col.count()}
return {
"success": True,
"message": "Reconnected to palace",
"drawers": col.count(),
"vector_disabled": _vector_disabled,
"vector_disabled_reason": _vector_disabled_reason,
}
except Exception as e:
return {"success": False, "error": str(e)}
@@ -1726,6 +1834,10 @@ def _restore_stdout():
def main():
_restore_stdout()
logger.info("MemPalace MCP Server starting...")
# Pre-flight: probe HNSW capacity before any tool call so the warning
# is visible at startup rather than on first use (#1222). Pure
# filesystem read; never opens a chromadb client.
_refresh_vector_disabled_flag()
while True:
try:
line = sys.stdin.readline()