From 84e2aa16e42d86b96cd74f48cc70ced259e77fec Mon Sep 17 00:00:00 2001 From: jp Date: Sat, 11 Apr 2026 19:44:49 -0700 Subject: [PATCH 1/3] perf: graph cache with write-invalidation in build_graph() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit build_graph() scans every drawer's metadata in 1000-item batches on every call — O(n) per graph build with no caching. At 50K+ drawers this costs several seconds per MCP tool call (traverse, find_tunnels, graph_stats all call build_graph on every invocation). Add a module-level cache (nodes + edges + timestamp) with a 60-second TTL. Cache is invalidated via invalidate_graph_cache(), exported for write operations to call. Tests updated with setup_method cache resets and two new tests verifying cache hit and invalidation behaviour. --- mempalace/palace_graph.py | 27 +++++++++++++++++++++++++++ tests/test_palace_graph.py | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+) diff --git a/mempalace/palace_graph.py b/mempalace/palace_graph.py index 71cad89..faeedd3 100644 --- a/mempalace/palace_graph.py +++ b/mempalace/palace_graph.py @@ -18,6 +18,7 @@ No external graph DB needed — built from ChromaDB metadata. import hashlib import json import os +import time from collections import Counter, defaultdict from datetime import datetime, timezone @@ -25,6 +26,20 @@ from .config import MempalaceConfig from .palace import get_collection as _get_palace_collection from .palace import mine_lock +# Module-level graph cache — mirrors _metadata_cache pattern in mcp_server.py +_graph_cache_nodes = None +_graph_cache_edges = None +_graph_cache_time = 0.0 +_GRAPH_CACHE_TTL = 60.0 # seconds — graph changes less often than metadata + + +def invalidate_graph_cache(): + """Clear the graph cache. Called from mcp_server.py on writes.""" + global _graph_cache_nodes, _graph_cache_edges, _graph_cache_time + _graph_cache_nodes = None + _graph_cache_edges = None + _graph_cache_time = 0.0 + def _get_collection(config=None): config = config or MempalaceConfig() @@ -42,10 +57,18 @@ def build_graph(col=None, config=None): """ Build the palace graph from ChromaDB metadata. + Returns cached result if fresh (within TTL). Cache is invalidated + on writes via invalidate_graph_cache(). + Returns: nodes: dict of {room: {wings: set, halls: set, count: int}} edges: list of {room, wing_a, wing_b, hall} — one per tunnel crossing """ + global _graph_cache_nodes, _graph_cache_edges, _graph_cache_time + now = time.time() + if _graph_cache_nodes is not None and (now - _graph_cache_time) < _GRAPH_CACHE_TTL: + return _graph_cache_nodes, _graph_cache_edges + if col is None: col = _get_collection(config) if not col: @@ -101,6 +124,10 @@ def build_graph(col=None, config=None): "dates": sorted(data["dates"])[-5:] if data["dates"] else [], } + _graph_cache_nodes = nodes + _graph_cache_edges = edges + _graph_cache_time = time.time() + return nodes, edges diff --git a/tests/test_palace_graph.py b/tests/test_palace_graph.py index ddda272..37497c1 100644 --- a/tests/test_palace_graph.py +++ b/tests/test_palace_graph.py @@ -30,6 +30,7 @@ with patch.dict("sys.modules", {"chromadb": MagicMock()}): build_graph, find_tunnels, graph_stats, + invalidate_graph_cache, traverse, ) @@ -38,6 +39,9 @@ with patch.dict("sys.modules", {"chromadb": MagicMock()}): class TestBuildGraph: + def setup_method(self): + invalidate_graph_cache() + def test_empty_collection(self): col = _make_fake_collection([]) nodes, edges = build_graph(col=col) @@ -114,11 +118,38 @@ class TestBuildGraph: nodes, _ = build_graph(col=col) assert len(nodes["busy"]["dates"]) <= 5 + def test_cache_returns_same_result(self): + """Second call within TTL returns cached nodes without re-scanning.""" + col = _make_fake_collection( + [{"room": "auth", "wing": "wing_code", "hall": "security", "date": "2026-01-01"}] + ) + nodes1, edges1 = build_graph(col=col) + # Second call with a *different* collection — should still return cached result + col2 = _make_fake_collection([]) + nodes2, edges2 = build_graph(col=col2) + assert nodes1 == nodes2 + assert edges1 == edges2 + + def test_invalidate_clears_cache(self): + """invalidate_graph_cache() forces a fresh scan on next call.""" + col = _make_fake_collection( + [{"room": "auth", "wing": "wing_code", "hall": "security", "date": "2026-01-01"}] + ) + build_graph(col=col) + invalidate_graph_cache() + col_empty = _make_fake_collection([]) + nodes, edges = build_graph(col=col_empty) + assert nodes == {} + assert edges == [] + # --- traverse --- class TestTraverse: + def setup_method(self): + invalidate_graph_cache() + def _build_col(self): return _make_fake_collection( [ @@ -156,6 +187,9 @@ class TestTraverse: class TestFindTunnels: + def setup_method(self): + invalidate_graph_cache() + def _build_tunnel_col(self): return _make_fake_collection( [ @@ -192,6 +226,9 @@ class TestFindTunnels: class TestGraphStats: + def setup_method(self): + invalidate_graph_cache() + def test_empty_graph(self): col = _make_fake_collection([]) stats = graph_stats(col=col) From 1657a79649026f0cd2b16dff23f461550f7fdfa4 Mon Sep 17 00:00:00 2001 From: jp Date: Sat, 11 Apr 2026 20:15:55 -0700 Subject: [PATCH 2/3] fix: clarify cache docs, skip caching empty graphs Addresses Copilot review feedback on #661. Co-Authored-By: Claude Opus 4.6 --- mempalace/palace_graph.py | 14 ++++++++++---- tests/test_palace_graph.py | 7 ++++++- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/mempalace/palace_graph.py b/mempalace/palace_graph.py index faeedd3..96c7c62 100644 --- a/mempalace/palace_graph.py +++ b/mempalace/palace_graph.py @@ -26,7 +26,8 @@ from .config import MempalaceConfig from .palace import get_collection as _get_palace_collection from .palace import mine_lock -# Module-level graph cache — mirrors _metadata_cache pattern in mcp_server.py +# Module-level graph cache with TTL and write-invalidation. +# Warm cache serves build_graph() in O(1); invalidate_graph_cache() clears on writes. _graph_cache_nodes = None _graph_cache_edges = None _graph_cache_time = 0.0 @@ -66,6 +67,8 @@ def build_graph(col=None, config=None): """ global _graph_cache_nodes, _graph_cache_edges, _graph_cache_time now = time.time() + # NOTE: warm cache ignores col/config args — intentional for the MCP server's + # single-palace use case. Callers switching collections must invalidate first. if _graph_cache_nodes is not None and (now - _graph_cache_time) < _GRAPH_CACHE_TTL: return _graph_cache_nodes, _graph_cache_edges @@ -124,9 +127,12 @@ def build_graph(col=None, config=None): "dates": sorted(data["dates"])[-5:] if data["dates"] else [], } - _graph_cache_nodes = nodes - _graph_cache_edges = edges - _graph_cache_time = time.time() + # Only cache non-empty graphs so new data is picked up immediately + # when the palace is first populated. + if nodes: + _graph_cache_nodes = nodes + _graph_cache_edges = edges + _graph_cache_time = time.time() return nodes, edges diff --git a/tests/test_palace_graph.py b/tests/test_palace_graph.py index 37497c1..7bc45e0 100644 --- a/tests/test_palace_graph.py +++ b/tests/test_palace_graph.py @@ -119,7 +119,12 @@ class TestBuildGraph: assert len(nodes["busy"]["dates"]) <= 5 def test_cache_returns_same_result(self): - """Second call within TTL returns cached nodes without re-scanning.""" + """Second call within TTL returns cached nodes without re-scanning. + + The cache intentionally ignores col/config args when warm — this is + correct for the MCP server's single-palace use case. Callers that + switch collections must call invalidate_graph_cache() first. + """ col = _make_fake_collection( [{"room": "auth", "wing": "wing_code", "hall": "security", "date": "2026-01-01"}] ) From 8adf35a13c30c564c4c4eb74df9d026bd1440c46 Mon Sep 17 00:00:00 2001 From: jp Date: Sun, 12 Apr 2026 14:52:53 -0700 Subject: [PATCH 3/3] fix: add threading lock to graph cache, expand docstring Address review feedback from @bensig: 1. Wrap cache reads/writes in threading.Lock for thread safety 2. Promote the col-arg caveat from inline comment to docstring Co-Authored-By: Claude Opus 4.6 --- mempalace/palace_graph.py | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/mempalace/palace_graph.py b/mempalace/palace_graph.py index 96c7c62..125ec0d 100644 --- a/mempalace/palace_graph.py +++ b/mempalace/palace_graph.py @@ -18,6 +18,7 @@ No external graph DB needed — built from ChromaDB metadata. import hashlib import json import os +import threading import time from collections import Counter, defaultdict from datetime import datetime, timezone @@ -28,6 +29,7 @@ from .palace import mine_lock # Module-level graph cache with TTL and write-invalidation. # Warm cache serves build_graph() in O(1); invalidate_graph_cache() clears on writes. +_graph_cache_lock = threading.Lock() _graph_cache_nodes = None _graph_cache_edges = None _graph_cache_time = 0.0 @@ -37,9 +39,10 @@ _GRAPH_CACHE_TTL = 60.0 # seconds — graph changes less often than metadata def invalidate_graph_cache(): """Clear the graph cache. Called from mcp_server.py on writes.""" global _graph_cache_nodes, _graph_cache_edges, _graph_cache_time - _graph_cache_nodes = None - _graph_cache_edges = None - _graph_cache_time = 0.0 + with _graph_cache_lock: + _graph_cache_nodes = None + _graph_cache_edges = None + _graph_cache_time = 0.0 def _get_collection(config=None): @@ -59,7 +62,11 @@ def build_graph(col=None, config=None): Build the palace graph from ChromaDB metadata. Returns cached result if fresh (within TTL). Cache is invalidated - on writes via invalidate_graph_cache(). + on writes via invalidate_graph_cache(). Thread-safe via _graph_cache_lock. + + Note: warm cache ignores ``col`` and ``config`` arguments — this is + intentional for the MCP server's single-palace use case. Callers + switching collections should call ``invalidate_graph_cache()`` first. Returns: nodes: dict of {room: {wings: set, halls: set, count: int}} @@ -69,8 +76,9 @@ def build_graph(col=None, config=None): now = time.time() # NOTE: warm cache ignores col/config args — intentional for the MCP server's # single-palace use case. Callers switching collections must invalidate first. - if _graph_cache_nodes is not None and (now - _graph_cache_time) < _GRAPH_CACHE_TTL: - return _graph_cache_nodes, _graph_cache_edges + with _graph_cache_lock: + if _graph_cache_nodes is not None and (now - _graph_cache_time) < _GRAPH_CACHE_TTL: + return _graph_cache_nodes, _graph_cache_edges if col is None: col = _get_collection(config) @@ -130,9 +138,10 @@ def build_graph(col=None, config=None): # Only cache non-empty graphs so new data is picked up immediately # when the palace is first populated. if nodes: - _graph_cache_nodes = nodes - _graph_cache_edges = edges - _graph_cache_time = time.time() + with _graph_cache_lock: + _graph_cache_nodes = nodes + _graph_cache_edges = edges + _graph_cache_time = time.time() return nodes, edges