2026-04-07 19:39:06 -03:00
|
|
|
"""
|
|
|
|
|
MCP server tool performance benchmarks.
|
|
|
|
|
|
|
|
|
|
Validates production readiness findings:
|
|
|
|
|
- Finding #3: tool_status() unbounded col.get(include=["metadatas"]) → OOM
|
|
|
|
|
- Finding #7: _get_collection() re-instantiates PersistentClient every call
|
|
|
|
|
- Finding #3 variants: tool_list_wings(), tool_get_taxonomy() same pattern
|
|
|
|
|
|
|
|
|
|
Calls MCP tool handler functions directly with monkeypatched _config.
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
import time
|
|
|
|
|
|
|
|
|
|
import chromadb
|
|
|
|
|
import pytest
|
|
|
|
|
|
2026-04-08 05:10:26 -03:00
|
|
|
from tests.benchmarks.data_generator import PalaceDataGenerator
|
2026-04-07 19:39:06 -03:00
|
|
|
from tests.benchmarks.report import record_metric
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ── Helpers ──────────────────────────────────────────────────────────────
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _make_palace(tmp_path, n_drawers, scale="small"):
|
|
|
|
|
"""Create a palace with exactly n_drawers, return palace_path."""
|
|
|
|
|
gen = PalaceDataGenerator(seed=42, scale=scale)
|
|
|
|
|
palace_path = str(tmp_path / "palace")
|
|
|
|
|
gen.populate_palace_directly(palace_path, n_drawers=n_drawers, include_needles=False)
|
|
|
|
|
return palace_path
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _patch_mcp_config(monkeypatch, palace_path, tmp_path):
|
|
|
|
|
"""Monkeypatch mcp_server._config and _kg to point at test dirs."""
|
|
|
|
|
from mempalace.config import MempalaceConfig
|
|
|
|
|
from mempalace.knowledge_graph import KnowledgeGraph
|
|
|
|
|
|
|
|
|
|
cfg = MempalaceConfig(config_dir=str(tmp_path / "cfg"))
|
|
|
|
|
# Override palace_path directly on the object
|
|
|
|
|
monkeypatch.setattr(cfg, "_file_config", {"palace_path": palace_path})
|
|
|
|
|
|
|
|
|
|
import mempalace.mcp_server as mcp_mod
|
|
|
|
|
|
2026-04-24 12:48:00 +05:00
|
|
|
kg = KnowledgeGraph(db_path=str(tmp_path / "kg.sqlite3"))
|
2026-04-07 19:39:06 -03:00
|
|
|
monkeypatch.setattr(mcp_mod, "_config", cfg)
|
2026-04-24 12:48:00 +05:00
|
|
|
monkeypatch.setattr(mcp_mod, "_get_kg", lambda: kg)
|
2026-04-07 19:39:06 -03:00
|
|
|
|
|
|
|
|
|
|
|
|
|
def _get_rss_mb():
|
|
|
|
|
"""Get current process RSS in MB."""
|
|
|
|
|
try:
|
|
|
|
|
import psutil
|
|
|
|
|
|
|
|
|
|
return psutil.Process().memory_info().rss / (1024 * 1024)
|
|
|
|
|
except ImportError:
|
|
|
|
|
import resource
|
|
|
|
|
|
|
|
|
|
# ru_maxrss is in KB on Linux, bytes on macOS
|
|
|
|
|
import platform
|
|
|
|
|
|
|
|
|
|
usage = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
|
|
|
|
|
if platform.system() == "Darwin":
|
|
|
|
|
return usage / (1024 * 1024)
|
|
|
|
|
return usage / 1024
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ── Tests ────────────────────────────────────────────────────────────────
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.mark.benchmark
|
|
|
|
|
class TestToolStatusOOM:
|
|
|
|
|
"""Finding #3: tool_status loads ALL metadata into memory."""
|
|
|
|
|
|
|
|
|
|
SIZES = [500, 1_000, 2_500, 5_000]
|
|
|
|
|
|
|
|
|
|
@pytest.mark.parametrize("n_drawers", SIZES)
|
|
|
|
|
def test_tool_status_rss_growth(self, n_drawers, tmp_path, monkeypatch):
|
|
|
|
|
"""Measure RSS growth from tool_status at different palace sizes."""
|
|
|
|
|
palace_path = _make_palace(tmp_path, n_drawers)
|
|
|
|
|
_patch_mcp_config(monkeypatch, palace_path, tmp_path)
|
|
|
|
|
|
|
|
|
|
from mempalace.mcp_server import tool_status
|
|
|
|
|
|
|
|
|
|
rss_before = _get_rss_mb()
|
|
|
|
|
result = tool_status()
|
|
|
|
|
rss_after = _get_rss_mb()
|
|
|
|
|
|
|
|
|
|
rss_delta = rss_after - rss_before
|
|
|
|
|
assert "error" not in result, f"tool_status failed: {result}"
|
|
|
|
|
assert result["total_drawers"] == n_drawers
|
|
|
|
|
|
|
|
|
|
record_metric("mcp_status", f"rss_delta_mb_at_{n_drawers}", round(rss_delta, 2))
|
|
|
|
|
|
|
|
|
|
@pytest.mark.parametrize("n_drawers", SIZES)
|
|
|
|
|
def test_tool_status_latency(self, n_drawers, tmp_path, monkeypatch):
|
|
|
|
|
"""Measure tool_status response time at different palace sizes."""
|
|
|
|
|
palace_path = _make_palace(tmp_path, n_drawers)
|
|
|
|
|
_patch_mcp_config(monkeypatch, palace_path, tmp_path)
|
|
|
|
|
|
|
|
|
|
from mempalace.mcp_server import tool_status
|
|
|
|
|
|
|
|
|
|
# Warm up
|
|
|
|
|
tool_status()
|
|
|
|
|
|
|
|
|
|
start = time.perf_counter()
|
|
|
|
|
result = tool_status()
|
|
|
|
|
elapsed_ms = (time.perf_counter() - start) * 1000
|
|
|
|
|
|
|
|
|
|
assert "error" not in result
|
|
|
|
|
record_metric("mcp_status", f"latency_ms_at_{n_drawers}", round(elapsed_ms, 1))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.mark.benchmark
|
|
|
|
|
class TestToolListWingsUnbounded:
|
|
|
|
|
"""Finding #3 variant: tool_list_wings also fetches ALL metadata."""
|
|
|
|
|
|
|
|
|
|
@pytest.mark.parametrize("n_drawers", [500, 1_000, 2_500, 5_000])
|
|
|
|
|
def test_list_wings_latency(self, n_drawers, tmp_path, monkeypatch):
|
|
|
|
|
palace_path = _make_palace(tmp_path, n_drawers)
|
|
|
|
|
_patch_mcp_config(monkeypatch, palace_path, tmp_path)
|
|
|
|
|
|
|
|
|
|
from mempalace.mcp_server import tool_list_wings
|
|
|
|
|
|
|
|
|
|
start = time.perf_counter()
|
|
|
|
|
result = tool_list_wings()
|
|
|
|
|
elapsed_ms = (time.perf_counter() - start) * 1000
|
|
|
|
|
|
|
|
|
|
assert "wings" in result
|
|
|
|
|
record_metric("mcp_list_wings", f"latency_ms_at_{n_drawers}", round(elapsed_ms, 1))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.mark.benchmark
|
|
|
|
|
class TestToolGetTaxonomyUnbounded:
|
|
|
|
|
"""Finding #3 variant: tool_get_taxonomy also fetches ALL metadata."""
|
|
|
|
|
|
|
|
|
|
@pytest.mark.parametrize("n_drawers", [500, 1_000, 2_500, 5_000])
|
|
|
|
|
def test_get_taxonomy_latency(self, n_drawers, tmp_path, monkeypatch):
|
|
|
|
|
palace_path = _make_palace(tmp_path, n_drawers)
|
|
|
|
|
_patch_mcp_config(monkeypatch, palace_path, tmp_path)
|
|
|
|
|
|
|
|
|
|
from mempalace.mcp_server import tool_get_taxonomy
|
|
|
|
|
|
|
|
|
|
start = time.perf_counter()
|
|
|
|
|
result = tool_get_taxonomy()
|
|
|
|
|
elapsed_ms = (time.perf_counter() - start) * 1000
|
|
|
|
|
|
|
|
|
|
assert "taxonomy" in result
|
|
|
|
|
record_metric("mcp_taxonomy", f"latency_ms_at_{n_drawers}", round(elapsed_ms, 1))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.mark.benchmark
|
|
|
|
|
class TestClientReinstantiation:
|
|
|
|
|
"""Finding #7: _get_collection() creates new PersistentClient every call."""
|
|
|
|
|
|
|
|
|
|
def test_reinstantiation_overhead(self, tmp_path, monkeypatch):
|
|
|
|
|
"""Measure cost of 50 _get_collection() calls vs a cached client."""
|
|
|
|
|
palace_path = _make_palace(tmp_path, 500)
|
|
|
|
|
_patch_mcp_config(monkeypatch, palace_path, tmp_path)
|
|
|
|
|
|
|
|
|
|
from mempalace.mcp_server import _get_collection
|
|
|
|
|
|
|
|
|
|
n_calls = 50
|
|
|
|
|
|
|
|
|
|
# Measure re-instantiation (current behavior)
|
|
|
|
|
start = time.perf_counter()
|
|
|
|
|
for _ in range(n_calls):
|
|
|
|
|
col = _get_collection()
|
|
|
|
|
assert col is not None
|
|
|
|
|
uncached_ms = (time.perf_counter() - start) * 1000
|
|
|
|
|
|
|
|
|
|
# Measure cached client (what it should be)
|
|
|
|
|
client = chromadb.PersistentClient(path=palace_path)
|
|
|
|
|
cached_col = client.get_collection("mempalace_drawers")
|
|
|
|
|
start = time.perf_counter()
|
|
|
|
|
for _ in range(n_calls):
|
|
|
|
|
_ = cached_col.count()
|
|
|
|
|
cached_ms = (time.perf_counter() - start) * 1000
|
|
|
|
|
|
|
|
|
|
overhead_ratio = uncached_ms / max(cached_ms, 0.01)
|
|
|
|
|
|
|
|
|
|
record_metric("client_reinstantiation", "uncached_total_ms", round(uncached_ms, 1))
|
|
|
|
|
record_metric("client_reinstantiation", "cached_total_ms", round(cached_ms, 1))
|
|
|
|
|
record_metric("client_reinstantiation", "overhead_ratio", round(overhead_ratio, 2))
|
|
|
|
|
record_metric("client_reinstantiation", "n_calls", n_calls)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.mark.benchmark
|
|
|
|
|
class TestToolSearchLatency:
|
|
|
|
|
"""tool_search uses query() not get(), should scale better."""
|
|
|
|
|
|
|
|
|
|
@pytest.mark.parametrize("n_drawers", [500, 1_000, 2_500, 5_000])
|
|
|
|
|
def test_search_latency(self, n_drawers, tmp_path, monkeypatch):
|
|
|
|
|
palace_path = _make_palace(tmp_path, n_drawers)
|
|
|
|
|
_patch_mcp_config(monkeypatch, palace_path, tmp_path)
|
|
|
|
|
|
|
|
|
|
from mempalace.mcp_server import tool_search
|
|
|
|
|
|
|
|
|
|
queries = ["authentication middleware", "database migration", "error handling"]
|
|
|
|
|
latencies = []
|
|
|
|
|
for q in queries:
|
|
|
|
|
start = time.perf_counter()
|
|
|
|
|
result = tool_search(query=q, limit=5)
|
|
|
|
|
elapsed_ms = (time.perf_counter() - start) * 1000
|
|
|
|
|
latencies.append(elapsed_ms)
|
|
|
|
|
assert "error" not in result
|
|
|
|
|
|
|
|
|
|
avg_ms = sum(latencies) / len(latencies)
|
|
|
|
|
record_metric("mcp_search", f"avg_latency_ms_at_{n_drawers}", round(avg_ms, 1))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.mark.benchmark
|
|
|
|
|
class TestDuplicateCheckCost:
|
|
|
|
|
"""tool_add_drawer calls tool_check_duplicate first — measure overhead."""
|
|
|
|
|
|
|
|
|
|
@pytest.mark.parametrize("n_drawers", [500, 1_000, 2_500])
|
|
|
|
|
def test_duplicate_check_latency(self, n_drawers, tmp_path, monkeypatch):
|
|
|
|
|
palace_path = _make_palace(tmp_path, n_drawers)
|
|
|
|
|
_patch_mcp_config(monkeypatch, palace_path, tmp_path)
|
|
|
|
|
|
|
|
|
|
from mempalace.mcp_server import tool_check_duplicate
|
|
|
|
|
|
|
|
|
|
test_content = "This is unique test content for duplicate checking benchmark."
|
|
|
|
|
start = time.perf_counter()
|
|
|
|
|
result = tool_check_duplicate(content=test_content)
|
|
|
|
|
elapsed_ms = (time.perf_counter() - start) * 1000
|
|
|
|
|
|
|
|
|
|
assert "error" not in result
|
|
|
|
|
record_metric("mcp_duplicate_check", f"latency_ms_at_{n_drawers}", round(elapsed_ms, 1))
|