From b2f259c25304e89bc19bf767dd8c971251ba7026 Mon Sep 17 00:00:00 2001 From: icciAaron Date: Sun, 19 Apr 2026 15:01:28 -0400 Subject: [PATCH 1/2] fix(mcp): omit palace_path from tool_status responses (+ docs) The MCP `mempalace_status` tool was returning the server's absolute `_config.palace_path` to any connected client on both the main (ChromaDB-backed) path and the sqlite fallback path that runs when HNSW divergence is detected (#1222). On a single-user local deployment this is self-disclosure, but in nested-agent or multi-server MCP topologies the client is a separate trust domain and the absolute path has no documented client-side use. Clients that legitimately need the palace path continue to have three documented channels: the `MEMPALACE_PALACE_PATH` env var (primary) or its legacy `MEMPAL_PALACE_PATH` alias, the `~/.mempalace/config.json` file, and the `--palace` CLI flag on most subcommands. Also corrects stale docs that claimed `mempalace_reconnect` returned a `palace_path` field; the code returns `{success, message, drawers, vector_disabled[, vector_disabled_reason]}` on success, plus a no-palace shape and an exception shape. - mempalace/mcp_server.py: drop palace_path from tool_status() and _tool_status_via_sqlite() result dicts - website/reference/mcp-tools.md: update documented return shapes for mempalace_status (fix) and mempalace_reconnect (stale-docs correction) Authored-by: Aaron Salsitz (ICCI LLC, @icciaaron). Claude Code was used as an authoring and review-orchestration tool, with human-in-the-loop oversight at every step: Aaron wrote the prompts, reviewed each draft, called for three independent review passes (drafting / post-rebase technical / CISA-aligned disclosure-leak), and verified the final patch behavior before commit. --- mempalace/mcp_server.py | 2 -- website/reference/mcp-tools.md | 4 ++-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/mempalace/mcp_server.py b/mempalace/mcp_server.py index 13654f6..4aab316 100644 --- a/mempalace/mcp_server.py +++ b/mempalace/mcp_server.py @@ -454,7 +454,6 @@ def _tool_status_via_sqlite() -> dict: "total_drawers": total, "wings": wings, "rooms": rooms, - "palace_path": _config.palace_path, "protocol": PALACE_PROTOCOL, "aaak_dialect": AAAK_SPEC, "vector_disabled": True, @@ -493,7 +492,6 @@ def tool_status(): "total_drawers": count, "wings": wings, "rooms": rooms, - "palace_path": _config.palace_path, "protocol": PALACE_PROTOCOL, "aaak_dialect": AAAK_SPEC, } diff --git a/website/reference/mcp-tools.md b/website/reference/mcp-tools.md index f951fe1..671225a 100644 --- a/website/reference/mcp-tools.md +++ b/website/reference/mcp-tools.md @@ -10,7 +10,7 @@ Palace overview: total drawers, wing and room counts, AAAK spec, and memory prot **Parameters:** None -**Returns:** `{ total_drawers, wings, rooms, palace_path, protocol, aaak_dialect }` +**Returns:** `{ total_drawers, wings, rooms, protocol, aaak_dialect }` --- @@ -378,4 +378,4 @@ Force a reconnect to the palace database. Use this after external scripts or CLI **Parameters:** None -**Returns:** `{ success, palace_path }` +**Returns:** `{ success, message, drawers, vector_disabled[, vector_disabled_reason] }` (on no-palace: `{ success: false, message, drawers, vector_disabled }`; on exception: `{ success: false, error }`) From 7fc260f75236551707fa3932adb994d4cb3f6332 Mon Sep 17 00:00:00 2001 From: Igor Lins e Silva <4753812+igorls@users.noreply.github.com> Date: Sun, 3 May 2026 05:48:41 -0300 Subject: [PATCH 2/2] fix(mcp): basename source_file in tool_get_drawer responses MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The MCP `mempalace_get_drawer` tool returned the entire raw drawer metadata blob to any connected client, and the `source_file` field in that blob is the absolute filesystem path written by the miners (`miner.py`, `convo_miner.py` — `source_file = str(filepath)`). On a single-user local deployment this is self-disclosure, but in nested-agent or multi-server MCP topologies the client is a separate trust domain and the host's directory layout has no documented client-side use. Mirror the mitigation that `searcher.search_memories()` already applies on its own return path: reduce `source_file` to its basename via `Path(source_file).name` before handing the metadata to the client. Citations still work — the directory layout does not leak. Companion to #1 (omit palace_path from tool_status). Same threat class, different surface: - mempalace_status — palace dir path → fixed in #1 - mempalace_get_drawer — per-drawer source_file path → this PR Other read tools were audited and do not leak host paths: - mempalace_search — already basenames source_file - mempalace_list_drawers — returns wing/room/preview only - mempalace_diary_read — date/timestamp/topic/content only - mempalace_reconnect — success/message/drawers only - mempalace_kg_* — entity/predicate strings, counts - mempalace_check_duplicate — wing/room/preview only Changes: - mempalace/mcp_server.py: tool_get_drawer() now basenames metadata.source_file - tests/test_mcp_server.py: regression test asserting the absolute path and its parent directory do not appear anywhere in the response - website/reference/mcp-tools.md: clarify the documented return shape --- mempalace/mcp_server.py | 15 ++++++++++--- tests/test_mcp_server.py | 39 ++++++++++++++++++++++++++++++++++ website/reference/mcp-tools.md | 2 +- 3 files changed, 52 insertions(+), 4 deletions(-) diff --git a/mempalace/mcp_server.py b/mempalace/mcp_server.py index 4aab316..b010ab9 100644 --- a/mempalace/mcp_server.py +++ b/mempalace/mcp_server.py @@ -912,12 +912,21 @@ def tool_get_drawer(drawer_id: str): return {"error": f"Drawer not found: {drawer_id}"} meta = result["metadatas"][0] doc = result["documents"][0] + # source_file is the absolute filesystem path written by the + # miners. Reduce to its basename before handing it to the MCP + # client — same threat model as the palace_path leak fix: + # nested-agent / multi-server topologies treat the client as a + # separate trust domain. Basename preserves citation utility. + # Mirrors the searcher.search_memories() return shape. + safe_meta = dict(meta) if meta else {} + if safe_meta.get("source_file"): + safe_meta["source_file"] = Path(safe_meta["source_file"]).name return { "drawer_id": drawer_id, "content": doc, - "wing": meta.get("wing", ""), - "room": meta.get("room", ""), - "metadata": meta, + "wing": safe_meta.get("wing", ""), + "room": safe_meta.get("room", ""), + "metadata": safe_meta, } except Exception as e: return {"error": str(e)} diff --git a/tests/test_mcp_server.py b/tests/test_mcp_server.py index f8148af..0e37e35 100644 --- a/tests/test_mcp_server.py +++ b/tests/test_mcp_server.py @@ -531,6 +531,45 @@ class TestWriteTools: result = tool_get_drawer("nonexistent_drawer") assert "error" in result + def test_get_drawer_does_not_leak_absolute_source_file_path( + self, monkeypatch, config, palace_path, collection, kg + ): + """tool_get_drawer must not expose the absolute filesystem path + that the miners write into ``source_file``. Same threat class as + the palace_path leak in mempalace_status: in nested-agent or + multi-server MCP topologies the client is a separate trust + domain, and the directory layout of the host has no documented + client-side use. Basename is enough for citation.""" + _patch_mcp_server(monkeypatch, config, kg) + + secret_dir = "/private/home/alice/secret-research/2026" + absolute_source = f"{secret_dir}/notes.md" + collection.add( + ids=["drawer_leak_probe"], + documents=["verbatim drawer body for leak probe"], + metadatas=[ + { + "wing": "research", + "room": "notes", + "source_file": absolute_source, + "chunk_index": 0, + "added_by": "miner", + "filed_at": "2026-05-03T00:00:00", + } + ], + ) + + from mempalace.mcp_server import tool_get_drawer + + result = tool_get_drawer("drawer_leak_probe") + assert result["drawer_id"] == "drawer_leak_probe" + assert result["metadata"]["source_file"] == "notes.md" + # Defense-in-depth: no field anywhere in the response should + # contain the absolute path or its parent directory. + serialized = json.dumps(result) + assert absolute_source not in serialized + assert secret_dir not in serialized + def test_list_drawers(self, monkeypatch, config, palace_path, seeded_collection, kg): _patch_mcp_server(monkeypatch, config, kg) from mempalace.mcp_server import tool_list_drawers diff --git a/website/reference/mcp-tools.md b/website/reference/mcp-tools.md index 671225a..6866aa6 100644 --- a/website/reference/mcp-tools.md +++ b/website/reference/mcp-tools.md @@ -122,7 +122,7 @@ Fetch a single drawer by ID — returns full content and metadata. |-----------|------|----------|-------------| | `drawer_id` | string | **Yes** | ID of the drawer to fetch | -**Returns:** `{ drawer: { id, wing, room, content, ... } }` +**Returns:** `{ drawer_id, content, wing, room, metadata }` where `metadata.source_file`, when present, is the basename only — the absolute path written by the miners is reduced before the dict is returned to MCP clients. ---