From 8472d553a34b83cb3ba6d50142640a8d2744d80c Mon Sep 17 00:00:00 2001 From: lcatlett Date: Fri, 1 May 2026 19:26:43 -0400 Subject: [PATCH 1/2] fix(hooks): treat absent ~/.mempalace as auto-save off When the user removes ~/.mempalace/ (a strong "do not auto-capture" signal), the next hook fire would silently recreate the entire dir hierarchy and ingest existing transcripts: 1. _log() at hooks_cli.py:148 unconditionally calls STATE_DIR.mkdir(parents=True, exist_ok=True), so the act of writing the hook log line recreated ~/.mempalace/hook_state/ 2. With no config file present, hook_stop_auto_save and hook_precompact_auto_save defaulted to True (no override to read) 3. The full save path then ran, materializing palace/, wal/, knowledge_graph.sqlite3, and N drawers from existing transcripts in ~/.claude/projects/*.jsonl All four entry points (hook_stop, hook_precompact, hook_session_start, and _log itself) now check a new PALACE_ROOT = Path.home() / ".mempalace" constant first and short-circuit (returning {} on stdout, never logging) when the dir is absent. The user-removable directory is now a kill-switch. Five unit tests in tests/test_hooks_cli.py cover: hook_stop / hook_precompact / hook_session_start do not create the dir when absent; _log() does not create it when absent; existing dir proceeds normally (regression). Caught in the wild on a downstream fork: ~146 drawers materialized in under a second after a deliberate `rm -rf ~/.mempalace/`, into a planning session that was explicitly not meant to be captured. --- CHANGELOG.md | 1 + mempalace/hooks_cli.py | 23 +++++++++++++ tests/test_hooks_cli.py | 76 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 100 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 41dfaac..7a75842 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), ### Bug Fixes - **MCP server `tool_diary_write` SIGSEGV when default EF provider differs.** `mcp_server._get_collection` bypassed `ChromaBackend.get_collection` and called `client.get_collection` / `client.create_collection` without `embedding_function=`. ChromaDB 1.x persists the EF *identity* (its `name()`) with the collection but not the EF *instance/configuration*, so the MCP server's reopen silently bound chromadb's built-in `DefaultEmbeddingFunction` — its `name()` matches `mempalace.embedding`'s spoofed `"default"` so the identity check passes, but its provider list is chromadb's default rather than the user's resolved device. The miner / Stop hook ingest path routes through the backend helper and binds the configured EF instead. On bleeding-edge interpreters (python 3.14 + chromadb 1.5.x on Apple Silicon) the default provider selection could SIGSEGV the host process on first `col.add()`, killing the MCP stdio server and leaving every subsequent tool call returning `Connection closed` until Claude Code was relaunched. `_get_collection` now reuses `ChromaBackend._resolve_embedding_function()` on the reopen branches that actually open a collection (warm-cache reads stay zero-cost), matching the miner/backend path. (#1299, follow-up to #1262 / #1289) +- **Hooks no longer recreate `~/.mempalace/` after the user removes it.** When `~/.mempalace/` is deleted (a strong "do not auto-capture" signal), the next `Stop`, `PreCompact`, or `SessionStart` hook would silently rebuild the dir hierarchy and ingest existing transcripts: `_log()` called `STATE_DIR.mkdir(parents=True, exist_ok=True)` unconditionally, so the very act of writing `[HH:MM] SESSION START …` recreated `~/.mempalace/hook_state/`; subsequent calls in the save path then materialized `palace/`, `wal/`, `knowledge_graph.sqlite3`, and N drawers from `~/.claude/projects/*.jsonl`. All four entry points (`hook_stop`, `hook_precompact`, `hook_session_start`, and `_log` itself) now check a new module-level `PALACE_ROOT = Path.home() / ".mempalace"` constant first and short-circuit (returning `{}` on stdout, never logging) when the directory is absent. The user-removable directory becomes a kill-switch — `rm -rf ~/.mempalace` is now a stable state. Net: 23 lines added in `mempalace/hooks_cli.py`, 5 unit tests in `tests/test_hooks_cli.py`. (#1305) - **Cross-wing topic tunnels for hyphenated dir names.** `mempalace init` recorded the `topics_by_wing` registry key under the raw directory name (e.g. `mempalace-public`), while `mempalace.yaml`'s `wing` field used the lower-cased + separator-collapsed slug (`mempalace_public`). At mine time the miner read the slug from the yaml and missed the registry, so `_compute_topic_tunnels_for_wing` returned `0` silently. Real-world: any project whose folder contained a hyphen or space lost every topic tunnel. Now both call sites route through a shared `normalize_wing_name()` in `config.py`. (#1194, follow-up to #1180) - **CLI `mempalace search` retrieval quality.** The CLI was using pure ChromaDB cosine distance with no BM25 rerank, so drawers containing every query term but embedding as noise (directory listings, diff output, shell logs) scored `Match: 0.0` alongside genuinely irrelevant results with no way to tell them apart. Wired the CLI through the same `_hybrid_rank` the `mempalace_search` MCP tool already used, and surfaced both `cosine=` and `bm25=` scores in the output so users see which component of the match is firing. MCP search was unaffected; this fixes the human-facing CLI parity gap. - **Legacy-palace distance-metric warning.** CLI search now detects palaces created before `hnsw:space=cosine` was consistently set and prints a one-line notice pointing at `mempalace repair`. Without the warning such palaces silently used L2 distance, under which the similarity display floored every result to `Match: 0.0`. New palaces mined today already set cosine correctly and now have invariant tests pinning that behavior so future refactors can't silently regress it. (#1179) diff --git a/mempalace/hooks_cli.py b/mempalace/hooks_cli.py index d4f8317..ca8fb60 100644 --- a/mempalace/hooks_cli.py +++ b/mempalace/hooks_cli.py @@ -16,6 +16,18 @@ from pathlib import Path SAVE_INTERVAL = 15 STATE_DIR = Path.home() / ".mempalace" / "hook_state" +PALACE_ROOT = Path.home() / ".mempalace" + + +def _palace_root_exists() -> bool: + """User-removable kill-switch. + + If ~/.mempalace/ does not exist, the user has explicitly cleared it. + All hook side effects (logging, state dir creation, mining, ingestion) + must respect this and short-circuit BEFORE touching disk — including + before logging the short-circuit itself. + """ + return PALACE_ROOT.exists() def _mempalace_python() -> str: @@ -142,6 +154,8 @@ _state_dir_initialized = False def _log(message: str): """Append to hook state log file.""" + if not PALACE_ROOT.exists(): + return # User removed the palace; do not recreate by logging global _state_dir_initialized try: if not _state_dir_initialized: @@ -550,6 +564,9 @@ def _wing_from_transcript_path(transcript_path: str) -> str: def hook_stop(data: dict, harness: str): """Stop hook: block every N messages for auto-save.""" + if not _palace_root_exists(): + _output({}) + return parsed = _parse_harness_input(data, harness) session_id = parsed["session_id"] stop_hook_active = parsed["stop_hook_active"] @@ -659,6 +676,9 @@ def hook_stop(data: dict, harness: str): def hook_session_start(data: dict, harness: str): """Session start hook: initialize session tracking state.""" + if not _palace_root_exists(): + _output({}) + return parsed = _parse_harness_input(data, harness) session_id = parsed["session_id"] @@ -673,6 +693,9 @@ def hook_session_start(data: dict, harness: str): def hook_precompact(data: dict, harness: str): """Precompact hook: mine transcript synchronously, then allow compaction.""" + if not _palace_root_exists(): + _output({}) + return parsed = _parse_harness_input(data, harness) session_id = parsed["session_id"] transcript_path = parsed["transcript_path"] diff --git a/tests/test_hooks_cli.py b/tests/test_hooks_cli.py index 1ceb530..941288d 100644 --- a/tests/test_hooks_cli.py +++ b/tests/test_hooks_cli.py @@ -959,3 +959,79 @@ def test_stop_hook_rejects_injected_stop_hook_active(tmp_path): # The injected value is not "true"/"1"/"yes", so the hook should NOT pass through. # Save must have been attempted. assert mock_save.called + + +# --- Absent palace root: hooks must not recreate ~/.mempalace --- +# +# When the user removes ~/.mempalace (e.g. `rm -rf`), that is the strongest +# possible "do not auto-capture" signal. Hooks must short-circuit BEFORE +# touching disk — including before the log-line that previously triggered +# STATE_DIR.mkdir() on its own. + + +import mempalace.hooks_cli as hooks_cli_mod + + +def _redirect_palace_root(monkeypatch, tmp_path): + """Point PALACE_ROOT and STATE_DIR at a tmp location that does NOT exist.""" + fake_root = tmp_path / "absent-mempalace" + monkeypatch.setattr(hooks_cli_mod, "PALACE_ROOT", fake_root) + monkeypatch.setattr(hooks_cli_mod, "STATE_DIR", fake_root / "hook_state") + monkeypatch.setattr(hooks_cli_mod, "_state_dir_initialized", False) + return fake_root + + +def test_hook_stop_does_not_create_palace_dir_when_absent(tmp_path, monkeypatch): + fake_root = _redirect_palace_root(monkeypatch, tmp_path) + transcript = tmp_path / "t.jsonl" + transcript.write_text("") + buf = io.StringIO() + with contextlib.redirect_stdout(buf): + hook_stop( + {"session_id": "absent", "transcript_path": str(transcript), "stop_hook_active": False}, + "claude-code", + ) + assert json.loads(buf.getvalue() or "{}") == {} + assert not fake_root.exists() + + +def test_hook_precompact_does_not_create_palace_dir_when_absent(tmp_path, monkeypatch): + fake_root = _redirect_palace_root(monkeypatch, tmp_path) + transcript = tmp_path / "t.jsonl" + transcript.write_text("") + buf = io.StringIO() + with contextlib.redirect_stdout(buf): + hook_precompact( + {"session_id": "absent", "transcript_path": str(transcript)}, + "claude-code", + ) + assert json.loads(buf.getvalue() or "{}") == {} + assert not fake_root.exists() + + +def test_hook_session_start_does_not_create_palace_dir_when_absent(tmp_path, monkeypatch): + fake_root = _redirect_palace_root(monkeypatch, tmp_path) + buf = io.StringIO() + with contextlib.redirect_stdout(buf): + hook_session_start({"session_id": "absent"}, "claude-code") + assert json.loads(buf.getvalue() or "{}") == {} + assert not fake_root.exists() + + +def test_log_does_not_create_palace_dir_when_absent(tmp_path, monkeypatch): + fake_root = _redirect_palace_root(monkeypatch, tmp_path) + _log("test message") + assert not fake_root.exists() + + +def test_existing_dir_proceeds_normally(tmp_path, monkeypatch): + """Regression: when PALACE_ROOT exists, hooks must proceed (no short-circuit).""" + fake_root = tmp_path / "present-mempalace" + fake_root.mkdir() + monkeypatch.setattr(hooks_cli_mod, "PALACE_ROOT", fake_root) + monkeypatch.setattr(hooks_cli_mod, "STATE_DIR", fake_root / "hook_state") + monkeypatch.setattr(hooks_cli_mod, "_state_dir_initialized", False) + _log("test message") + # _log should have created the state dir under the existing palace root + assert (fake_root / "hook_state").exists() + assert (fake_root / "hook_state" / "hook.log").is_file() From 2d50b214d4c2c05112a3258faa8bef39cf6c07c0 Mon Sep 17 00:00:00 2001 From: lcatlett Date: Sat, 2 May 2026 20:37:47 -0400 Subject: [PATCH 2/2] fix(hooks): use is_dir() for palace root check (review feedback) Both @igorls and the Qodo bot flagged that `_palace_root_exists()` used `Path.exists()`, which returns True for a regular file. A stray file at `~/.mempalace` would let the kill-switch be bypassed and crash later in `STATE_DIR.mkdir()` with NotADirectoryError. Switched to `Path.is_dir()`. Also fold `_log()`'s inline check through `_palace_root_exists()` so both kill-switch sites use the same predicate. New test pins the behavior: a regular file at the palace root path is treated as absent (hook short-circuits, _log does not crash, the stray file is left untouched). --- mempalace/hooks_cli.py | 9 +++++++-- tests/test_hooks_cli.py | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 2 deletions(-) diff --git a/mempalace/hooks_cli.py b/mempalace/hooks_cli.py index ca8fb60..8498103 100644 --- a/mempalace/hooks_cli.py +++ b/mempalace/hooks_cli.py @@ -26,8 +26,13 @@ def _palace_root_exists() -> bool: All hook side effects (logging, state dir creation, mining, ingestion) must respect this and short-circuit BEFORE touching disk — including before logging the short-circuit itself. + + Uses ``is_dir()`` rather than ``exists()`` so a stray regular file at + ``~/.mempalace`` (or a broken symlink) is treated as absent — otherwise + the kill-switch would be bypassed and ``STATE_DIR.mkdir()`` would later + crash on ``NotADirectoryError``. """ - return PALACE_ROOT.exists() + return PALACE_ROOT.is_dir() def _mempalace_python() -> str: @@ -154,7 +159,7 @@ _state_dir_initialized = False def _log(message: str): """Append to hook state log file.""" - if not PALACE_ROOT.exists(): + if not _palace_root_exists(): return # User removed the palace; do not recreate by logging global _state_dir_initialized try: diff --git a/tests/test_hooks_cli.py b/tests/test_hooks_cli.py index 941288d..487acf7 100644 --- a/tests/test_hooks_cli.py +++ b/tests/test_hooks_cli.py @@ -1035,3 +1035,35 @@ def test_existing_dir_proceeds_normally(tmp_path, monkeypatch): # _log should have created the state dir under the existing palace root assert (fake_root / "hook_state").exists() assert (fake_root / "hook_state" / "hook.log").is_file() + + +def test_regular_file_at_palace_root_treated_as_absent(tmp_path, monkeypatch): + """A regular file at ~/.mempalace must be treated the same as absent. + + ``Path.exists()`` returns True for a regular file, which would let the + kill-switch be bypassed and crash later when ``STATE_DIR.mkdir()`` runs + on ``NotADirectoryError``. ``_palace_root_exists()`` must use + ``is_dir()`` so a stray file (or broken symlink) short-circuits cleanly. + """ + fake_root = tmp_path / "file-not-dir" + fake_root.write_text("oops, this is a file not a directory") + monkeypatch.setattr(hooks_cli_mod, "PALACE_ROOT", fake_root) + monkeypatch.setattr(hooks_cli_mod, "STATE_DIR", fake_root / "hook_state") + monkeypatch.setattr(hooks_cli_mod, "_state_dir_initialized", False) + + # _palace_root_exists() is the source of truth — it must return False. + assert hooks_cli_mod._palace_root_exists() is False + + # Hooks must short-circuit (return {} on stdout) and not touch disk. + buf = io.StringIO() + with contextlib.redirect_stdout(buf): + hook_session_start({"session_id": "file-at-root"}, "claude-code") + assert json.loads(buf.getvalue() or "{}") == {} + + # _log must also short-circuit — it must NOT try to mkdir a path under a + # regular file (which would raise NotADirectoryError). + _log("test message") # would raise if not short-circuited + + # The stray file is left untouched; we never try to convert it. + assert fake_root.is_file() + assert fake_root.read_text() == "oops, this is a file not a directory"