From eb4de0433962df4c42ac081dc22611b005096b66 Mon Sep 17 00:00:00 2001
From: Igor Lins e Silva <4753812+igorls@users.noreply.github.com>
Date: Mon, 27 Apr 2026 00:32:35 -0300
Subject: [PATCH 1/3] fix(hooks): always mine the active transcript as convos,
additive to MEMPAL_DIR
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
#1230 fixed --mode convos for the case where MEMPAL_DIR was unset, but
left two configurations broken:
- MEMPAL_DIR set to a project dir: convos never mined (MEMPAL_DIR
overrode the transcript path); only project files were ingested.
- MEMPAL_DIR set to a conversations dir per the old hooks/README: the
projects miner ran on JSONL — same wrong-miner behaviour.
The shell hooks (mempal_save_hook.sh, mempal_precompact_hook.sh) had
the same MEMPAL_DIR-overrides-transcript bug AND were missing --mode
on every spawned `mempalace mine` call.
Make the auto-ingest *additive*. _get_mine_dir → _get_mine_targets,
returning a list of (dir, mode) pairs:
- MEMPAL_DIR (when valid) contributes (dir, "projects")
- A valid transcript JSONL contributes (parent, "convos")
- Both can appear together; the hook spawns one ingest per target
Same change applied to the shell save and precompact hooks. Precompact
also gained transcript_path parsing so it can run the convos mine
synchronously before context is compressed. hooks/README.md updated to
describe MEMPAL_DIR as a project-files target, never a convos target.
---
hooks/README.md | 2 +-
hooks/mempal_precompact_hook.sh | 44 ++++++++----
hooks/mempal_save_hook.sh | 36 +++++-----
mempalace/hooks_cli.py | 78 ++++++++++++---------
tests/test_hooks_cli.py | 119 +++++++++++++++++++++++---------
tests/test_save_hook_mines.py | 31 ++++-----
6 files changed, 197 insertions(+), 113 deletions(-)
diff --git a/hooks/README.md b/hooks/README.md
index 7794527..469d231 100644
--- a/hooks/README.md
+++ b/hooks/README.md
@@ -67,7 +67,7 @@ Edit `mempal_save_hook.sh` to change:
- **`SAVE_INTERVAL=15`** — How many human messages between saves. Lower = more frequent saves, higher = less interruption.
- **`STATE_DIR`** — Where hook state is stored (defaults to `~/.mempalace/hook_state/`)
-- **`MEMPAL_DIR`** — Optional. Set to a conversations directory to auto-run `mempalace mine
` on each save trigger. Leave blank (default) to let the AI handle saving via the block reason message.
+- **`MEMPAL_DIR`** — Optional **project directory** (code, notes, docs) to also mine on each save trigger, with `--mode projects`. The hook ALWAYS mines the active conversation transcript automatically with `--mode convos` — `MEMPAL_DIR` is purely additive, never an override. Leave blank if you don't want to ingest project files.
- **`MEMPALACE_PYTHON`** — Optional env var. Python interpreter with mempalace + chromadb installed. Auto-detects: `MEMPALACE_PYTHON` env var → repo `venv/bin/python3` → system `python3`. Set this if your venv is in a non-standard location.
### mempalace CLI
diff --git a/hooks/mempal_precompact_hook.sh b/hooks/mempal_precompact_hook.sh
index a14a0d0..70377f6 100755
--- a/hooks/mempal_precompact_hook.sh
+++ b/hooks/mempal_precompact_hook.sh
@@ -41,17 +41,18 @@
# to save everything. After the AI saves, compaction proceeds normally.
#
# === MEMPALACE CLI ===
-# This repo uses: mempalace mine
-# or: mempalace mine --mode convos
-# Set MEMPAL_DIR below if you want the hook to auto-ingest before compaction.
-# Leave blank to rely on the AI's own save instructions.
+# The hook ALWAYS mines the active conversation transcript synchronously
+# before compaction (via `mempalace mine --mode convos`).
+# MEMPAL_DIR is an *additional*, optional target for project files — it
+# does not replace the conversation mine.
STATE_DIR="$HOME/.mempalace/hook_state"
mkdir -p "$STATE_DIR"
-# Optional: set to the directory you want auto-ingested before compaction.
-# Example: MEMPAL_DIR="$HOME/conversations"
-# Leave empty to skip auto-ingest (AI handles saving via the block reason).
+# Optional: project directory (code / notes / docs) to also mine before
+# compaction. Mined with `--mode projects`. The conversation transcript
+# is always mined regardless — this is purely additive.
+# Example: MEMPAL_DIR="$HOME/projects/my_app"
MEMPAL_DIR=""
# Resolve the Python interpreter. Same contract as mempal_save_hook.sh:
@@ -64,15 +65,34 @@ fi
# Read JSON input from stdin
INPUT=$(cat)
-SESSION_ID=$(echo "$INPUT" | "$MEMPAL_PYTHON_BIN" -c "import sys,json; print(json.load(sys.stdin).get('session_id','unknown'))" 2>/dev/null)
+# Parse session_id and transcript_path in one call. Sanitize both before
+# interpolating into shell — same contract as mempal_save_hook.sh.
+eval $(echo "$INPUT" | "$MEMPAL_PYTHON_BIN" -c "
+import sys, json, re
+data = json.load(sys.stdin)
+sid = data.get('session_id', 'unknown')
+tp = data.get('transcript_path', '')
+safe = lambda s: re.sub(r'[^a-zA-Z0-9_/.\-~]', '', str(s))
+print(f'SESSION_ID=\"{safe(sid)}\"')
+print(f'TRANSCRIPT_PATH=\"{safe(tp)}\"')
+" 2>/dev/null)
+
+# Expand ~ in path
+TRANSCRIPT_PATH="${TRANSCRIPT_PATH/#\~/$HOME}"
echo "[$(date '+%H:%M:%S')] PRE-COMPACT triggered for session $SESSION_ID" >> "$STATE_DIR/hook.log"
-# Optional: run mempalace ingest synchronously so memories land before compaction
+# Run ingest synchronously so memories land before compaction. Two
+# independent targets — both run if both are set:
+# 1. TRANSCRIPT_PATH (from Claude Code) → parent dir, --mode convos
+# 2. MEMPAL_DIR → --mode projects
+if [ -n "$TRANSCRIPT_PATH" ] && [ -f "$TRANSCRIPT_PATH" ]; then
+ mempalace mine "$(dirname "$TRANSCRIPT_PATH")" --mode convos \
+ >> "$STATE_DIR/hook.log" 2>&1
+fi
if [ -n "$MEMPAL_DIR" ] && [ -d "$MEMPAL_DIR" ]; then
- SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
- REPO_DIR="$(dirname "$SCRIPT_DIR")"
- mempalace mine "$MEMPAL_DIR" >> "$STATE_DIR/hook.log" 2>&1
+ mempalace mine "$MEMPAL_DIR" --mode projects \
+ >> "$STATE_DIR/hook.log" 2>&1
fi
# Silent: return empty JSON to not block. "decision": "allow" is invalid —
diff --git a/hooks/mempal_save_hook.sh b/hooks/mempal_save_hook.sh
index 228b41b..d69ae85 100755
--- a/hooks/mempal_save_hook.sh
+++ b/hooks/mempal_save_hook.sh
@@ -45,10 +45,10 @@
# stop_hook_active=true so we let it through. No infinite loop.
#
# === MEMPALACE CLI ===
-# This repo uses: mempalace mine
-# or: mempalace mine --mode convos
-# Set MEMPAL_DIR below if you want the hook to auto-ingest after blocking.
-# Leave blank to rely on the AI's own save instructions.
+# The hook ALWAYS mines the active conversation transcript automatically
+# (via `mempalace mine --mode convos`). MEMPAL_DIR is an
+# *additional*, optional target for project files — it does not replace
+# the conversation mine.
#
# === CONFIGURATION ===
@@ -56,9 +56,10 @@ SAVE_INTERVAL=15 # Save every N human messages (adjust to taste)
STATE_DIR="$HOME/.mempalace/hook_state"
mkdir -p "$STATE_DIR"
-# Optional: set to the directory you want auto-ingested on each save trigger.
-# Example: MEMPAL_DIR="$HOME/conversations"
-# Leave empty to skip auto-ingest (AI handles saving via the block reason).
+# Optional: project directory (code / notes / docs) to also mine each
+# save trigger. Mined with `--mode projects`. The conversation transcript
+# is always mined regardless — this is purely additive.
+# Example: MEMPAL_DIR="$HOME/projects/my_app"
MEMPAL_DIR=""
# Resolve the Python interpreter the hook should use.
@@ -157,19 +158,20 @@ if [ "$SINCE_LAST" -ge "$SAVE_INTERVAL" ] && [ "$EXCHANGE_COUNT" -gt 0 ]; then
echo "[$(date '+%H:%M:%S')] TRIGGERING SAVE at exchange $EXCHANGE_COUNT" >> "$STATE_DIR/hook.log"
- # Auto-mine the transcript. Two paths:
- # 1. TRANSCRIPT_PATH (from Claude Code) — mine the directory it lives in
- # 2. MEMPAL_DIR (user-configured) — mine that directory
- # At least one should work. If neither is set, nothing mines.
- MINE_DIR=""
+ # Auto-mine. Two independent targets — both run if both are set:
+ # 1. TRANSCRIPT_PATH (from Claude Code) → parent dir, --mode convos
+ # (Claude Code session JSONL — must use the convo miner)
+ # 2. MEMPAL_DIR (user-configured project) → --mode projects
+ # (code, notes, docs)
+ # MEMPAL_DIR is *additive*, not an override: a user with MEMPAL_DIR
+ # pointed at their project still gets the active conversation mined.
if [ -n "$TRANSCRIPT_PATH" ] && [ -f "$TRANSCRIPT_PATH" ]; then
- MINE_DIR="$(dirname "$TRANSCRIPT_PATH")"
+ mempalace mine "$(dirname "$TRANSCRIPT_PATH")" --mode convos \
+ >> "$STATE_DIR/hook.log" 2>&1 &
fi
if [ -n "$MEMPAL_DIR" ] && [ -d "$MEMPAL_DIR" ]; then
- MINE_DIR="$MEMPAL_DIR"
- fi
- if [ -n "$MINE_DIR" ]; then
- mempalace mine "$MINE_DIR" >> "$STATE_DIR/hook.log" 2>&1 &
+ mempalace mine "$MEMPAL_DIR" --mode projects \
+ >> "$STATE_DIR/hook.log" 2>&1 &
fi
# MEMPAL_VERBOSE toggle:
diff --git a/mempalace/hooks_cli.py b/mempalace/hooks_cli.py
index 49b77e2..0645c41 100644
--- a/mempalace/hooks_cli.py
+++ b/mempalace/hooks_cli.py
@@ -197,27 +197,27 @@ def _output(data: dict):
sys.stdout.buffer.flush()
-def _get_mine_dir(transcript_path: str = "") -> tuple[str, str]:
- """Determine directory to mine and the miner mode to use.
+def _get_mine_targets(transcript_path: str = "") -> list[tuple[str, str]]:
+ """Return the list of ``(dir, mode)`` targets for auto-ingest.
- Returns ``(dir, mode)`` where ``mode`` is ``"projects"`` or ``"convos"``.
- Empty ``dir`` means no ingest should run.
+ MEMPAL_DIR (when set and resolvable) contributes a ``"projects"``
+ target. A valid transcript JSONL contributes a ``"convos"`` target
+ on its parent dir. Both may appear together — the hook runs one
+ ingest per target, so a user with MEMPAL_DIR pointed at their
+ project dir still gets the active conversation mined verbatim.
- MEMPAL_DIR is treated as a project directory ("projects" mode). The
- transcript-path fallback resolves to the parent of a Claude Code
- session JSONL, which must be mined with the conversation miner —
- running the projects miner there ingests JSONL as if it were source
- code.
+ An empty list means no ingest should run.
"""
+ targets: list[tuple[str, str]] = []
mempal_dir = os.environ.get("MEMPAL_DIR", "")
if mempal_dir:
resolved = Path(mempal_dir).expanduser().resolve()
if resolved.is_dir():
- return str(resolved), "projects"
+ targets.append((str(resolved), "projects"))
path = _validate_transcript_path(transcript_path)
if path is not None and path.is_file():
- return str(path.parent), "convos"
- return "", "projects"
+ targets.append((str(path.parent), "convos"))
+ return targets
_MINE_PID_FILE = STATE_DIR / "mine.pid"
@@ -275,36 +275,46 @@ def _spawn_mine(cmd: list) -> None:
def _maybe_auto_ingest(transcript_path: str = ""):
- """Run mempalace mine in background if a mine directory is available."""
- mine_dir, mode = _get_mine_dir(transcript_path)
- if not mine_dir:
+ """Run mempalace mine in background for every available target."""
+ targets = _get_mine_targets(transcript_path)
+ if not targets:
return
if _mine_already_running():
_log("Skipping auto-ingest: mine already running")
return
- try:
- _spawn_mine([sys.executable, "-m", "mempalace", "mine", mine_dir, "--mode", mode])
- except OSError:
- pass
+ for mine_dir, mode in targets:
+ try:
+ _spawn_mine([sys.executable, "-m", "mempalace", "mine", mine_dir, "--mode", mode])
+ except OSError:
+ pass
def _mine_sync(transcript_path: str = ""):
- """Run mempalace mine synchronously (for precompact -- data must land first)."""
- mine_dir, mode = _get_mine_dir(transcript_path)
- if not mine_dir:
+ """Run mempalace mine synchronously for every target (precompact)."""
+ targets = _get_mine_targets(transcript_path)
+ if not targets:
return
- try:
- STATE_DIR.mkdir(parents=True, exist_ok=True)
- log_path = STATE_DIR / "hook.log"
- with open(log_path, "a") as log_f:
- subprocess.run(
- [sys.executable, "-m", "mempalace", "mine", mine_dir, "--mode", mode],
- stdout=log_f,
- stderr=log_f,
- timeout=60,
- )
- except (OSError, subprocess.TimeoutExpired):
- pass
+ STATE_DIR.mkdir(parents=True, exist_ok=True)
+ log_path = STATE_DIR / "hook.log"
+ for mine_dir, mode in targets:
+ try:
+ with open(log_path, "a") as log_f:
+ subprocess.run(
+ [
+ sys.executable,
+ "-m",
+ "mempalace",
+ "mine",
+ mine_dir,
+ "--mode",
+ mode,
+ ],
+ stdout=log_f,
+ stderr=log_f,
+ timeout=60,
+ )
+ except (OSError, subprocess.TimeoutExpired):
+ pass
def _desktop_toast(body: str, title: str = "MemPalace"):
diff --git a/tests/test_hooks_cli.py b/tests/test_hooks_cli.py
index 6763439..c105eae 100644
--- a/tests/test_hooks_cli.py
+++ b/tests/test_hooks_cli.py
@@ -12,7 +12,7 @@ from mempalace.hooks_cli import (
SAVE_INTERVAL,
_count_human_messages,
_extract_recent_messages,
- _get_mine_dir,
+ _get_mine_targets,
_log,
_maybe_auto_ingest,
_mempalace_python,
@@ -494,6 +494,43 @@ def test_mine_sync_with_env_uses_projects_mode(tmp_path):
assert cmd[cmd.index("--mode") + 1] == "projects"
+def test_maybe_auto_ingest_with_both_set(tmp_path):
+ """When MEMPAL_DIR and a transcript are both set, BOTH spawns happen."""
+ mempal_dir = tmp_path / "project"
+ mempal_dir.mkdir()
+ convo_dir = tmp_path / "convos"
+ convo_dir.mkdir()
+ transcript = convo_dir / "session.jsonl"
+ transcript.write_text("")
+ with patch.dict("os.environ", {"MEMPAL_DIR": str(mempal_dir)}):
+ with patch("mempalace.hooks_cli.STATE_DIR", tmp_path):
+ with patch("mempalace.hooks_cli._MINE_PID_FILE", tmp_path / "mine.pid"):
+ with patch("mempalace.hooks_cli.subprocess.Popen") as mock_popen:
+ _maybe_auto_ingest(str(transcript))
+ assert mock_popen.call_count == 2
+ cmds = [call.args[0] for call in mock_popen.call_args_list]
+ modes = {cmd[cmd.index("--mode") + 1] for cmd in cmds}
+ assert modes == {"projects", "convos"}
+
+
+def test_mine_sync_with_both_set(tmp_path):
+ """Precompact sync runs BOTH mines when MEMPAL_DIR + transcript are set."""
+ mempal_dir = tmp_path / "project"
+ mempal_dir.mkdir()
+ convo_dir = tmp_path / "convos"
+ convo_dir.mkdir()
+ transcript = convo_dir / "session.jsonl"
+ transcript.write_text("")
+ with patch.dict("os.environ", {"MEMPAL_DIR": str(mempal_dir)}):
+ with patch("mempalace.hooks_cli.STATE_DIR", tmp_path):
+ with patch("mempalace.hooks_cli.subprocess.run") as mock_run:
+ _mine_sync(str(transcript))
+ assert mock_run.call_count == 2
+ cmds = [call.args[0] for call in mock_run.call_args_list]
+ modes = {cmd[cmd.index("--mode") + 1] for cmd in cmds}
+ assert modes == {"projects", "convos"}
+
+
def test_maybe_auto_ingest_oserror(tmp_path):
"""OSError during subprocess spawn is silenced."""
mempal_dir = tmp_path / "project"
@@ -550,70 +587,90 @@ def test_mine_already_running_corrupt_file(tmp_path):
assert _mine_already_running() is False
-# --- _get_mine_dir ---
+# --- _get_mine_targets ---
-def test_get_mine_dir_mempal_dir(tmp_path):
- """MEMPAL_DIR takes priority, is expanded/resolved, and is treated as projects mode."""
+def test_get_mine_targets_mempal_dir_only(tmp_path):
+ """MEMPAL_DIR alone yields a single projects target, expanded/resolved."""
mempal_dir = tmp_path / "project"
mempal_dir.mkdir()
- transcript = tmp_path / "t.jsonl"
- transcript.write_text("")
with patch.dict("os.environ", {"MEMPAL_DIR": str(mempal_dir)}):
- result_dir, result_mode = _get_mine_dir(str(transcript))
- assert Path(result_dir).resolve() == mempal_dir.resolve()
- assert result_mode == "projects"
+ targets = _get_mine_targets("")
+ assert len(targets) == 1
+ assert Path(targets[0][0]).resolve() == mempal_dir.resolve()
+ assert targets[0][1] == "projects"
-def test_get_mine_dir_mempal_dir_tilde(tmp_path):
+def test_get_mine_targets_mempal_dir_tilde(tmp_path):
"""MEMPAL_DIR with a tilde prefix is expanded correctly."""
mempal_dir = tmp_path / "project"
mempal_dir.mkdir()
home = Path.home()
- # Build a ~-relative path only if tmp_path is inside home
try:
rel = mempal_dir.relative_to(home)
except ValueError:
pytest.skip("tmp_path is not under home, cannot build ~-relative path")
tilde_path = "~/" + str(rel)
with patch.dict("os.environ", {"MEMPAL_DIR": tilde_path}):
- result_dir, result_mode = _get_mine_dir("")
- assert Path(result_dir).resolve() == mempal_dir.resolve()
- assert result_mode == "projects"
+ targets = _get_mine_targets("")
+ assert len(targets) == 1
+ assert Path(targets[0][0]).resolve() == mempal_dir.resolve()
+ assert targets[0][1] == "projects"
-def test_get_mine_dir_transcript_fallback(tmp_path):
- """Transcript fallback resolves to its parent dir in convos mode."""
+def test_get_mine_targets_transcript_only(tmp_path):
+ """A valid transcript JSONL alone yields a single convos target."""
transcript = tmp_path / "t.jsonl"
transcript.write_text("")
with patch.dict("os.environ", {}, clear=True):
- result_dir, result_mode = _get_mine_dir(str(transcript))
- assert Path(result_dir).resolve() == tmp_path.resolve()
- assert result_mode == "convos"
+ targets = _get_mine_targets(str(transcript))
+ assert len(targets) == 1
+ assert Path(targets[0][0]).resolve() == tmp_path.resolve()
+ assert targets[0][1] == "convos"
-def test_get_mine_dir_transcript_path_traversal_rejected(tmp_path):
- """Transcript paths with '..' components are rejected and return no dir."""
+def test_get_mine_targets_both_set(tmp_path):
+ """When MEMPAL_DIR and a valid transcript are both set, BOTH targets appear.
+
+ This is the regression that motivates the additive-targets design:
+ users who set MEMPAL_DIR previously had their conversations silently
+ skipped because MEMPAL_DIR overrode the transcript path.
+ """
+ mempal_dir = tmp_path / "project"
+ mempal_dir.mkdir()
+ convo_dir = tmp_path / "convos"
+ convo_dir.mkdir()
+ transcript = convo_dir / "session.jsonl"
+ transcript.write_text("")
+ with patch.dict("os.environ", {"MEMPAL_DIR": str(mempal_dir)}):
+ targets = _get_mine_targets(str(transcript))
+ modes = {mode for _, mode in targets}
+ assert modes == {"projects", "convos"}
+ by_mode = {mode: Path(d) for d, mode in targets}
+ assert by_mode["projects"].resolve() == mempal_dir.resolve()
+ assert by_mode["convos"].resolve() == convo_dir.resolve()
+
+
+def test_get_mine_targets_transcript_path_traversal_rejected(tmp_path):
+ """Transcript paths with '..' components are rejected (no convos target)."""
with patch.dict("os.environ", {}, clear=True):
- result_dir, result_mode = _get_mine_dir("../../etc/passwd")
- assert result_dir == ""
- assert result_mode == "projects"
+ targets = _get_mine_targets("../../etc/passwd")
+ assert targets == []
-def test_get_mine_dir_transcript_non_jsonl_rejected(tmp_path):
+def test_get_mine_targets_transcript_non_jsonl_rejected(tmp_path):
"""Transcript paths without .jsonl/.json extension are rejected."""
bad = tmp_path / "notes.txt"
bad.write_text("content")
with patch.dict("os.environ", {}, clear=True):
- result_dir, result_mode = _get_mine_dir(str(bad))
- assert result_dir == ""
- assert result_mode == "projects"
+ targets = _get_mine_targets(str(bad))
+ assert targets == []
-def test_get_mine_dir_empty():
- """Returns empty dir when nothing is available."""
+def test_get_mine_targets_empty():
+ """Returns empty list when nothing is available."""
with patch.dict("os.environ", {}, clear=True):
- assert _get_mine_dir("") == ("", "projects")
+ assert _get_mine_targets("") == []
# --- _parse_harness_input ---
diff --git a/tests/test_save_hook_mines.py b/tests/test_save_hook_mines.py
index a702a42..93a51f2 100644
--- a/tests/test_save_hook_mines.py
+++ b/tests/test_save_hook_mines.py
@@ -16,7 +16,8 @@ class TestSaveHookAutoMines:
def test_hook_mines_transcript_path(self):
"""The hook receives TRANSCRIPT_PATH from Claude Code.
- It should use that to mine the conversation, not depend on MEMPAL_DIR."""
+ It should use that to mine the conversation as --mode convos,
+ independently of MEMPAL_DIR (which is for project files only)."""
hook_path = os.path.join(
os.path.dirname(os.path.dirname(__file__)),
"hooks",
@@ -24,23 +25,17 @@ class TestSaveHookAutoMines:
)
src = open(hook_path).read()
- # The hook ALREADY receives TRANSCRIPT_PATH in the JSON input.
- # It should use this to mine the current session's transcript
- # regardless of whether MEMPAL_DIR is set.
- # The hook must have a path that uses TRANSCRIPT_PATH to determine
- # what to mine, separate from the MEMPAL_DIR path.
- uses_transcript = "TRANSCRIPT_PATH" in src
- has_mine = "mempalace mine" in src
- # TRANSCRIPT_PATH must appear in the mining logic, not just the parse block
- transcript_drives_mine = "MINE_DIR" in src and "dirname" in src and "TRANSCRIPT_PATH" in src
-
- assert uses_transcript and has_mine and transcript_drives_mine, (
- "Save hook only mines when MEMPAL_DIR is set (defaults to empty). "
- "The hook receives TRANSCRIPT_PATH from Claude Code — it should "
- "mine that file automatically so conversations are saved without "
- "the user setting an env var. Currently the hook says 'saved in "
- "background' but nothing actually saves."
- )
+ # The hook must drive the conversation mine off TRANSCRIPT_PATH,
+ # using `dirname` to derive the parent dir, and tagging it with
+ # `--mode convos` so the convo miner runs (not the projects miner).
+ assert "TRANSCRIPT_PATH" in src, "hook must read transcript_path"
+ assert "mempalace mine" in src, "hook must invoke `mempalace mine`"
+ assert (
+ 'dirname "$TRANSCRIPT_PATH"' in src
+ ), "hook must mine the transcript's parent directory"
+ assert (
+ "--mode convos" in src
+ ), "transcript mine must use --mode convos, not the projects miner"
def test_mempal_dir_default_not_empty(self):
"""If MEMPAL_DIR is still used, it should have a sensible default,
From fe5679776274a23f175f23bd1bdb57ed42094e6d Mon Sep 17 00:00:00 2001
From: Igor Lins e Silva <4753812+igorls@users.noreply.github.com>
Date: Mon, 27 Apr 2026 02:26:53 -0300
Subject: [PATCH 2/3] fix(hooks): consolidate transcript ingest, harden shell
parsers (#1231 review)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Address Copilot review on #1231:
1. Stop double-mining the transcript on the Python side. ``_get_mine_targets``
now returns only the ``MEMPAL_DIR`` projects target — the convos target
for the transcript dir is dropped because ``_ingest_transcript`` already
handles it on every hook fire. The duplicate spawn was using
``sys.executable`` (vs ``_mempalace_python()``) and a different ``--wing``,
so each Stop/PreCompact event was writing the same transcript into two
wings under asymmetric interpreters and overwriting the single
``_MINE_PID_FILE`` lock.
2. ``_maybe_auto_ingest`` and ``_mine_sync`` now spawn via
``_mempalace_python()`` so the resolved interpreter matches the venv
that owns mempalace (matters under GUI-launched harnesses where
``sys.executable`` may resolve to a system Python without chromadb).
3. Replace ``eval $(...)`` in both shell hooks with a ``mapfile``-based
reader. Sanitized values are still emitted by the same Python parser,
but the shell now does plain variable assignment instead of executing
the parser's stdout — smaller blast radius if the sanitizer is ever
bypassed.
4. Mirror ``_validate_transcript_path`` in the shell hooks via a
``is_valid_transcript_path`` helper — extension + traversal-segment
rejection, parity with the Python validator. The convos mine in each
shell hook is now gated on the validator instead of bare ``-f``.
5. Tighten the ``..`` traversal test that previously exercised the
suffix gate by mistake (``../../etc/passwd`` lacks ``.json[l]``).
Use ``.jsonl`` paths with traversal segments to actually hit the
``..`` rejection branch.
6. README: add a one-liner pointing at ``mempalace sweep`` for users
who want per-message recall on top of the file-level chunks the
hooks produce. The sweeper was undiscoverable previously.
Tests: 1418 passed, 1 skipped (full suite minus benchmarks).
---
README.md | 4 +
hooks/mempal_precompact_hook.sh | 35 +++++-
hooks/mempal_save_hook.sh | 41 +++++--
mempalace/hooks_cli.py | 51 ++++----
tests/test_hooks_cli.py | 200 ++++++++++++++++----------------
tests/test_save_hook_mines.py | 62 ++++++++++
6 files changed, 261 insertions(+), 132 deletions(-)
diff --git a/README.md b/README.md
index acbeb14..8157fca 100644
--- a/README.md
+++ b/README.md
@@ -149,6 +149,10 @@ system prompt:
Two Claude Code hooks save periodically and before context compression:
[mempalaceofficial.com/guide/hooks](https://mempalaceofficial.com/guide/hooks.html).
+For per-message recall on top of the file-level chunks the hooks produce,
+run `mempalace sweep ` periodically — it stores one
+verbatim drawer per user/assistant message, idempotent and resume-safe.
+
---
## Requirements
diff --git a/hooks/mempal_precompact_hook.sh b/hooks/mempal_precompact_hook.sh
index 70377f6..e811d65 100755
--- a/hooks/mempal_precompact_hook.sh
+++ b/hooks/mempal_precompact_hook.sh
@@ -65,30 +65,53 @@ fi
# Read JSON input from stdin
INPUT=$(cat)
-# Parse session_id and transcript_path in one call. Sanitize both before
-# interpolating into shell — same contract as mempal_save_hook.sh.
-eval $(echo "$INPUT" | "$MEMPAL_PYTHON_BIN" -c "
+# Parse session_id and transcript_path in one call. Sanitize both, then
+# read sanitized values from one-per-line stdout into shell variables —
+# avoids ``eval`` on generated code (#1231 review). Same contract as
+# mempal_save_hook.sh.
+mapfile -t _mempal_parsed < <(echo "$INPUT" | "$MEMPAL_PYTHON_BIN" -c "
import sys, json, re
data = json.load(sys.stdin)
sid = data.get('session_id', 'unknown')
tp = data.get('transcript_path', '')
safe = lambda s: re.sub(r'[^a-zA-Z0-9_/.\-~]', '', str(s))
-print(f'SESSION_ID=\"{safe(sid)}\"')
-print(f'TRANSCRIPT_PATH=\"{safe(tp)}\"')
+print(safe(sid))
+print(safe(tp))
" 2>/dev/null)
+SESSION_ID="${_mempal_parsed[0]:-unknown}"
+TRANSCRIPT_PATH="${_mempal_parsed[1]:-}"
# Expand ~ in path
TRANSCRIPT_PATH="${TRANSCRIPT_PATH/#\~/$HOME}"
+# Validate that TRANSCRIPT_PATH looks like a transcript file. Mirrors
+# mempalace.hooks_cli._validate_transcript_path so the shell hook
+# rejects the same shapes the Python hook rejects (#1231 review).
+is_valid_transcript_path() {
+ local path="$1"
+ [ -n "$path" ] || return 1
+ case "$path" in
+ *.json|*.jsonl) ;;
+ *) return 1 ;;
+ esac
+ case "/$path/" in
+ */../*) return 1 ;;
+ esac
+ return 0
+}
+
echo "[$(date '+%H:%M:%S')] PRE-COMPACT triggered for session $SESSION_ID" >> "$STATE_DIR/hook.log"
# Run ingest synchronously so memories land before compaction. Two
# independent targets — both run if both are set:
# 1. TRANSCRIPT_PATH (from Claude Code) → parent dir, --mode convos
# 2. MEMPAL_DIR → --mode projects
-if [ -n "$TRANSCRIPT_PATH" ] && [ -f "$TRANSCRIPT_PATH" ]; then
+if is_valid_transcript_path "$TRANSCRIPT_PATH" && [ -f "$TRANSCRIPT_PATH" ]; then
mempalace mine "$(dirname "$TRANSCRIPT_PATH")" --mode convos \
>> "$STATE_DIR/hook.log" 2>&1
+elif [ -n "$TRANSCRIPT_PATH" ]; then
+ echo "[$(date '+%H:%M:%S')] Skipping invalid transcript path: $TRANSCRIPT_PATH" \
+ >> "$STATE_DIR/hook.log"
fi
if [ -n "$MEMPAL_DIR" ] && [ -d "$MEMPAL_DIR" ]; then
mempalace mine "$MEMPAL_DIR" --mode projects \
diff --git a/hooks/mempal_save_hook.sh b/hooks/mempal_save_hook.sh
index d69ae85..5efd157 100755
--- a/hooks/mempal_save_hook.sh
+++ b/hooks/mempal_save_hook.sh
@@ -83,9 +83,11 @@ fi
INPUT=$(cat)
# Parse all fields in a single Python call (3x faster than separate invocations)
-# SECURITY: All values are sanitized before being interpolated into shell assignments.
-# stop_hook_active is coerced to a strict True/False to prevent command injection via eval.
-eval $(echo "$INPUT" | "$MEMPAL_PYTHON_BIN" -c "
+# without invoking ``eval`` on generated code: Python prints one sanitized
+# value per line, the shell reads them via ``mapfile`` and does plain
+# variable assignment — same data, smaller blast radius if the sanitizer
+# is ever bypassed (#1231 review).
+mapfile -t _mempal_parsed < <(echo "$INPUT" | "$MEMPAL_PYTHON_BIN" -c "
import sys, json, re
data = json.load(sys.stdin)
sid = data.get('session_id', 'unknown')
@@ -95,14 +97,36 @@ tp = data.get('transcript_path', '')
safe = lambda s: re.sub(r'[^a-zA-Z0-9_/.\-~]', '', str(s))
# Coerce stop_hook_active to strict boolean string
sha = 'True' if sha_raw is True or str(sha_raw).lower() in ('true', '1', 'yes') else 'False'
-print(f'SESSION_ID=\"{safe(sid)}\"')
-print(f'STOP_HOOK_ACTIVE=\"{sha}\"')
-print(f'TRANSCRIPT_PATH=\"{safe(tp)}\"')
+print(safe(sid))
+print(sha)
+print(safe(tp))
" 2>/dev/null)
+SESSION_ID="${_mempal_parsed[0]:-unknown}"
+STOP_HOOK_ACTIVE="${_mempal_parsed[1]:-False}"
+TRANSCRIPT_PATH="${_mempal_parsed[2]:-}"
# Expand ~ in path
TRANSCRIPT_PATH="${TRANSCRIPT_PATH/#\~/$HOME}"
+# Validate that TRANSCRIPT_PATH looks like a transcript file:
+# - non-empty
+# - .jsonl or .json suffix
+# - no traversal segments (.. components)
+# Mirrors mempalace.hooks_cli._validate_transcript_path so the shell hook
+# rejects the same shapes the Python hook rejects (#1231 review).
+is_valid_transcript_path() {
+ local path="$1"
+ [ -n "$path" ] || return 1
+ case "$path" in
+ *.json|*.jsonl) ;;
+ *) return 1 ;;
+ esac
+ case "/$path/" in
+ */../*) return 1 ;;
+ esac
+ return 0
+}
+
# If we're already in a save cycle, let the AI stop normally
# This is the infinite-loop prevention: block once → AI saves → tries to stop again → we let it through
if [ "$STOP_HOOK_ACTIVE" = "True" ] || [ "$STOP_HOOK_ACTIVE" = "true" ]; then
@@ -165,9 +189,12 @@ if [ "$SINCE_LAST" -ge "$SAVE_INTERVAL" ] && [ "$EXCHANGE_COUNT" -gt 0 ]; then
# (code, notes, docs)
# MEMPAL_DIR is *additive*, not an override: a user with MEMPAL_DIR
# pointed at their project still gets the active conversation mined.
- if [ -n "$TRANSCRIPT_PATH" ] && [ -f "$TRANSCRIPT_PATH" ]; then
+ if is_valid_transcript_path "$TRANSCRIPT_PATH" && [ -f "$TRANSCRIPT_PATH" ]; then
mempalace mine "$(dirname "$TRANSCRIPT_PATH")" --mode convos \
>> "$STATE_DIR/hook.log" 2>&1 &
+ elif [ -n "$TRANSCRIPT_PATH" ]; then
+ echo "[$(date '+%H:%M:%S')] Skipping invalid transcript path: $TRANSCRIPT_PATH" \
+ >> "$STATE_DIR/hook.log"
fi
if [ -n "$MEMPAL_DIR" ] && [ -d "$MEMPAL_DIR" ]; then
mempalace mine "$MEMPAL_DIR" --mode projects \
diff --git a/mempalace/hooks_cli.py b/mempalace/hooks_cli.py
index 0645c41..d4f8317 100644
--- a/mempalace/hooks_cli.py
+++ b/mempalace/hooks_cli.py
@@ -197,16 +197,15 @@ def _output(data: dict):
sys.stdout.buffer.flush()
-def _get_mine_targets(transcript_path: str = "") -> list[tuple[str, str]]:
+def _get_mine_targets() -> list[tuple[str, str]]:
"""Return the list of ``(dir, mode)`` targets for auto-ingest.
MEMPAL_DIR (when set and resolvable) contributes a ``"projects"``
- target. A valid transcript JSONL contributes a ``"convos"`` target
- on its parent dir. Both may appear together — the hook runs one
- ingest per target, so a user with MEMPAL_DIR pointed at their
- project dir still gets the active conversation mined verbatim.
+ target. Transcript ingestion is handled separately by
+ ``_ingest_transcript`` — emitting it here too would double-mine the
+ same JSONL into a different wing on every hook fire (#1231 review).
- An empty list means no ingest should run.
+ An empty list means no MEMPAL_DIR ingest should run.
"""
targets: list[tuple[str, str]] = []
mempal_dir = os.environ.get("MEMPAL_DIR", "")
@@ -214,9 +213,6 @@ def _get_mine_targets(transcript_path: str = "") -> list[tuple[str, str]]:
resolved = Path(mempal_dir).expanduser().resolve()
if resolved.is_dir():
targets.append((str(resolved), "projects"))
- path = _validate_transcript_path(transcript_path)
- if path is not None and path.is_file():
- targets.append((str(path.parent), "convos"))
return targets
@@ -274,9 +270,15 @@ def _spawn_mine(cmd: list) -> None:
_MINE_PID_FILE.write_text(str(proc.pid))
-def _maybe_auto_ingest(transcript_path: str = ""):
- """Run mempalace mine in background for every available target."""
- targets = _get_mine_targets(transcript_path)
+def _maybe_auto_ingest():
+ """Background-mine MEMPAL_DIR (project files) if set.
+
+ Transcript convos are ingested separately via ``_ingest_transcript``
+ in the hook handlers — this function does not handle them, to avoid
+ asymmetric interpreter handling and PID-file overwrite when both
+ targets fire from a single hook call (#1231 review).
+ """
+ targets = _get_mine_targets()
if not targets:
return
if _mine_already_running():
@@ -284,14 +286,19 @@ def _maybe_auto_ingest(transcript_path: str = ""):
return
for mine_dir, mode in targets:
try:
- _spawn_mine([sys.executable, "-m", "mempalace", "mine", mine_dir, "--mode", mode])
+ _spawn_mine([_mempalace_python(), "-m", "mempalace", "mine", mine_dir, "--mode", mode])
except OSError:
pass
-def _mine_sync(transcript_path: str = ""):
- """Run mempalace mine synchronously for every target (precompact)."""
- targets = _get_mine_targets(transcript_path)
+def _mine_sync():
+ """Synchronously mine MEMPAL_DIR (precompact path).
+
+ Transcript convos are ingested separately via ``_ingest_transcript``
+ in ``hook_precompact`` — keeping them out of this function avoids
+ timeout stacking against the harness 30s ceiling (#1231 review).
+ """
+ targets = _get_mine_targets()
if not targets:
return
STATE_DIR.mkdir(parents=True, exist_ok=True)
@@ -301,7 +308,7 @@ def _mine_sync(transcript_path: str = ""):
with open(log_path, "a") as log_f:
subprocess.run(
[
- sys.executable,
+ _mempalace_python(),
"-m",
"mempalace",
"mine",
@@ -613,7 +620,7 @@ def hook_stop(data: dict, harness: str):
transcript_path, session_id, wing=project_wing, toast=toast
)
_ingest_transcript(transcript_path)
- _maybe_auto_ingest(transcript_path)
+ _maybe_auto_ingest()
# Only advance save marker after successful save
count = result.get("count", 0)
if count > 0:
@@ -643,7 +650,7 @@ def hook_stop(data: dict, harness: str):
pass
if transcript_path:
_ingest_transcript(transcript_path)
- _maybe_auto_ingest(transcript_path)
+ _maybe_auto_ingest()
reason = STOP_BLOCK_REASON + f" Write diary entry to wing={project_wing}."
_output({"decision": "block", "reason": reason})
else:
@@ -676,8 +683,10 @@ def hook_precompact(data: dict, harness: str):
if transcript_path:
_ingest_transcript(transcript_path)
- # Mine synchronously so data lands before compaction proceeds
- _mine_sync(transcript_path)
+ # Mine MEMPAL_DIR synchronously so project data lands before
+ # compaction proceeds. Transcript convos were already kicked off
+ # above via _ingest_transcript.
+ _mine_sync()
_output({})
diff --git a/tests/test_hooks_cli.py b/tests/test_hooks_cli.py
index c105eae..1ceb530 100644
--- a/tests/test_hooks_cli.py
+++ b/tests/test_hooks_cli.py
@@ -450,35 +450,26 @@ def test_maybe_auto_ingest_with_env(tmp_path):
assert cmd[cmd.index("--mode") + 1] == "projects"
-def test_maybe_auto_ingest_with_transcript(tmp_path):
- """Transcript fallback spawns mine in convos mode against the JSONL parent."""
- transcript = tmp_path / "t.jsonl"
- transcript.write_text("")
- with patch.dict("os.environ", {}, clear=True):
+def test_maybe_auto_ingest_uses_mempalace_python(tmp_path):
+ """Spawned mine command uses _mempalace_python(), not bare sys.executable.
+
+ Hook subprocesses inherit the harness PATH which on GUI-launched
+ Claude Code may resolve to a system Python without chromadb. The
+ interpreter used here must be the same one the hook itself runs
+ under (typically the venv that owns mempalace).
+ """
+ mempal_dir = tmp_path / "project"
+ mempal_dir.mkdir()
+ with patch.dict("os.environ", {"MEMPAL_DIR": str(mempal_dir)}):
with patch("mempalace.hooks_cli.STATE_DIR", tmp_path):
with patch("mempalace.hooks_cli._MINE_PID_FILE", tmp_path / "mine.pid"):
- with patch("mempalace.hooks_cli.subprocess.Popen") as mock_popen:
- _maybe_auto_ingest(str(transcript))
- mock_popen.assert_called_once()
- cmd = mock_popen.call_args[0][0]
- assert "mine" in cmd
- assert str(tmp_path.resolve()) in cmd
- assert cmd[cmd.index("--mode") + 1] == "convos"
-
-
-def test_mine_sync_with_transcript_uses_convos_mode(tmp_path):
- """Precompact sync path also picks convos mode for JSONL transcripts."""
- transcript = tmp_path / "t.jsonl"
- transcript.write_text("")
- with patch.dict("os.environ", {}, clear=True):
- with patch("mempalace.hooks_cli.STATE_DIR", tmp_path):
- with patch("mempalace.hooks_cli.subprocess.run") as mock_run:
- _mine_sync(str(transcript))
- mock_run.assert_called_once()
- cmd = mock_run.call_args[0][0]
- assert "mine" in cmd
- assert str(tmp_path.resolve()) in cmd
- assert cmd[cmd.index("--mode") + 1] == "convos"
+ with patch(
+ "mempalace.hooks_cli._mempalace_python", return_value="/fake/venv/python"
+ ):
+ with patch("mempalace.hooks_cli.subprocess.Popen") as mock_popen:
+ _maybe_auto_ingest()
+ cmd = mock_popen.call_args[0][0]
+ assert cmd[0] == "/fake/venv/python"
def test_mine_sync_with_env_uses_projects_mode(tmp_path):
@@ -494,41 +485,55 @@ def test_mine_sync_with_env_uses_projects_mode(tmp_path):
assert cmd[cmd.index("--mode") + 1] == "projects"
-def test_maybe_auto_ingest_with_both_set(tmp_path):
- """When MEMPAL_DIR and a transcript are both set, BOTH spawns happen."""
+def test_mine_sync_uses_mempalace_python(tmp_path):
+ """Sync mine command uses _mempalace_python(), not bare sys.executable."""
mempal_dir = tmp_path / "project"
mempal_dir.mkdir()
+ with patch.dict("os.environ", {"MEMPAL_DIR": str(mempal_dir)}):
+ with patch("mempalace.hooks_cli.STATE_DIR", tmp_path):
+ with patch("mempalace.hooks_cli._mempalace_python", return_value="/fake/venv/python"):
+ with patch("mempalace.hooks_cli.subprocess.run") as mock_run:
+ _mine_sync()
+ cmd = mock_run.call_args[0][0]
+ assert cmd[0] == "/fake/venv/python"
+
+
+def test_maybe_auto_ingest_ignores_transcript_arg_path(tmp_path):
+ """_maybe_auto_ingest does NOT mine the transcript directory.
+
+ Transcript convos are handled by _ingest_transcript (called separately
+ in hook handlers). _maybe_auto_ingest only handles MEMPAL_DIR — even
+ when invoked in a context where a transcript is also being processed,
+ no second spawn for the transcript dir should appear here.
+ """
convo_dir = tmp_path / "convos"
convo_dir.mkdir()
transcript = convo_dir / "session.jsonl"
transcript.write_text("")
- with patch.dict("os.environ", {"MEMPAL_DIR": str(mempal_dir)}):
+ with patch.dict("os.environ", {}, clear=True):
with patch("mempalace.hooks_cli.STATE_DIR", tmp_path):
with patch("mempalace.hooks_cli._MINE_PID_FILE", tmp_path / "mine.pid"):
with patch("mempalace.hooks_cli.subprocess.Popen") as mock_popen:
- _maybe_auto_ingest(str(transcript))
- assert mock_popen.call_count == 2
- cmds = [call.args[0] for call in mock_popen.call_args_list]
- modes = {cmd[cmd.index("--mode") + 1] for cmd in cmds}
- assert modes == {"projects", "convos"}
+ _maybe_auto_ingest()
+ mock_popen.assert_not_called()
-def test_mine_sync_with_both_set(tmp_path):
- """Precompact sync runs BOTH mines when MEMPAL_DIR + transcript are set."""
- mempal_dir = tmp_path / "project"
- mempal_dir.mkdir()
+def test_mine_sync_ignores_transcript(tmp_path):
+ """_mine_sync does not run a convos mine for the transcript dir.
+
+ The precompact transcript ingest is the responsibility of
+ _ingest_transcript; routing it through _mine_sync would stack a
+ second 60s timeout against the harness 30s ceiling.
+ """
convo_dir = tmp_path / "convos"
convo_dir.mkdir()
transcript = convo_dir / "session.jsonl"
transcript.write_text("")
- with patch.dict("os.environ", {"MEMPAL_DIR": str(mempal_dir)}):
+ with patch.dict("os.environ", {}, clear=True):
with patch("mempalace.hooks_cli.STATE_DIR", tmp_path):
with patch("mempalace.hooks_cli.subprocess.run") as mock_run:
- _mine_sync(str(transcript))
- assert mock_run.call_count == 2
- cmds = [call.args[0] for call in mock_run.call_args_list]
- modes = {cmd[cmd.index("--mode") + 1] for cmd in cmds}
- assert modes == {"projects", "convos"}
+ _mine_sync()
+ mock_run.assert_not_called()
def test_maybe_auto_ingest_oserror(tmp_path):
@@ -595,7 +600,7 @@ def test_get_mine_targets_mempal_dir_only(tmp_path):
mempal_dir = tmp_path / "project"
mempal_dir.mkdir()
with patch.dict("os.environ", {"MEMPAL_DIR": str(mempal_dir)}):
- targets = _get_mine_targets("")
+ targets = _get_mine_targets()
assert len(targets) == 1
assert Path(targets[0][0]).resolve() == mempal_dir.resolve()
assert targets[0][1] == "projects"
@@ -612,65 +617,53 @@ def test_get_mine_targets_mempal_dir_tilde(tmp_path):
pytest.skip("tmp_path is not under home, cannot build ~-relative path")
tilde_path = "~/" + str(rel)
with patch.dict("os.environ", {"MEMPAL_DIR": tilde_path}):
- targets = _get_mine_targets("")
+ targets = _get_mine_targets()
assert len(targets) == 1
assert Path(targets[0][0]).resolve() == mempal_dir.resolve()
assert targets[0][1] == "projects"
-def test_get_mine_targets_transcript_only(tmp_path):
- """A valid transcript JSONL alone yields a single convos target."""
+def test_get_mine_targets_no_transcript_target(tmp_path):
+ """_get_mine_targets does not emit a convos target for the transcript path.
+
+ Transcript ingestion is owned by _ingest_transcript; emitting it
+ here too would double-mine the same JSONL into a different wing on
+ every hook fire (#1231 review).
+ """
transcript = tmp_path / "t.jsonl"
transcript.write_text("")
with patch.dict("os.environ", {}, clear=True):
- targets = _get_mine_targets(str(transcript))
- assert len(targets) == 1
- assert Path(targets[0][0]).resolve() == tmp_path.resolve()
- assert targets[0][1] == "convos"
+ targets = _get_mine_targets()
+ assert targets == []
-def test_get_mine_targets_both_set(tmp_path):
- """When MEMPAL_DIR and a valid transcript are both set, BOTH targets appear.
-
- This is the regression that motivates the additive-targets design:
- users who set MEMPAL_DIR previously had their conversations silently
- skipped because MEMPAL_DIR overrode the transcript path.
- """
+def test_get_mine_targets_only_returns_mempal_dir(tmp_path):
+ """When MEMPAL_DIR is set, exactly one projects target — never a convos target."""
mempal_dir = tmp_path / "project"
mempal_dir.mkdir()
- convo_dir = tmp_path / "convos"
- convo_dir.mkdir()
- transcript = convo_dir / "session.jsonl"
- transcript.write_text("")
with patch.dict("os.environ", {"MEMPAL_DIR": str(mempal_dir)}):
- targets = _get_mine_targets(str(transcript))
- modes = {mode for _, mode in targets}
- assert modes == {"projects", "convos"}
- by_mode = {mode: Path(d) for d, mode in targets}
- assert by_mode["projects"].resolve() == mempal_dir.resolve()
- assert by_mode["convos"].resolve() == convo_dir.resolve()
+ targets = _get_mine_targets()
+ assert len(targets) == 1
+ assert targets[0][1] == "projects"
-def test_get_mine_targets_transcript_path_traversal_rejected(tmp_path):
- """Transcript paths with '..' components are rejected (no convos target)."""
- with patch.dict("os.environ", {}, clear=True):
- targets = _get_mine_targets("../../etc/passwd")
- assert targets == []
+def test_validate_transcript_path_traversal_rejected_jsonl(tmp_path):
+ """Path traversal is rejected even when the path has a .jsonl suffix.
-
-def test_get_mine_targets_transcript_non_jsonl_rejected(tmp_path):
- """Transcript paths without .jsonl/.json extension are rejected."""
- bad = tmp_path / "notes.txt"
- bad.write_text("content")
- with patch.dict("os.environ", {}, clear=True):
- targets = _get_mine_targets(str(bad))
- assert targets == []
+ The pre-fix test used "../../etc/passwd" which lacks an extension and
+ so was rejected by the suffix gate before the traversal check ever
+ fired (Copilot review on #1231). Use a .jsonl path with `..`
+ segments to exercise the traversal guard specifically.
+ """
+ assert _validate_transcript_path("../t.jsonl") is None
+ assert _validate_transcript_path("a/../b.jsonl") is None
+ assert _validate_transcript_path("/tmp/../etc/t.jsonl") is None
def test_get_mine_targets_empty():
- """Returns empty list when nothing is available."""
+ """Returns empty list when MEMPAL_DIR is unset or invalid."""
with patch.dict("os.environ", {}, clear=True):
- assert _get_mine_targets("") == []
+ assert _get_mine_targets() == []
# --- _parse_harness_input ---
@@ -790,22 +783,33 @@ def test_precompact_with_timeout(tmp_path):
def test_precompact_mines_transcript_dir(tmp_path, monkeypatch):
- """Precompact mines transcript directory when no MEMPAL_DIR."""
+ """Precompact ingests the active transcript via _ingest_transcript.
+
+ With no MEMPAL_DIR, _mine_sync is a no-op; the transcript ingest is
+ the only mining that should fire, and it goes through Popen
+ (background) inside _ingest_transcript. Pre-#1231-review this test
+ asserted against subprocess.run, which corresponded to the
+ duplicate-mine path that has now been removed.
+ """
transcript = tmp_path / "t.jsonl"
- transcript.write_text("")
+ # _ingest_transcript skips files smaller than 100 bytes, so pad it.
+ transcript.write_text("x" * 200)
monkeypatch.delenv("MEMPAL_DIR", raising=False)
- with patch("mempalace.hooks_cli.subprocess.run") as mock_run:
- result = _capture_hook_output(
- hook_precompact,
- {"session_id": "test", "transcript_path": str(transcript)},
- state_dir=tmp_path,
- )
+ with patch("mempalace.hooks_cli.subprocess.Popen") as mock_popen:
+ with patch("mempalace.hooks_cli.subprocess.run") as mock_run:
+ result = _capture_hook_output(
+ hook_precompact,
+ {"session_id": "test", "transcript_path": str(transcript)},
+ state_dir=tmp_path,
+ )
assert result == {}
- mock_run.assert_called_once()
- # Mine dir is the transcript's parent (resolved) and mode is convos for JSONL.
- call_args = mock_run.call_args[0][0]
- assert str(tmp_path.resolve()) in call_args
- assert call_args[call_args.index("--mode") + 1] == "convos"
+ mock_run.assert_not_called()
+ mock_popen.assert_called_once()
+ cmd = mock_popen.call_args[0][0]
+ # Mines the transcript's parent dir as convos, into wing "sessions".
+ assert str(tmp_path) in cmd
+ assert cmd[cmd.index("--mode") + 1] == "convos"
+ assert cmd[cmd.index("--wing") + 1] == "sessions"
# --- run_hook ---
diff --git a/tests/test_save_hook_mines.py b/tests/test_save_hook_mines.py
index 93a51f2..d9f40e8 100644
--- a/tests/test_save_hook_mines.py
+++ b/tests/test_save_hook_mines.py
@@ -61,3 +61,65 @@ class TestSaveHookAutoMines:
'MEMPAL_DIR defaults to "" which silently disables mining. '
"Either set a default path or add transcript-based mining."
)
+
+
+class TestShellHookTranscriptValidation:
+ """Both shell hooks must validate transcript paths before mining them.
+
+ Mirrors mempalace.hooks_cli._validate_transcript_path so unsafe paths
+ (no extension, traversal segments) are rejected at the shell layer
+ too — added in #1231 review (Copilot #7, #8).
+ """
+
+ @staticmethod
+ def _hook_src(name: str) -> str:
+ path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "hooks", name)
+ return open(path).read()
+
+ @staticmethod
+ def _strip_comments(src: str) -> str:
+ return "\n".join(line for line in src.splitlines() if not line.lstrip().startswith("#"))
+
+ def test_save_hook_defines_and_uses_validator(self):
+ src = self._strip_comments(self._hook_src("mempal_save_hook.sh"))
+ assert "is_valid_transcript_path() {" in src, "validator function must be defined"
+ assert (
+ 'is_valid_transcript_path "$TRANSCRIPT_PATH"' in src
+ ), "validator must be invoked against TRANSCRIPT_PATH before mining"
+
+ def test_precompact_hook_defines_and_uses_validator(self):
+ src = self._strip_comments(self._hook_src("mempal_precompact_hook.sh"))
+ assert "is_valid_transcript_path() {" in src, "validator function must be defined"
+ assert (
+ 'is_valid_transcript_path "$TRANSCRIPT_PATH"' in src
+ ), "validator must be invoked against TRANSCRIPT_PATH before mining"
+
+ def test_validators_run_via_bash(self, tmp_path):
+ """Source the validator out of each hook and exercise it directly."""
+ import subprocess
+
+ for name in ("mempal_save_hook.sh", "mempal_precompact_hook.sh"):
+ src = self._hook_src(name)
+ # Extract just the function definition (first occurrence).
+ start = src.index("is_valid_transcript_path() {")
+ end = src.index("\n}\n", start) + 2
+ func_src = src[start:end]
+ script = tmp_path / "v.sh"
+ script.write_text(
+ f"{func_src}\n" 'is_valid_transcript_path "$1" && echo OK || echo NO\n'
+ )
+
+ def run(arg: str) -> str:
+ return subprocess.run(
+ ["bash", str(script), arg],
+ capture_output=True,
+ text=True,
+ check=False,
+ ).stdout.strip()
+
+ assert run("/tmp/sessions/abc.jsonl") == "OK"
+ assert run("/tmp/sessions/abc.json") == "OK"
+ assert run("") == "NO"
+ assert run("/tmp/notes.txt") == "NO"
+ assert run("../etc/passwd.jsonl") == "NO"
+ assert run("/tmp/../etc/t.jsonl") == "NO"
From 3deebfed19f255348399ac7f985cc8fbe1555d43 Mon Sep 17 00:00:00 2001
From: Igor Lins e Silva <4753812+igorls@users.noreply.github.com>
Date: Mon, 27 Apr 2026 02:45:04 -0300
Subject: [PATCH 3/3] test(hooks): skip bash subprocess validator test on
Windows
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
`bash` on the Windows CI runner resolves to `wsl.exe` which fails with
"Windows Subsystem for Linux has no installed distributions." The shell
hooks themselves are POSIX-only — Windows users use the Python entry
point — so the bash-subprocess exercise is non-applicable on win32.
The static-grep validator tests still run on every platform, so the
shell-side validation is still asserted under Windows; only the live
bash invocation is skipped.
---
tests/test_save_hook_mines.py | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/tests/test_save_hook_mines.py b/tests/test_save_hook_mines.py
index d9f40e8..e11234a 100644
--- a/tests/test_save_hook_mines.py
+++ b/tests/test_save_hook_mines.py
@@ -9,6 +9,9 @@ Written BEFORE the fix.
"""
import os
+import sys
+
+import pytest
class TestSaveHookAutoMines:
@@ -94,6 +97,10 @@ class TestShellHookTranscriptValidation:
'is_valid_transcript_path "$TRANSCRIPT_PATH"' in src
), "validator must be invoked against TRANSCRIPT_PATH before mining"
+ @pytest.mark.skipif(
+ sys.platform == "win32",
+ reason="shell hooks are POSIX-only; Windows CI bash maps to wsl.exe with no distro",
+ )
def test_validators_run_via_bash(self, tmp_path):
"""Source the validator out of each hook and exercise it directly."""
import subprocess