From 1e3e89a78fe7e0ba15c3b8806c25dcbd19c8491f Mon Sep 17 00:00:00 2001 From: Igor Lins e Silva <4753812+igorls@users.noreply.github.com> Date: Sun, 26 Apr 2026 23:25:12 -0300 Subject: [PATCH] fix(hooks): pass --mode convos when mining a Claude Code transcript dir MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Stop and PreCompact hooks spawn `mempalace mine <dir>` with no `--mode` flag, which defaults to `projects` in cli.py. When MEMPAL_DIR is unset, _get_mine_dir falls back to the parent of the transcript JSONL — and miner.py's READABLE_EXTENSIONS includes `.jsonl`, so the projects miner happily ingests Claude Code session JSONL as if it were source code instead of conversation. Make _get_mine_dir return (dir, mode): MEMPAL_DIR keeps `projects`, the JSONL fallback yields `convos`. Both _maybe_auto_ingest and _mine_sync now thread the mode into the spawned command. --- mempalace/hooks_cli.py | 28 +++++++++++++------- tests/test_hooks_cli.py | 58 ++++++++++++++++++++++++++++++++++------- 2 files changed, 67 insertions(+), 19 deletions(-) diff --git a/mempalace/hooks_cli.py b/mempalace/hooks_cli.py index 01eca3f..e7b3981 100644 --- a/mempalace/hooks_cli.py +++ b/mempalace/hooks_cli.py @@ -197,16 +197,26 @@ def _output(data: dict): sys.stdout.buffer.flush() -def _get_mine_dir(transcript_path: str = "") -> str: - """Determine directory to mine from MEMPAL_DIR or transcript path.""" +def _get_mine_dir(transcript_path: str = "") -> tuple[str, str]: + """Determine directory to mine and the miner mode to use. + + Returns ``(dir, mode)`` where ``mode`` is ``"projects"`` or ``"convos"``. + Empty ``dir`` means no ingest should run. + + MEMPAL_DIR is treated as a project directory ("projects" mode). The + transcript-path fallback resolves to the parent of a Claude Code + session JSONL, which must be mined with the conversation miner — + running the projects miner there ingests JSONL as if it were source + code. 
+ """ mempal_dir = os.environ.get("MEMPAL_DIR", "") if mempal_dir and os.path.isdir(mempal_dir): - return mempal_dir + return mempal_dir, "projects" if transcript_path: path = Path(transcript_path).expanduser() if path.is_file(): - return str(path.parent) - return "" + return str(path.parent), "convos" + return "", "projects" _MINE_PID_FILE = STATE_DIR / "mine.pid" @@ -265,21 +275,21 @@ def _spawn_mine(cmd: list) -> None: def _maybe_auto_ingest(transcript_path: str = ""): """Run mempalace mine in background if a mine directory is available.""" - mine_dir = _get_mine_dir(transcript_path) + mine_dir, mode = _get_mine_dir(transcript_path) if not mine_dir: return if _mine_already_running(): _log("Skipping auto-ingest: mine already running") return try: - _spawn_mine([sys.executable, "-m", "mempalace", "mine", mine_dir]) + _spawn_mine([sys.executable, "-m", "mempalace", "mine", mine_dir, "--mode", mode]) except OSError: pass def _mine_sync(transcript_path: str = ""): """Run mempalace mine synchronously (for precompact -- data must land first).""" - mine_dir = _get_mine_dir(transcript_path) + mine_dir, mode = _get_mine_dir(transcript_path) if not mine_dir: return try: @@ -287,7 +297,7 @@ def _mine_sync(transcript_path: str = ""): log_path = STATE_DIR / "hook.log" with open(log_path, "a") as log_f: subprocess.run( - [sys.executable, "-m", "mempalace", "mine", mine_dir], + [sys.executable, "-m", "mempalace", "mine", mine_dir, "--mode", mode], stdout=log_f, stderr=log_f, timeout=60, diff --git a/tests/test_hooks_cli.py b/tests/test_hooks_cli.py index c9a0022..7a19dda 100644 --- a/tests/test_hooks_cli.py +++ b/tests/test_hooks_cli.py @@ -17,6 +17,7 @@ from mempalace.hooks_cli import ( _maybe_auto_ingest, _mempalace_python, _mine_already_running, + _mine_sync, _parse_harness_input, _sanitize_session_id, _validate_transcript_path, @@ -434,7 +435,7 @@ def test_maybe_auto_ingest_no_env(tmp_path): def test_maybe_auto_ingest_with_env(tmp_path): - """With MEMPAL_DIR set to a valid 
directory, spawns subprocess.""" + """With MEMPAL_DIR set, spawns mine in projects mode against that dir.""" mempal_dir = tmp_path / "project" mempal_dir.mkdir() with patch.dict("os.environ", {"MEMPAL_DIR": str(mempal_dir)}): @@ -443,10 +444,14 @@ def test_maybe_auto_ingest_with_env(tmp_path): with patch("mempalace.hooks_cli.subprocess.Popen") as mock_popen: _maybe_auto_ingest() mock_popen.assert_called_once() + cmd = mock_popen.call_args[0][0] + assert "mine" in cmd + assert str(mempal_dir) in cmd + assert cmd[cmd.index("--mode") + 1] == "projects" def test_maybe_auto_ingest_with_transcript(tmp_path): - """Falls back to transcript directory when MEMPAL_DIR is not set.""" + """Transcript fallback spawns mine in convos mode against the JSONL parent.""" transcript = tmp_path / "t.jsonl" transcript.write_text("") with patch.dict("os.environ", {}, clear=True): @@ -455,6 +460,38 @@ def test_maybe_auto_ingest_with_transcript(tmp_path): with patch("mempalace.hooks_cli.subprocess.Popen") as mock_popen: _maybe_auto_ingest(str(transcript)) mock_popen.assert_called_once() + cmd = mock_popen.call_args[0][0] + assert "mine" in cmd + assert str(tmp_path) in cmd + assert cmd[cmd.index("--mode") + 1] == "convos" + + +def test_mine_sync_with_transcript_uses_convos_mode(tmp_path): + """Precompact sync path also picks convos mode for JSONL transcripts.""" + transcript = tmp_path / "t.jsonl" + transcript.write_text("") + with patch.dict("os.environ", {}, clear=True): + with patch("mempalace.hooks_cli.STATE_DIR", tmp_path): + with patch("mempalace.hooks_cli.subprocess.run") as mock_run: + _mine_sync(str(transcript)) + mock_run.assert_called_once() + cmd = mock_run.call_args[0][0] + assert "mine" in cmd + assert str(tmp_path) in cmd + assert cmd[cmd.index("--mode") + 1] == "convos" + + +def test_mine_sync_with_env_uses_projects_mode(tmp_path): + """Precompact sync path uses projects mode when MEMPAL_DIR is set.""" + mempal_dir = tmp_path / "project" + mempal_dir.mkdir() + with 
patch.dict("os.environ", {"MEMPAL_DIR": str(mempal_dir)}): + with patch("mempalace.hooks_cli.STATE_DIR", tmp_path): + with patch("mempalace.hooks_cli.subprocess.run") as mock_run: + _mine_sync() + mock_run.assert_called_once() + cmd = mock_run.call_args[0][0] + assert cmd[cmd.index("--mode") + 1] == "projects" def test_maybe_auto_ingest_oserror(tmp_path): @@ -517,27 +554,27 @@ def test_mine_already_running_corrupt_file(tmp_path): def test_get_mine_dir_mempal_dir(tmp_path): - """MEMPAL_DIR takes priority over transcript_path.""" + """MEMPAL_DIR takes priority and is treated as projects mode.""" mempal_dir = tmp_path / "project" mempal_dir.mkdir() transcript = tmp_path / "t.jsonl" transcript.write_text("") with patch.dict("os.environ", {"MEMPAL_DIR": str(mempal_dir)}): - assert _get_mine_dir(str(transcript)) == str(mempal_dir) + assert _get_mine_dir(str(transcript)) == (str(mempal_dir), "projects") def test_get_mine_dir_transcript_fallback(tmp_path): - """Falls back to transcript parent dir when MEMPAL_DIR is not set.""" + """Transcript fallback resolves to its parent dir in convos mode.""" transcript = tmp_path / "t.jsonl" transcript.write_text("") with patch.dict("os.environ", {}, clear=True): - assert _get_mine_dir(str(transcript)) == str(tmp_path) + assert _get_mine_dir(str(transcript)) == (str(tmp_path), "convos") def test_get_mine_dir_empty(): - """Returns empty string when nothing is available.""" + """Returns empty dir when nothing is available.""" with patch.dict("os.environ", {}, clear=True): - assert _get_mine_dir("") == "" + assert _get_mine_dir("") == ("", "projects") # --- _parse_harness_input --- @@ -669,9 +706,10 @@ def test_precompact_mines_transcript_dir(tmp_path, monkeypatch): ) assert result == {} mock_run.assert_called_once() - # Verify mine dir is the transcript's parent + # Mine dir is the transcript's parent and mode is convos for JSONL. 
call_args = mock_run.call_args[0][0] - assert str(tmp_path) in call_args[-1] + assert str(tmp_path) in call_args + assert call_args[call_args.index("--mode") + 1] == "convos" # --- run_hook ---