diff --git a/CHANGELOG.md b/CHANGELOG.md index c17442b..ad2ae98 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,14 +8,16 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), ## [3.3.4] — unreleased +### Added + +- **`mempalace init` now prompts to mine the same directory.** After entity confirmation, room detection, and gitignore guard, `init` asks `Mine this directory now? [Y/n]` (default yes) and runs `mine()` in-process if accepted. `--yes` skips the prompt and auto-mines, so non-interactive invocations are still hands-off. Declining prints the exact `mempalace mine ` command for later. Removes the "remember to type the next command" friction that existed because the rooms + entities just set up are almost always mined immediately. (#1181) +- **Cross-wing topic tunnels.** When two wings have confirmed `TOPIC` labels in common (the LLM-refine bucket from `mempalace init --llm`), the miner now drops a symmetric tunnel between them at mine time so the palace graph reflects shared themes (frameworks, vendors, recurring concepts). Tunnels are routed through the existing `create_tunnel` storage so they share dedup and persistence with explicit tunnels. Topic tunnels are stored under a synthetic `topic:` room and tagged with `kind: "topic"` on the stored dict — this keeps them distinct from literal folder-derived rooms of the same name (a wing with both an `Angular` folder room and an `Angular` topic tunnel no longer collides at `follow_tunnels` read time) and gives LLMs scanning `list_tunnels` a visible discriminator. Threshold is configurable via `MEMPALACE_TOPIC_TUNNEL_MIN_COUNT` env var or `topic_tunnel_min_count` in `~/.mempalace/config.json` (default `1`). Manifest-dependency overlap and per-topic allow/deny lists remain out of scope. (#1180) + ### Bug Fixes - **CLI `mempalace search` retrieval quality.** The CLI was using pure ChromaDB cosine distance with no BM25 rerank, so drawers containing every query term but embedding as noise (directory listings, diff output, shell logs) scored `Match: 0.0` alongside genuinely irrelevant results with no way to tell them apart. Wired the CLI through the same `_hybrid_rank` the `mempalace_search` MCP tool already used, and surfaced both `cosine=` and `bm25=` scores in the output so users see which component of the match is firing. MCP search was unaffected; this fixes the human-facing CLI parity gap. - **Legacy-palace distance-metric warning.** CLI search now detects palaces created before `hnsw:space=cosine` was consistently set and prints a one-line notice pointing at `mempalace repair`. Without the warning such palaces silently used L2 distance, under which the similarity display floored every result to `Match: 0.0`. New palaces mined today already set cosine correctly and now have invariant tests pinning that behavior so future refactors can't silently regress it. (#1179) - -### Added - -- **Cross-wing topic tunnels.** When two wings have confirmed `TOPIC` labels in common (the LLM-refine bucket from `mempalace init --llm`), the miner now drops a symmetric tunnel between them at mine time so the palace graph reflects shared themes (frameworks, vendors, recurring concepts). Tunnels are routed through the existing `create_tunnel` storage so they share dedup and persistence with explicit tunnels. Topic tunnels are stored under a synthetic `topic:` room and tagged with `kind: "topic"` on the stored dict — this keeps them distinct from literal folder-derived rooms of the same name (a wing with both an `Angular` folder room and an `Angular` topic tunnel no longer collides at `follow_tunnels` read time) and gives LLMs scanning `list_tunnels` a visible discriminator. Threshold is configurable via `MEMPALACE_TOPIC_TUNNEL_MIN_COUNT` env var or `topic_tunnel_min_count` in `~/.mempalace/config.json` (default `1`). Manifest-dependency overlap and per-topic allow/deny lists remain out of scope. (#1180) +- **Graceful Ctrl-C during `mempalace mine`.** Interrupting a long mine no longer dumps a multi-frame `KeyboardInterrupt` traceback. The main file-processing loop now catches the signal, prints `files_processed: N/M`, `drawers_filed: K`, and `last_file:` so the user knows what landed, then exits with code 130 (standard SIGINT). Already-filed drawers are upserted idempotently on re-mine via deterministic IDs, so resuming is safe. The hooks PID lock at `~/.mempalace/hook_state/mine.pid` is now also actively cleaned up in a `finally` when its entry points at us — clean exit, error, or interrupt — preventing the next hook fire from briefly waiting on a stale PID. (#1182) --- diff --git a/mempalace/cli.py b/mempalace/cli.py index 3dff964..39bf9e5 100644 --- a/mempalace/cli.py +++ b/mempalace/cli.py @@ -156,6 +156,64 @@ def cmd_init(args): # Pass 3: protect git repos from accidentally committing per-project files _ensure_mempalace_files_gitignored(args.dir) + # Pass 4: offer to run mine immediately. The directory just had its + # rooms + entities set up, so 99% of users will mine next anyway — + # asking here removes the "remember to type the next command" friction. + # `--yes` skips the prompt and auto-mines (non-interactive path). + _maybe_run_mine_after_init(args, cfg) + + +def _maybe_run_mine_after_init(args, cfg) -> None: + """Prompt the user to mine the directory just initialised, or auto-mine + when ``--yes`` was passed. Extracted so the prompt path is unit-testable. + + Mine errors are surfaced (not swallowed): a failing mine exits with a + non-zero status via :func:`sys.exit` so downstream scripts can see it. + """ + from .miner import mine, scan_project + + project_dir = args.dir + auto = bool(getattr(args, "yes", False)) + + # Pre-scan so the user knows roughly what they're agreeing to before + # the prompt. The scan is fast and we'd run it inside mine() anyway. + try: + files = scan_project(project_dir) + file_count = len(files) + except Exception: + file_count = None + + if auto: + print("\n Auto-mining (--yes).") + else: + scope = ( + f" (~{file_count} files in scope)" + if isinstance(file_count, int) and file_count > 0 + else "" + ) + print(f"\n Ready to mine{scope}.") + try: + answer = input(" Mine this directory now? [Y/n] ").strip().lower() + except EOFError: + # Non-interactive stdin (e.g. piped) — treat like decline so + # we don't block. User can re-run with --yes to opt in. + answer = "n" + if answer not in ("", "y", "yes"): + print(f"\n Skipped. Run `mempalace mine {project_dir}` when ready.") + return + + palace_path = cfg.palace_path + try: + mine(project_dir=project_dir, palace_path=palace_path) + except KeyboardInterrupt: + # mine() handles its own SIGINT summary + sys.exit(130); re-raise + # any KeyboardInterrupt that escapes (shouldn't happen) so the + # shell still sees a clean interrupt rather than a swallowed one. + raise + except Exception as e: + print(f"\n ERROR: mine failed: {e}", file=sys.stderr) + sys.exit(1) + def cmd_mine(args): palace_path = os.path.expanduser(args.palace) if args.palace else MempalaceConfig().palace_path diff --git a/mempalace/miner.py b/mempalace/miner.py index 6dea4a1..9a2283b 100644 --- a/mempalace/miner.py +++ b/mempalace/miner.py @@ -1025,50 +1025,117 @@ def mine( total_drawers = 0 files_skipped = 0 + files_processed = 0 + last_file = None room_counts = defaultdict(int) - for i, filepath in enumerate(files, 1): - drawers, room = process_file( - filepath=filepath, - project_path=project_path, - collection=collection, - wing=wing, - rooms=rooms, - agent=agent, - dry_run=dry_run, - closets_col=closets_col, + try: + for i, filepath in enumerate(files, 1): + try: + drawers, room = process_file( + filepath=filepath, + project_path=project_path, + collection=collection, + wing=wing, + rooms=rooms, + agent=agent, + dry_run=dry_run, + closets_col=closets_col, + ) + except KeyboardInterrupt: + # Re-raise so the outer handler prints the summary; we + # capture the last-attempted file via last_file below. + last_file = filepath.name + raise + files_processed = i + last_file = filepath.name + if drawers == 0 and not dry_run: + files_skipped += 1 + else: + total_drawers += drawers + room_counts[room] += 1 + if not dry_run: + print(f" + [{i:4}/{len(files)}] {filepath.name[:50]:50} +{drawers}") + + if not dry_run: + # Cross-wing topic tunnels: after every file in this wing has been + # processed, link this wing to any other wing that shares a + # confirmed TOPIC label. Out of scope for v1: manifest-dependency + # overlap, per-topic allow/deny lists, search-result surfacing. + try: + tunnels_added = _compute_topic_tunnels_for_wing(wing) + if tunnels_added: + print(f"\n Topic tunnels: +{tunnels_added} cross-wing link(s)") + except Exception as e: + # Tunnel computation must never fail a mine — degrade quietly. + print( + f"\n WARNING: topic tunnel computation skipped — {e}", + file=sys.stderr, + ) + + print(f"\n{'=' * 55}") + print(" Done.") + print(f" Files processed: {len(files) - files_skipped}") + print(f" Files skipped (already filed): {files_skipped}") + print(f" Drawers filed: {total_drawers}") + print("\n By room:") + for room, count in sorted(room_counts.items(), key=lambda x: x[1], reverse=True): + print(f" {room:20} {count} files") + print('\n Next: mempalace search "what you\'re looking for"') + print(f"{'=' * 55}\n") + except KeyboardInterrupt: + # Idempotent re-mine: deterministic drawer IDs mean already-filed + # drawers upsert to the same row on next run, so partial progress + # is safe to leave in place. A second Ctrl-C during this print + # propagates to the default handler — we don't try to catch + # everything. + print("\n\n Mine interrupted.") + print(f" files_processed: {files_processed}/{len(files)}") + print(f" drawers_filed: {total_drawers}") + print(f" last_file: {last_file or ''}") + print( + f"\n Re-run `mempalace mine {project_dir}` to resume — " + "already-filed drawers are\n upserted idempotently and will not duplicate.\n" ) - if drawers == 0 and not dry_run: - files_skipped += 1 - else: - total_drawers += drawers - room_counts[room] += 1 - if not dry_run: - print(f" + [{i:4}/{len(files)}] {filepath.name[:50]:50} +{drawers}") + sys.exit(130) + finally: + # Clean up the hooks-side PID lock if it points at us. Stale + # entries already pass _pid_alive() == False on POSIX, but + # actively removing the file makes the state observable + # (callers can stat it) and avoids accidental PID reuse on + # short-lived test runs. Only remove if the file claims our + # own PID — never another process's. + _cleanup_mine_pid_file() - if not dry_run: - # Cross-wing topic tunnels: after every file in this wing has been - # processed, link this wing to any other wing that shares a - # confirmed TOPIC label. Out of scope for v1: manifest-dependency - # overlap, per-topic allow/deny lists, search-result surfacing. - try: - tunnels_added = _compute_topic_tunnels_for_wing(wing) - if tunnels_added: - print(f"\n Topic tunnels: +{tunnels_added} cross-wing link(s)") - except Exception as e: - # Tunnel computation must never fail a mine — degrade quietly. - print(f"\n WARNING: topic tunnel computation skipped — {e}", file=sys.stderr) - print(f"\n{'=' * 55}") - print(" Done.") - print(f" Files processed: {len(files) - files_skipped}") - print(f" Files skipped (already filed): {files_skipped}") - print(f" Drawers filed: {total_drawers}") - print("\n By room:") - for room, count in sorted(room_counts.items(), key=lambda x: x[1], reverse=True): - print(f" {room:20} {count} files") - print('\n Next: mempalace search "what you\'re looking for"') - print(f"{'=' * 55}\n") +def _cleanup_mine_pid_file() -> None: + """Remove the global mine PID file if it currently points at us. + + The PID file (``~/.mempalace/hook_state/mine.pid``, written by the + hook in :func:`mempalace.hooks_cli._spawn_mine`) tracks the PID of + the most recently spawned mine subprocess so the hook can dedup + concurrent auto-ingest fires. When that subprocess exits — cleanly, + on error, or via Ctrl-C — it should remove its own entry so the + next hook fire isn't briefly fooled by a stale PID before + ``_pid_alive`` returns False. + + We only delete the file if it claims our own PID; any other PID is + left alone (could be an unrelated mine running concurrently from + a different worktree / session). + """ + try: + from .hooks_cli import _MINE_PID_FILE + except Exception: + return + try: + if not _MINE_PID_FILE.exists(): + return + recorded = _MINE_PID_FILE.read_text().strip() + if recorded and recorded.isdigit() and int(recorded) == os.getpid(): + _MINE_PID_FILE.unlink() + except OSError: + # Best-effort cleanup; never fail the mine over PID bookkeeping. + pass def _compute_topic_tunnels_for_wing(wing: str) -> int: diff --git a/tests/test_cli.py b/tests/test_cli.py index 1c4dfbd..a2a5cbf 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -108,6 +108,7 @@ def test_cmd_init_no_entities(mock_config_cls, tmp_path): with ( patch("mempalace.entity_detector.scan_for_detection", return_value=[]), patch("mempalace.room_detector_local.detect_rooms_local") as mock_rooms, + patch("mempalace.cli._maybe_run_mine_after_init"), ): cmd_init(args) mock_rooms.assert_called_once_with(project_dir=str(tmp_path), yes=True) @@ -126,6 +127,7 @@ def test_cmd_init_with_entities(mock_config_cls, tmp_path): patch("mempalace.entity_detector.confirm_entities", return_value=confirmed), patch("mempalace.room_detector_local.detect_rooms_local"), patch("builtins.open", MagicMock()), + patch("mempalace.cli._maybe_run_mine_after_init"), ): cmd_init(args) @@ -140,12 +142,122 @@ def test_cmd_init_with_entities_zero_total(mock_config_cls, tmp_path, capsys): patch("mempalace.entity_detector.scan_for_detection", return_value=fake_files), patch("mempalace.entity_detector.detect_entities", return_value=detected), patch("mempalace.room_detector_local.detect_rooms_local"), + patch("mempalace.cli._maybe_run_mine_after_init"), ): cmd_init(args) out = capsys.readouterr().out assert "No entities detected" in out +# ── _maybe_run_mine_after_init (init → mine prompt, #1181) ───────────── + + +def _init_args(tmp_path, *, yes=False): + return argparse.Namespace(dir=str(tmp_path), yes=yes) + + +def _fake_cfg(tmp_path): + cfg = MagicMock() + cfg.palace_path = str(tmp_path / "palace") + return cfg + + +def test_maybe_run_mine_prompt_accepted_runs_mine(tmp_path): + """Empty / 'y' / 'yes' on the prompt triggers mine() in-process.""" + from mempalace.cli import _maybe_run_mine_after_init + + args = _init_args(tmp_path, yes=False) + cfg = _fake_cfg(tmp_path) + with ( + patch("mempalace.miner.mine") as mock_mine, + patch("mempalace.miner.scan_project", return_value=["a", "b", "c"]), + patch("builtins.input", return_value=""), + ): + _maybe_run_mine_after_init(args, cfg) + mock_mine.assert_called_once_with(project_dir=str(tmp_path), palace_path=cfg.palace_path) + + +def test_maybe_run_mine_prompt_yes_accepted_runs_mine(tmp_path): + """Explicit 'y' answer also runs mine().""" + from mempalace.cli import _maybe_run_mine_after_init + + args = _init_args(tmp_path, yes=False) + cfg = _fake_cfg(tmp_path) + with ( + patch("mempalace.miner.mine") as mock_mine, + patch("mempalace.miner.scan_project", return_value=[]), + patch("builtins.input", return_value="Y"), + ): + _maybe_run_mine_after_init(args, cfg) + mock_mine.assert_called_once() + + +def test_maybe_run_mine_prompt_declined_prints_hint(tmp_path, capsys): + """'n' answer skips mine() and prints the resume hint.""" + from mempalace.cli import _maybe_run_mine_after_init + + args = _init_args(tmp_path, yes=False) + cfg = _fake_cfg(tmp_path) + with ( + patch("mempalace.miner.mine") as mock_mine, + patch("mempalace.miner.scan_project", return_value=[]), + patch("builtins.input", return_value="n"), + ): + _maybe_run_mine_after_init(args, cfg) + mock_mine.assert_not_called() + out = capsys.readouterr().out + assert f"mempalace mine {tmp_path}" in out + assert "Skipped" in out + + +def test_maybe_run_mine_yes_flag_skips_prompt_and_mines(tmp_path): + """`--yes` runs mine() automatically without calling input().""" + from mempalace.cli import _maybe_run_mine_after_init + + args = _init_args(tmp_path, yes=True) + cfg = _fake_cfg(tmp_path) + with ( + patch("mempalace.miner.mine") as mock_mine, + patch("mempalace.miner.scan_project", return_value=[]), + patch("builtins.input", side_effect=AssertionError("input() must not be called")), + ): + _maybe_run_mine_after_init(args, cfg) + mock_mine.assert_called_once_with(project_dir=str(tmp_path), palace_path=cfg.palace_path) + + +def test_maybe_run_mine_eof_on_stdin_treated_as_decline(tmp_path, capsys): + """Piped / non-interactive stdin (EOFError) declines without crashing.""" + from mempalace.cli import _maybe_run_mine_after_init + + args = _init_args(tmp_path, yes=False) + cfg = _fake_cfg(tmp_path) + with ( + patch("mempalace.miner.mine") as mock_mine, + patch("mempalace.miner.scan_project", return_value=[]), + patch("builtins.input", side_effect=EOFError), + ): + _maybe_run_mine_after_init(args, cfg) + mock_mine.assert_not_called() + assert "Skipped" in capsys.readouterr().out + + +def test_maybe_run_mine_failure_surfaces_via_exit(tmp_path, capsys): + """Mine errors are not swallowed — they exit non-zero with an error line.""" + from mempalace.cli import _maybe_run_mine_after_init + + args = _init_args(tmp_path, yes=True) + cfg = _fake_cfg(tmp_path) + with ( + patch("mempalace.miner.mine", side_effect=RuntimeError("boom")), + patch("mempalace.miner.scan_project", return_value=[]), + ): + with pytest.raises(SystemExit) as exc_info: + _maybe_run_mine_after_init(args, cfg) + assert exc_info.value.code == 1 + err = capsys.readouterr().err + assert "boom" in err + + # ── cmd_mine ─────────────────────────────────────────────────────────── diff --git a/tests/test_miner.py b/tests/test_miner.py index 7f142bb..bb4f437 100644 --- a/tests/test_miner.py +++ b/tests/test_miner.py @@ -600,3 +600,118 @@ def test_mine_no_tunnel_when_only_one_wing_has_topics(tmp_path, monkeypatch): mine(str(project_root), str(palace_path)) assert palace_graph.list_tunnels() == [] + + +# ── graceful Ctrl-C handling (#1182) ──────────────────────────────────── + + +def _make_minable_project(project_root: Path, n_files: int = 3) -> None: + """Create a tiny project with N readable files + a config so mine() runs.""" + for idx in range(n_files): + write_file( + project_root / f"f{idx}.py", + f"def fn_{idx}():\n print('hi {idx}')\n" * 20, + ) + with open(project_root / "mempalace.yaml", "w") as f: + yaml.dump( + { + "wing": "interrupt_test", + "rooms": [{"name": "general", "description": "General"}], + }, + f, + ) + + +def test_mine_keyboard_interrupt_prints_summary_and_exits_130(tmp_path, capsys): + """A KeyboardInterrupt mid-loop produces the clean summary + exit 130.""" + import pytest + from unittest.mock import patch + + project_root = tmp_path / "proj" + project_root.mkdir() + _make_minable_project(project_root, n_files=4) + palace_path = project_root / "palace" + + call_count = {"n": 0} + + def fake_process_file(*args, **kwargs): + call_count["n"] += 1 + if call_count["n"] == 2: + raise KeyboardInterrupt + return (1, "general") + + with patch("mempalace.miner.process_file", side_effect=fake_process_file): + with pytest.raises(SystemExit) as exc_info: + mine(str(project_root), str(palace_path)) + + assert exc_info.value.code == 130 + out = capsys.readouterr().out + assert "Mine interrupted." in out + assert "files_processed: 1/" in out + assert "drawers_filed:" in out + assert "last_file:" in out + assert "upserted idempotently" in out + + +def test_mine_cleans_up_pid_file_on_interrupt(tmp_path): + """Our own PID entry in mine.pid is removed in the finally clause.""" + import pytest + from unittest.mock import patch + + project_root = tmp_path / "proj" + project_root.mkdir() + _make_minable_project(project_root, n_files=2) + palace_path = project_root / "palace" + + pid_file = tmp_path / "mine.pid" + pid_file.write_text(str(os.getpid())) + + def fake_process_file(*args, **kwargs): + raise KeyboardInterrupt + + with ( + patch("mempalace.hooks_cli._MINE_PID_FILE", pid_file), + patch("mempalace.miner.process_file", side_effect=fake_process_file), + ): + with pytest.raises(SystemExit): + mine(str(project_root), str(palace_path)) + + assert not pid_file.exists(), "Our PID entry should be cleaned up on interrupt" + + +def test_mine_cleans_up_pid_file_on_clean_exit(tmp_path): + """Successful mine also removes its own PID entry in the finally clause.""" + from unittest.mock import patch + + project_root = tmp_path / "proj" + project_root.mkdir() + _make_minable_project(project_root, n_files=1) + palace_path = project_root / "palace" + + pid_file = tmp_path / "mine.pid" + pid_file.write_text(str(os.getpid())) + + with patch("mempalace.hooks_cli._MINE_PID_FILE", pid_file): + mine(str(project_root), str(palace_path)) + + assert not pid_file.exists() + + +def test_mine_does_not_remove_other_processes_pid_file(tmp_path): + """A PID file pointing at someone else's PID is left untouched.""" + from unittest.mock import patch + + project_root = tmp_path / "proj" + project_root.mkdir() + _make_minable_project(project_root, n_files=1) + palace_path = project_root / "palace" + + other_pid = os.getpid() + 999_999 # a PID that isn't us + pid_file = tmp_path / "mine.pid" + pid_file.write_text(str(other_pid)) + + with patch("mempalace.hooks_cli._MINE_PID_FILE", pid_file): + mine(str(project_root), str(palace_path)) + + assert pid_file.exists(), "Foreign PID entries must not be removed" + assert pid_file.read_text().strip() == str(other_pid)