diff --git a/CHANGELOG.md b/CHANGELOG.md
index c17442b..b5433ac 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,14 +8,17 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
## [3.3.4] — unreleased
+### Added
+
+- **`mempalace init` now prompts to mine the same directory.** After entity confirmation, room detection, and gitignore guard, `init` shows a one-line scope estimate (e.g. `~423 files (~12 MB) would be mined into this palace.`) computed from its existing corpus walk, then asks `Mine this directory now? [Y/n]` (default yes) and runs `mine()` in-process if accepted. The estimate fires before the prompt so users on a real corpus aren't surprised by a minutes-long ChromaDB write. Declining prints the exact `mempalace mine
` command for later. (#1181)
+- **New `--auto-mine` flag on `mempalace init`** for the non-interactive path (`mempalace init --auto-mine ` skips the mine prompt and runs mine directly). `--yes` retains its existing scope of entity auto-accept only and still prompts for the mine step, so existing scripted callers see no behaviour change; combining `--yes --auto-mine` gives a fully non-interactive setup. (#1181)
+- **Cross-wing topic tunnels.** When two wings have confirmed `TOPIC` labels in common (the LLM-refine bucket from `mempalace init --llm`), the miner now drops a symmetric tunnel between them at mine time so the palace graph reflects shared themes (frameworks, vendors, recurring concepts). Tunnels are routed through the existing `create_tunnel` storage so they share dedup and persistence with explicit tunnels. Topic tunnels are stored under a synthetic `topic:` room and tagged with `kind: "topic"` on the stored dict — this keeps them distinct from literal folder-derived rooms of the same name (a wing with both an `Angular` folder room and an `Angular` topic tunnel no longer collides at `follow_tunnels` read time) and gives LLMs scanning `list_tunnels` a visible discriminator. Threshold is configurable via `MEMPALACE_TOPIC_TUNNEL_MIN_COUNT` env var or `topic_tunnel_min_count` in `~/.mempalace/config.json` (default `1`). Manifest-dependency overlap and per-topic allow/deny lists remain out of scope. (#1180)
+
### Bug Fixes
- **CLI `mempalace search` retrieval quality.** The CLI was using pure ChromaDB cosine distance with no BM25 rerank, so drawers containing every query term but embedding as noise (directory listings, diff output, shell logs) scored `Match: 0.0` alongside genuinely irrelevant results with no way to tell them apart. Wired the CLI through the same `_hybrid_rank` the `mempalace_search` MCP tool already used, and surfaced both `cosine=` and `bm25=` scores in the output so users see which component of the match is firing. MCP search was unaffected; this fixes the human-facing CLI parity gap.
- **Legacy-palace distance-metric warning.** CLI search now detects palaces created before `hnsw:space=cosine` was consistently set and prints a one-line notice pointing at `mempalace repair`. Without the warning such palaces silently used L2 distance, under which the similarity display floored every result to `Match: 0.0`. New palaces mined today already set cosine correctly and now have invariant tests pinning that behavior so future refactors can't silently regress it. (#1179)
-
-### Added
-
-- **Cross-wing topic tunnels.** When two wings have confirmed `TOPIC` labels in common (the LLM-refine bucket from `mempalace init --llm`), the miner now drops a symmetric tunnel between them at mine time so the palace graph reflects shared themes (frameworks, vendors, recurring concepts). Tunnels are routed through the existing `create_tunnel` storage so they share dedup and persistence with explicit tunnels. Topic tunnels are stored under a synthetic `topic:` room and tagged with `kind: "topic"` on the stored dict — this keeps them distinct from literal folder-derived rooms of the same name (a wing with both an `Angular` folder room and an `Angular` topic tunnel no longer collides at `follow_tunnels` read time) and gives LLMs scanning `list_tunnels` a visible discriminator. Threshold is configurable via `MEMPALACE_TOPIC_TUNNEL_MIN_COUNT` env var or `topic_tunnel_min_count` in `~/.mempalace/config.json` (default `1`). Manifest-dependency overlap and per-topic allow/deny lists remain out of scope. (#1180)
+- **Graceful Ctrl-C during `mempalace mine`.** Interrupting a long mine no longer dumps a multi-frame `KeyboardInterrupt` traceback. The main file-processing loop now catches the signal, prints `files_processed: N/M`, `drawers_filed: K`, and `last_file:` so the user knows what landed, then exits with code 130 (standard SIGINT). Already-filed drawers are upserted idempotently on re-mine via deterministic IDs, so resuming is safe. The hooks PID lock at `~/.mempalace/hook_state/mine.pid` is now also actively cleaned up in a `finally` when its entry points at us — clean exit, error, or interrupt — preventing the next hook fire from briefly waiting on a stale PID. (#1182)
---
diff --git a/mempalace/cli.py b/mempalace/cli.py
index 3dff964..7382cf3 100644
--- a/mempalace/cli.py
+++ b/mempalace/cli.py
@@ -156,6 +156,107 @@ def cmd_init(args):
# Pass 3: protect git repos from accidentally committing per-project files
_ensure_mempalace_files_gitignored(args.dir)
+ # Pass 4: offer to run mine immediately. The directory just had its
+ # rooms + entities set up, so 99% of users will mine next anyway —
+ # asking here removes the "remember to type the next command" friction.
+ # `--auto-mine` skips the prompt and mines automatically; `--yes` is
+ # SCOPED to entity auto-accept and does NOT imply mining.
+ _maybe_run_mine_after_init(args, cfg)
+
+
+def _format_size_mb(num_bytes: int) -> str:
+ """Render a byte count as a human-readable size for the mine estimate.
+
+ < 1 MB rounds up to ``<1 MB`` so users never see a misleading ``0 MB``
+ on small projects. Otherwise reports an integer megabyte count.
+ """
+ if num_bytes <= 0:
+ return "<1 MB"
+ mb = num_bytes / (1024 * 1024)
+ if mb < 1:
+ return "<1 MB"
+ return f"{mb:.0f} MB"
+
+
+def _maybe_run_mine_after_init(args, cfg) -> None:
+ """Prompt the user to mine the directory just initialised, or auto-mine
+ when ``--auto-mine`` was passed. Extracted so the prompt path is
+ unit-testable.
+
+ Behaviour matrix:
+
+ - default (no flags) — prompt, default Yes, mine in-process if accepted
+ - ``--yes`` — entity auto-accept only; STILL prompts for the mine step
+ - ``--auto-mine`` — skip the mine prompt and mine directly
+ - ``--yes --auto-mine`` — fully non-interactive
+
+ Mine errors are surfaced (not swallowed): a failing mine exits with a
+ non-zero status via :func:`sys.exit` so downstream scripts can see it.
+ The pre-scan that produces the file-count estimate is reused as the
+ mine input so we never walk the corpus twice.
+ """
+ from .miner import mine, scan_project
+
+ project_dir = args.dir
+ auto_mine = bool(getattr(args, "auto_mine", False))
+
+ # Single corpus walk: this scan feeds BOTH the "what would be mined"
+ # estimate the user sees in the prompt AND the file list mine() will
+ # process. We pass the result into mine() via the `files` kwarg so it
+ # doesn't re-walk the tree.
+ try:
+ scanned_files = scan_project(project_dir)
+ file_count = len(scanned_files)
+ total_bytes = 0
+ for fp in scanned_files:
+ try:
+ total_bytes += fp.stat().st_size
+ except OSError:
+ # Skip files that vanished between scan and stat — mine()
+ # will skip them too.
+ continue
+ size_str = _format_size_mb(total_bytes)
+ except Exception:
+ scanned_files = None
+ file_count = None
+ size_str = None
+
+ # Show the scope estimate BEFORE the prompt so the user knows what
+ # they are agreeing to. On a real corpus mine takes minutes; hitting
+ # Enter on a default-Y prompt with no size cue is a footgun.
+ if isinstance(file_count, int):
+ if size_str:
+ print(f" ~{file_count} files (~{size_str}) would be mined into this palace.\n")
+ else:
+ print(f" ~{file_count} files would be mined into this palace.\n")
+
+ if not auto_mine:
+ try:
+ answer = input(" Mine this directory now? [Y/n] ").strip().lower()
+ except EOFError:
+ # Non-interactive stdin (e.g. piped) — treat like decline so
+ # we don't block. User can re-run with --auto-mine to opt in.
+ answer = "n"
+ if answer not in ("", "y", "yes"):
+ print(f"\n Skipped. Run `mempalace mine {shlex.quote(project_dir)}` when ready.")
+ return
+
+ palace_path = cfg.palace_path
+ try:
+ mine(
+ project_dir=project_dir,
+ palace_path=palace_path,
+ files=scanned_files,
+ )
+ except KeyboardInterrupt:
+ # mine() handles its own SIGINT summary + sys.exit(130); re-raise
+ # any KeyboardInterrupt that escapes (shouldn't happen) so the
+ # shell still sees a clean interrupt rather than a swallowed one.
+ raise
+ except Exception as e:
+ print(f"\n ERROR: mine failed: {e}", file=sys.stderr)
+ sys.exit(1)
+
def cmd_mine(args):
palace_path = os.path.expanduser(args.palace) if args.palace else MempalaceConfig().palace_path
@@ -585,6 +686,14 @@ def main():
action="store_true",
help="Auto-accept all detected entities (non-interactive)",
)
+ p_init.add_argument(
+ "--auto-mine",
+ action="store_true",
+ help=(
+ "Skip the post-init mine prompt and run mine automatically. "
+ "Combine with --yes for a fully non-interactive setup."
+ ),
+ )
p_init.add_argument(
"--lang",
default=None,
diff --git a/mempalace/miner.py b/mempalace/miner.py
index 6dea4a1..b593797 100644
--- a/mempalace/miner.py
+++ b/mempalace/miner.py
@@ -9,6 +9,7 @@ Stores verbatim chunks as drawers. No summaries. Ever.
import os
import sys
+import shlex
import hashlib
import fnmatch
from pathlib import Path
@@ -981,8 +982,16 @@ def mine(
dry_run: bool = False,
respect_gitignore: bool = True,
include_ignored: list = None,
+ files: list = None,
):
- """Mine a project directory into the palace."""
+ """Mine a project directory into the palace.
+
+ ``files`` may optionally be a pre-scanned list of file paths from
+ :func:`scan_project`. When provided, the corpus walk is skipped — the
+ caller (e.g. ``init`` showing a file-count estimate before the mine
+ prompt) avoids walking the tree twice. When ``None`` (the default),
+ ``mine`` walks the tree itself just like before.
+ """
project_path = Path(project_dir).expanduser().resolve()
config = load_config(project_dir)
@@ -990,11 +999,12 @@ def mine(
wing = wing_override or config["wing"]
rooms = config.get("rooms", [{"name": "general", "description": "All project files"}])
- files = scan_project(
- project_dir,
- respect_gitignore=respect_gitignore,
- include_ignored=include_ignored,
- )
+ if files is None:
+ files = scan_project(
+ project_dir,
+ respect_gitignore=respect_gitignore,
+ include_ignored=include_ignored,
+ )
if limit > 0:
files = files[:limit]
@@ -1025,50 +1035,117 @@ def mine(
total_drawers = 0
files_skipped = 0
+ files_processed = 0
+ last_file = None
room_counts = defaultdict(int)
- for i, filepath in enumerate(files, 1):
- drawers, room = process_file(
- filepath=filepath,
- project_path=project_path,
- collection=collection,
- wing=wing,
- rooms=rooms,
- agent=agent,
- dry_run=dry_run,
- closets_col=closets_col,
+ try:
+ for i, filepath in enumerate(files, 1):
+ try:
+ drawers, room = process_file(
+ filepath=filepath,
+ project_path=project_path,
+ collection=collection,
+ wing=wing,
+ rooms=rooms,
+ agent=agent,
+ dry_run=dry_run,
+ closets_col=closets_col,
+ )
+ except KeyboardInterrupt:
+ # Re-raise so the outer handler prints the summary; we
+ # capture the last-attempted file via last_file below.
+ last_file = filepath.name
+ raise
+ files_processed = i
+ last_file = filepath.name
+ if drawers == 0 and not dry_run:
+ files_skipped += 1
+ else:
+ total_drawers += drawers
+ room_counts[room] += 1
+ if not dry_run:
+ print(f" + [{i:4}/{len(files)}] {filepath.name[:50]:50} +{drawers}")
+
+ if not dry_run:
+ # Cross-wing topic tunnels: after every file in this wing has been
+ # processed, link this wing to any other wing that shares a
+ # confirmed TOPIC label. Out of scope for v1: manifest-dependency
+ # overlap, per-topic allow/deny lists, search-result surfacing.
+ try:
+ tunnels_added = _compute_topic_tunnels_for_wing(wing)
+ if tunnels_added:
+ print(f"\n Topic tunnels: +{tunnels_added} cross-wing link(s)")
+ except Exception as e:
+ # Tunnel computation must never fail a mine — degrade quietly.
+ print(
+ f"\n WARNING: topic tunnel computation skipped — {e}",
+ file=sys.stderr,
+ )
+
+ print(f"\n{'=' * 55}")
+ print(" Done.")
+ print(f" Files processed: {len(files) - files_skipped}")
+ print(f" Files skipped (already filed): {files_skipped}")
+ print(f" Drawers filed: {total_drawers}")
+ print("\n By room:")
+ for room, count in sorted(room_counts.items(), key=lambda x: x[1], reverse=True):
+ print(f" {room:20} {count} files")
+ print('\n Next: mempalace search "what you\'re looking for"')
+ print(f"{'=' * 55}\n")
+ except KeyboardInterrupt:
+ # Idempotent re-mine: deterministic drawer IDs mean already-filed
+ # drawers upsert to the same row on next run, so partial progress
+ # is safe to leave in place. A second Ctrl-C during this print
+ # propagates to the default handler — we don't try to catch
+ # everything.
+ print("\n\n Mine interrupted.")
+ print(f" files_processed: {files_processed}/{len(files)}")
+ print(f" drawers_filed: {total_drawers}")
+ print(f" last_file: {last_file or ''}")
+ print(
+ f"\n Re-run `mempalace mine {shlex.quote(project_dir)}` to resume — "
+ "already-filed drawers are\n upserted idempotently and will not duplicate.\n"
)
- if drawers == 0 and not dry_run:
- files_skipped += 1
- else:
- total_drawers += drawers
- room_counts[room] += 1
- if not dry_run:
- print(f" + [{i:4}/{len(files)}] {filepath.name[:50]:50} +{drawers}")
+ sys.exit(130)
+ finally:
+ # Clean up the hooks-side PID lock if it points at us. Stale
+ # entries already pass _pid_alive() == False on POSIX, but
+ # actively removing the file makes the state observable
+ # (callers can stat it) and avoids accidental PID reuse on
+ # short-lived test runs. Only remove if the file claims our
+ # own PID — never another process's.
+ _cleanup_mine_pid_file()
- if not dry_run:
- # Cross-wing topic tunnels: after every file in this wing has been
- # processed, link this wing to any other wing that shares a
- # confirmed TOPIC label. Out of scope for v1: manifest-dependency
- # overlap, per-topic allow/deny lists, search-result surfacing.
- try:
- tunnels_added = _compute_topic_tunnels_for_wing(wing)
- if tunnels_added:
- print(f"\n Topic tunnels: +{tunnels_added} cross-wing link(s)")
- except Exception as e:
- # Tunnel computation must never fail a mine — degrade quietly.
- print(f"\n WARNING: topic tunnel computation skipped — {e}", file=sys.stderr)
- print(f"\n{'=' * 55}")
- print(" Done.")
- print(f" Files processed: {len(files) - files_skipped}")
- print(f" Files skipped (already filed): {files_skipped}")
- print(f" Drawers filed: {total_drawers}")
- print("\n By room:")
- for room, count in sorted(room_counts.items(), key=lambda x: x[1], reverse=True):
- print(f" {room:20} {count} files")
- print('\n Next: mempalace search "what you\'re looking for"')
- print(f"{'=' * 55}\n")
+def _cleanup_mine_pid_file() -> None:
+ """Remove the global mine PID file if it currently points at us.
+
+ The PID file (``~/.mempalace/hook_state/mine.pid``, written by the
+ hook in :func:`mempalace.hooks_cli._spawn_mine`) tracks the PID of
+ the most recently spawned mine subprocess so the hook can dedup
+ concurrent auto-ingest fires. When that subprocess exits — cleanly,
+ on error, or via Ctrl-C — it should remove its own entry so the
+ next hook fire isn't briefly fooled by a stale PID before
+ ``_pid_alive`` returns False.
+
+ We only delete the file if it claims our own PID; any other PID is
+ left alone (could be an unrelated mine running concurrently from
+ a different worktree / session).
+ """
+ try:
+ from .hooks_cli import _MINE_PID_FILE
+ except Exception:
+ return
+ try:
+ if not _MINE_PID_FILE.exists():
+ return
+ recorded = _MINE_PID_FILE.read_text().strip()
+ if recorded and recorded.isdigit() and int(recorded) == os.getpid():
+ _MINE_PID_FILE.unlink()
+ except OSError:
+ # Best-effort cleanup; never fail the mine over PID bookkeeping.
+ pass
def _compute_topic_tunnels_for_wing(wing: str) -> int:
diff --git a/tests/test_cli.py b/tests/test_cli.py
index 1c4dfbd..5d36ab7 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -1,6 +1,7 @@
"""Tests for mempalace.cli — the main CLI dispatcher."""
import argparse
+import shlex
import sys
from pathlib import Path
from unittest.mock import MagicMock, patch
@@ -108,6 +109,7 @@ def test_cmd_init_no_entities(mock_config_cls, tmp_path):
with (
patch("mempalace.entity_detector.scan_for_detection", return_value=[]),
patch("mempalace.room_detector_local.detect_rooms_local") as mock_rooms,
+ patch("mempalace.cli._maybe_run_mine_after_init"),
):
cmd_init(args)
mock_rooms.assert_called_once_with(project_dir=str(tmp_path), yes=True)
@@ -126,6 +128,7 @@ def test_cmd_init_with_entities(mock_config_cls, tmp_path):
patch("mempalace.entity_detector.confirm_entities", return_value=confirmed),
patch("mempalace.room_detector_local.detect_rooms_local"),
patch("builtins.open", MagicMock()),
+ patch("mempalace.cli._maybe_run_mine_after_init"),
):
cmd_init(args)
@@ -140,12 +143,238 @@ def test_cmd_init_with_entities_zero_total(mock_config_cls, tmp_path, capsys):
patch("mempalace.entity_detector.scan_for_detection", return_value=fake_files),
patch("mempalace.entity_detector.detect_entities", return_value=detected),
patch("mempalace.room_detector_local.detect_rooms_local"),
+ patch("mempalace.cli._maybe_run_mine_after_init"),
):
cmd_init(args)
out = capsys.readouterr().out
assert "No entities detected" in out
+# ── _maybe_run_mine_after_init (init → mine prompt, #1181) ─────────────
+
+
+def _init_args(tmp_path, *, yes=False, auto_mine=False):
+ return argparse.Namespace(dir=str(tmp_path), yes=yes, auto_mine=auto_mine)
+
+
+def _fake_cfg(tmp_path):
+ cfg = MagicMock()
+ cfg.palace_path = str(tmp_path / "palace")
+ return cfg
+
+
+def _fake_scanned(tmp_path, n=3):
+ """Build n real Path objects with stat()-able sizes for the scan estimate."""
+ paths = []
+ for i in range(n):
+ p = tmp_path / f"f{i}.txt"
+ p.write_text("x" * 1024) # 1 KB each
+ paths.append(p)
+ return paths
+
+
+def test_maybe_run_mine_prompt_accepted_runs_mine(tmp_path):
+ """Empty / 'y' / 'yes' on the prompt triggers mine() in-process."""
+ from mempalace.cli import _maybe_run_mine_after_init
+
+ args = _init_args(tmp_path, yes=False, auto_mine=False)
+ cfg = _fake_cfg(tmp_path)
+ scanned = _fake_scanned(tmp_path, n=3)
+ with (
+ patch("mempalace.miner.mine") as mock_mine,
+ patch("mempalace.miner.scan_project", return_value=scanned),
+ patch("builtins.input", return_value=""),
+ ):
+ _maybe_run_mine_after_init(args, cfg)
+ mock_mine.assert_called_once_with(
+ project_dir=str(tmp_path),
+ palace_path=cfg.palace_path,
+ files=scanned,
+ )
+
+
+def test_maybe_run_mine_prompt_yes_accepted_runs_mine(tmp_path):
+ """Explicit 'y' answer also runs mine()."""
+ from mempalace.cli import _maybe_run_mine_after_init
+
+ args = _init_args(tmp_path, yes=False, auto_mine=False)
+ cfg = _fake_cfg(tmp_path)
+ with (
+ patch("mempalace.miner.mine") as mock_mine,
+ patch("mempalace.miner.scan_project", return_value=[]),
+ patch("builtins.input", return_value="Y"),
+ ):
+ _maybe_run_mine_after_init(args, cfg)
+ mock_mine.assert_called_once()
+
+
+def test_maybe_run_mine_prompt_declined_prints_hint(tmp_path, capsys):
+ """'n' answer skips mine() and prints the resume hint."""
+ from mempalace.cli import _maybe_run_mine_after_init
+
+ args = _init_args(tmp_path, yes=False, auto_mine=False)
+ cfg = _fake_cfg(tmp_path)
+ with (
+ patch("mempalace.miner.mine") as mock_mine,
+ patch("mempalace.miner.scan_project", return_value=[]),
+ patch("builtins.input", return_value="n"),
+ ):
+ _maybe_run_mine_after_init(args, cfg)
+ mock_mine.assert_not_called()
+ out = capsys.readouterr().out
+ # shlex.quote is a no-op on POSIX-safe paths but wraps Windows paths
+ # (which contain backslashes) in single quotes, so the assertion has
+ # to mirror what the production code actually emits.
+ assert f"mempalace mine {shlex.quote(str(tmp_path))}" in out
+ assert "Skipped" in out
+
+
+def test_maybe_run_mine_yes_alone_still_prompts(tmp_path):
+ """`--yes` is scoped to entity auto-accept and MUST still prompt for mine.
+
+ Regression guard for the flag-overload review feedback on #1183: extending
+ `--yes` to also auto-mine would silently change behaviour for scripted
+ callers and turn a fast command into a minutes-long ChromaDB write.
+ """
+ from mempalace.cli import _maybe_run_mine_after_init
+
+ args = _init_args(tmp_path, yes=True, auto_mine=False)
+ cfg = _fake_cfg(tmp_path)
+ with (
+ patch("mempalace.miner.mine") as mock_mine,
+ patch("mempalace.miner.scan_project", return_value=[]),
+ patch("builtins.input", return_value="n") as mock_input,
+ ):
+ _maybe_run_mine_after_init(args, cfg)
+ mock_input.assert_called_once() # the prompt MUST fire
+ mock_mine.assert_not_called()
+
+
+def test_maybe_run_mine_auto_mine_skips_prompt(tmp_path):
+ """`--auto-mine` runs mine() automatically without calling input()."""
+ from mempalace.cli import _maybe_run_mine_after_init
+
+ args = _init_args(tmp_path, yes=False, auto_mine=True)
+ cfg = _fake_cfg(tmp_path)
+ scanned = _fake_scanned(tmp_path, n=2)
+ with (
+ patch("mempalace.miner.mine") as mock_mine,
+ patch("mempalace.miner.scan_project", return_value=scanned),
+ patch("builtins.input", side_effect=AssertionError("input() must not be called")),
+ ):
+ _maybe_run_mine_after_init(args, cfg)
+ mock_mine.assert_called_once_with(
+ project_dir=str(tmp_path),
+ palace_path=cfg.palace_path,
+ files=scanned,
+ )
+
+
+def test_maybe_run_mine_yes_and_auto_mine_fully_noninteractive(tmp_path):
+ """`--yes --auto-mine` together: never call input(), always mine."""
+ from mempalace.cli import _maybe_run_mine_after_init
+
+ args = _init_args(tmp_path, yes=True, auto_mine=True)
+ cfg = _fake_cfg(tmp_path)
+ with (
+ patch("mempalace.miner.mine") as mock_mine,
+ patch("mempalace.miner.scan_project", return_value=[]),
+ patch("builtins.input", side_effect=AssertionError("input() must not be called")),
+ ):
+ _maybe_run_mine_after_init(args, cfg)
+ mock_mine.assert_called_once()
+
+
+def test_maybe_run_mine_decline_quotes_path_with_spaces(tmp_path, capsys):
+ """The resume hint must shell-quote the project dir so paths with
+ spaces / metacharacters produce a copy-paste-safe command."""
+ from mempalace.cli import _maybe_run_mine_after_init
+
+ spaced_dir = tmp_path / "my project dir"
+ spaced_dir.mkdir()
+ args = argparse.Namespace(dir=str(spaced_dir), yes=False, auto_mine=False)
+ cfg = _fake_cfg(tmp_path)
+ with (
+ patch("mempalace.miner.mine"),
+ patch("mempalace.miner.scan_project", return_value=[]),
+ patch("builtins.input", return_value="n"),
+ ):
+ _maybe_run_mine_after_init(args, cfg)
+ out = capsys.readouterr().out
+ # shlex.quote wraps paths with spaces (and Windows backslashes) in
+ # single quotes — the assertion must use the same shlex form so the
+ # test passes on every platform's tmp_path layout.
+ assert f"mempalace mine {shlex.quote(str(spaced_dir))}" in out
+ # Bare unquoted form must NOT appear — that's the bug we're guarding.
+ assert f"mempalace mine {spaced_dir} " not in out
+ assert f"mempalace mine {spaced_dir}`" not in out
+
+
+def test_maybe_run_mine_eof_on_stdin_treated_as_decline(tmp_path, capsys):
+ """Piped / non-interactive stdin (EOFError) declines without crashing."""
+ from mempalace.cli import _maybe_run_mine_after_init
+
+ args = _init_args(tmp_path, yes=False, auto_mine=False)
+ cfg = _fake_cfg(tmp_path)
+ with (
+ patch("mempalace.miner.mine") as mock_mine,
+ patch("mempalace.miner.scan_project", return_value=[]),
+ patch("builtins.input", side_effect=EOFError),
+ ):
+ _maybe_run_mine_after_init(args, cfg)
+ mock_mine.assert_not_called()
+ assert "Skipped" in capsys.readouterr().out
+
+
+def test_maybe_run_mine_failure_surfaces_via_exit(tmp_path, capsys):
+ """Mine errors are not swallowed — they exit non-zero with an error line."""
+ from mempalace.cli import _maybe_run_mine_after_init
+
+ args = _init_args(tmp_path, yes=False, auto_mine=True)
+ cfg = _fake_cfg(tmp_path)
+ with (
+ patch("mempalace.miner.mine", side_effect=RuntimeError("boom")),
+ patch("mempalace.miner.scan_project", return_value=[]),
+ ):
+ with pytest.raises(SystemExit) as exc_info:
+ _maybe_run_mine_after_init(args, cfg)
+ assert exc_info.value.code == 1
+ err = capsys.readouterr().err
+ assert "boom" in err
+
+
+def test_maybe_run_mine_estimate_appears_before_prompt(tmp_path, capsys):
+ """The file-count + size estimate line MUST render BEFORE the prompt.
+
+ Required by the spec: hitting Enter on a default-Y prompt with no size
+ info is a footgun on a real corpus where mine takes minutes. The user
+ must see scope before being asked to confirm.
+ """
+ from mempalace.cli import _maybe_run_mine_after_init
+
+ args = _init_args(tmp_path, yes=False, auto_mine=False)
+ cfg = _fake_cfg(tmp_path)
+ scanned = _fake_scanned(tmp_path, n=4) # 4 files * 1 KB each
+ captured_when_prompted = {}
+
+ def fake_input(prompt):
+ # Snapshot what stdout looked like at the moment the prompt fires.
+ captured_when_prompted["stdout"] = capsys.readouterr().out
+ return "n"
+
+ with (
+ patch("mempalace.miner.mine"),
+ patch("mempalace.miner.scan_project", return_value=scanned),
+ patch("builtins.input", side_effect=fake_input),
+ ):
+ _maybe_run_mine_after_init(args, cfg)
+
+ pre_prompt = captured_when_prompted["stdout"]
+ assert "4 files" in pre_prompt, f"file count missing from pre-prompt output: {pre_prompt!r}"
+ assert "MB" in pre_prompt, f"size estimate missing from pre-prompt output: {pre_prompt!r}"
+ assert "would be mined" in pre_prompt
+
+
# ── cmd_mine ───────────────────────────────────────────────────────────
diff --git a/tests/test_miner.py b/tests/test_miner.py
index 7f142bb..0619dbb 100644
--- a/tests/test_miner.py
+++ b/tests/test_miner.py
@@ -1,4 +1,5 @@
import os
+import shlex
import shutil
import tempfile
from pathlib import Path
@@ -600,3 +601,145 @@ def test_mine_no_tunnel_when_only_one_wing_has_topics(tmp_path, monkeypatch):
mine(str(project_root), str(palace_path))
assert palace_graph.list_tunnels() == []
+
+
+# ── graceful Ctrl-C handling (#1182) ────────────────────────────────────
+
+
+def _make_minable_project(project_root: Path, n_files: int = 3) -> None:
+ """Create a tiny project with N readable files + a config so mine() runs."""
+ for idx in range(n_files):
+ write_file(
+ project_root / f"f{idx}.py",
+ f"def fn_{idx}():\n print('hi {idx}')\n" * 20,
+ )
+ with open(project_root / "mempalace.yaml", "w") as f:
+ yaml.dump(
+ {
+ "wing": "interrupt_test",
+ "rooms": [{"name": "general", "description": "General"}],
+ },
+ f,
+ )
+
+
+def test_mine_keyboard_interrupt_prints_summary_and_exits_130(tmp_path, capsys):
+ """A KeyboardInterrupt mid-loop produces the clean summary + exit 130."""
+ import pytest
+ from unittest.mock import patch
+
+ project_root = tmp_path / "proj"
+ project_root.mkdir()
+ _make_minable_project(project_root, n_files=4)
+ palace_path = project_root / "palace"
+
+ call_count = {"n": 0}
+
+ def fake_process_file(*args, **kwargs):
+ call_count["n"] += 1
+ if call_count["n"] == 2:
+ raise KeyboardInterrupt
+ return (1, "general")
+
+ with patch("mempalace.miner.process_file", side_effect=fake_process_file):
+ with pytest.raises(SystemExit) as exc_info:
+ mine(str(project_root), str(palace_path))
+
+ assert exc_info.value.code == 130
+ out = capsys.readouterr().out
+ assert "Mine interrupted." in out
+ assert "files_processed: 1/" in out
+ assert "drawers_filed:" in out
+ assert "last_file:" in out
+ assert "upserted idempotently" in out
+
+
+def test_mine_keyboard_interrupt_quotes_path_with_spaces_in_resume_hint(tmp_path, capsys):
+ """Resume hint must shell-quote the project dir so a path containing
+ spaces / metacharacters yields a copy-paste-safe `mempalace mine ...`
+ command. Otherwise users on a path like "My Project" hit a broken
+ invocation when they re-run after Ctrl-C."""
+ import pytest
+ from unittest.mock import patch
+
+ project_root = tmp_path / "my project"
+ project_root.mkdir()
+ _make_minable_project(project_root, n_files=2)
+ palace_path = project_root / "palace"
+
+ def fake_process_file(*args, **kwargs):
+ raise KeyboardInterrupt
+
+ with patch("mempalace.miner.process_file", side_effect=fake_process_file):
+ with pytest.raises(SystemExit):
+ mine(str(project_root), str(palace_path))
+
+ out = capsys.readouterr().out
+ # Use shlex.quote so the assertion matches whatever the production
+ # code emits on this platform (POSIX paths with spaces vs Windows
+ # paths with backslashes both end up wrapped in single quotes).
+ assert f"mempalace mine {shlex.quote(str(project_root))}" in out
+
+
+def test_mine_cleans_up_pid_file_on_interrupt(tmp_path):
+ """Our own PID entry in mine.pid is removed in the finally clause."""
+ import pytest
+ from unittest.mock import patch
+
+ project_root = tmp_path / "proj"
+ project_root.mkdir()
+ _make_minable_project(project_root, n_files=2)
+ palace_path = project_root / "palace"
+
+ pid_file = tmp_path / "mine.pid"
+ pid_file.write_text(str(os.getpid()))
+
+ def fake_process_file(*args, **kwargs):
+ raise KeyboardInterrupt
+
+ with (
+ patch("mempalace.hooks_cli._MINE_PID_FILE", pid_file),
+ patch("mempalace.miner.process_file", side_effect=fake_process_file),
+ ):
+ with pytest.raises(SystemExit):
+ mine(str(project_root), str(palace_path))
+
+ assert not pid_file.exists(), "Our PID entry should be cleaned up on interrupt"
+
+
+def test_mine_cleans_up_pid_file_on_clean_exit(tmp_path):
+ """Successful mine also removes its own PID entry in the finally clause."""
+ from unittest.mock import patch
+
+ project_root = tmp_path / "proj"
+ project_root.mkdir()
+ _make_minable_project(project_root, n_files=1)
+ palace_path = project_root / "palace"
+
+ pid_file = tmp_path / "mine.pid"
+ pid_file.write_text(str(os.getpid()))
+
+ with patch("mempalace.hooks_cli._MINE_PID_FILE", pid_file):
+ mine(str(project_root), str(palace_path))
+
+ assert not pid_file.exists()
+
+
+def test_mine_does_not_remove_other_processes_pid_file(tmp_path):
+ """A PID file pointing at someone else's PID is left untouched."""
+ from unittest.mock import patch
+
+ project_root = tmp_path / "proj"
+ project_root.mkdir()
+ _make_minable_project(project_root, n_files=1)
+ palace_path = project_root / "palace"
+
+ other_pid = os.getpid() + 999_999 # a PID that isn't us
+ pid_file = tmp_path / "mine.pid"
+ pid_file.write_text(str(other_pid))
+
+ with patch("mempalace.hooks_cli._MINE_PID_FILE", pid_file):
+ mine(str(project_root), str(palace_path))
+
+ assert pid_file.exists(), "Foreign PID entries must not be removed"
+ assert pid_file.read_text().strip() == str(other_pid)