diff --git a/CHANGELOG.md b/CHANGELOG.md index ad2ae98..b5433ac 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,7 +10,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), ### Added -- **`mempalace init` now prompts to mine the same directory.** After entity confirmation, room detection, and gitignore guard, `init` asks `Mine this directory now? [Y/n]` (default yes) and runs `mine()` in-process if accepted. `--yes` skips the prompt and auto-mines, so non-interactive invocations are still hands-off. Declining prints the exact `mempalace mine ` command for later. Removes the "remember to type the next command" friction that existed because the rooms + entities just set up are almost always mined immediately. (#1181) +- **`mempalace init` now prompts to mine the same directory.** After entity confirmation, room detection, and gitignore guard, `init` shows a one-line scope estimate (e.g. `~423 files (~12 MB) would be mined into this palace.`) computed from its existing corpus walk, then asks `Mine this directory now? [Y/n]` (default yes) and runs `mine()` in-process if accepted. The estimate fires before the prompt so users on a real corpus aren't surprised by a minutes-long ChromaDB write. Declining prints the exact `mempalace mine ` command for later. (#1181) +- **New `--auto-mine` flag on `mempalace init`** for the non-interactive path (`mempalace init --auto-mine ` skips the mine prompt and runs mine directly). `--yes` retains its existing scope of entity auto-accept only and still prompts for the mine step, so existing scripted callers see no behaviour change; combining `--yes --auto-mine` gives a fully non-interactive setup. (#1181) - **Cross-wing topic tunnels.** When two wings have confirmed `TOPIC` labels in common (the LLM-refine bucket from `mempalace init --llm`), the miner now drops a symmetric tunnel between them at mine time so the palace graph reflects shared themes (frameworks, vendors, recurring concepts). Tunnels are routed through the existing `create_tunnel` storage so they share dedup and persistence with explicit tunnels. Topic tunnels are stored under a synthetic `topic:` room and tagged with `kind: "topic"` on the stored dict — this keeps them distinct from literal folder-derived rooms of the same name (a wing with both an `Angular` folder room and an `Angular` topic tunnel no longer collides at `follow_tunnels` read time) and gives LLMs scanning `list_tunnels` a visible discriminator. Threshold is configurable via `MEMPALACE_TOPIC_TUNNEL_MIN_COUNT` env var or `topic_tunnel_min_count` in `~/.mempalace/config.json` (default `1`). Manifest-dependency overlap and per-topic allow/deny lists remain out of scope. (#1180) ### Bug Fixes diff --git a/mempalace/cli.py b/mempalace/cli.py index 39bf9e5..88ad0ca 100644 --- a/mempalace/cli.py +++ b/mempalace/cli.py @@ -159,44 +159,83 @@ def cmd_init(args): # Pass 4: offer to run mine immediately. The directory just had its # rooms + entities set up, so 99% of users will mine next anyway — # asking here removes the "remember to type the next command" friction. - # `--yes` skips the prompt and auto-mines (non-interactive path). + # `--auto-mine` skips the prompt and mines automatically; `--yes` is + # SCOPED to entity auto-accept and does NOT imply mining. _maybe_run_mine_after_init(args, cfg) +def _format_size_mb(num_bytes: int) -> str: + """Render a byte count as a human-readable size for the mine estimate. + + < 1 MB rounds up to ``<1 MB`` so users never see a misleading ``0 MB`` + on small projects. Otherwise reports an integer megabyte count. + """ + if num_bytes <= 0: + return "<1 MB" + mb = num_bytes / (1024 * 1024) + if mb < 1: + return "<1 MB" + return f"{mb:.0f} MB" + + def _maybe_run_mine_after_init(args, cfg) -> None: """Prompt the user to mine the directory just initialised, or auto-mine - when ``--yes`` was passed. Extracted so the prompt path is unit-testable. + when ``--auto-mine`` was passed. Extracted so the prompt path is + unit-testable. + + Behaviour matrix: + + - default (no flags) — prompt, default Yes, mine in-process if accepted + - ``--yes`` — entity auto-accept only; STILL prompts for the mine step + - ``--auto-mine`` — skip the mine prompt and mine directly + - ``--yes --auto-mine`` — fully non-interactive Mine errors are surfaced (not swallowed): a failing mine exits with a non-zero status via :func:`sys.exit` so downstream scripts can see it. + The pre-scan that produces the file-count estimate is reused as the + mine input so we never walk the corpus twice. """ from .miner import mine, scan_project project_dir = args.dir - auto = bool(getattr(args, "yes", False)) + auto_mine = bool(getattr(args, "auto_mine", False)) - # Pre-scan so the user knows roughly what they're agreeing to before - # the prompt. The scan is fast and we'd run it inside mine() anyway. + # Single corpus walk: this scan feeds BOTH the "what would be mined" + # estimate the user sees in the prompt AND the file list mine() will + # process. We pass the result into mine() via the `files` kwarg so it + # doesn't re-walk the tree. try: - files = scan_project(project_dir) - file_count = len(files) + scanned_files = scan_project(project_dir) + file_count = len(scanned_files) + total_bytes = 0 + for fp in scanned_files: + try: + total_bytes += fp.stat().st_size + except OSError: + # Skip files that vanished between scan and stat — mine() + # will skip them too. + continue + size_str = _format_size_mb(total_bytes) except Exception: + scanned_files = None file_count = None + size_str = None - if auto: - print("\n Auto-mining (--yes).") - else: - scope = ( - f" (~{file_count} files in scope)" - if isinstance(file_count, int) and file_count > 0 - else "" - ) - print(f"\n Ready to mine{scope}.") + # Show the scope estimate BEFORE the prompt so the user knows what + # they are agreeing to. On a real corpus mine takes minutes; hitting + # Enter on a default-Y prompt with no size cue is a footgun. + if isinstance(file_count, int): + if size_str: + print(f" ~{file_count} files (~{size_str}) would be mined into this palace.\n") + else: + print(f" ~{file_count} files would be mined into this palace.\n") + + if not auto_mine: try: answer = input(" Mine this directory now? [Y/n] ").strip().lower() except EOFError: # Non-interactive stdin (e.g. piped) — treat like decline so - # we don't block. User can re-run with --yes to opt in. + # we don't block. User can re-run with --auto-mine to opt in. answer = "n" if answer not in ("", "y", "yes"): print(f"\n Skipped. Run `mempalace mine {project_dir}` when ready.") @@ -204,7 +243,11 @@ def _maybe_run_mine_after_init(args, cfg) -> None: palace_path = cfg.palace_path try: - mine(project_dir=project_dir, palace_path=palace_path) + mine( + project_dir=project_dir, + palace_path=palace_path, + files=scanned_files, + ) except KeyboardInterrupt: # mine() handles its own SIGINT summary + sys.exit(130); re-raise # any KeyboardInterrupt that escapes (shouldn't happen) so the @@ -643,6 +686,14 @@ def main(): action="store_true", help="Auto-accept all detected entities (non-interactive)", ) + p_init.add_argument( + "--auto-mine", + action="store_true", + help=( + "Skip the post-init mine prompt and run mine automatically. " + "Combine with --yes for a fully non-interactive setup." + ), + ) p_init.add_argument( "--lang", default=None, diff --git a/mempalace/miner.py b/mempalace/miner.py index 9a2283b..34f46ae 100644 --- a/mempalace/miner.py +++ b/mempalace/miner.py @@ -981,8 +981,16 @@ def mine( dry_run: bool = False, respect_gitignore: bool = True, include_ignored: list = None, + files: list = None, ): - """Mine a project directory into the palace.""" + """Mine a project directory into the palace. + + ``files`` may optionally be a pre-scanned list of file paths from + :func:`scan_project`. When provided, the corpus walk is skipped — the + caller (e.g. ``init`` showing a file-count estimate before the mine + prompt) avoids walking the tree twice. When ``None`` (the default), + ``mine`` walks the tree itself just like before. + """ project_path = Path(project_dir).expanduser().resolve() config = load_config(project_dir) @@ -990,11 +998,12 @@ def mine( wing = wing_override or config["wing"] rooms = config.get("rooms", [{"name": "general", "description": "All project files"}]) - files = scan_project( - project_dir, - respect_gitignore=respect_gitignore, - include_ignored=include_ignored, - ) + if files is None: + files = scan_project( + project_dir, + respect_gitignore=respect_gitignore, + include_ignored=include_ignored, + ) if limit > 0: files = files[:limit] diff --git a/tests/test_cli.py b/tests/test_cli.py index a2a5cbf..63b3892 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -152,8 +152,8 @@ def test_cmd_init_with_entities_zero_total(mock_config_cls, tmp_path, capsys): # ── _maybe_run_mine_after_init (init → mine prompt, #1181) ───────────── -def _init_args(tmp_path, *, yes=False): - return argparse.Namespace(dir=str(tmp_path), yes=yes) +def _init_args(tmp_path, *, yes=False, auto_mine=False): + return argparse.Namespace(dir=str(tmp_path), yes=yes, auto_mine=auto_mine) def _fake_cfg(tmp_path): @@ -162,26 +162,41 @@ def _fake_cfg(tmp_path): return cfg +def _fake_scanned(tmp_path, n=3): + """Build n real Path objects with stat()-able sizes for the scan estimate.""" + paths = [] + for i in range(n): + p = tmp_path / f"f{i}.txt" + p.write_text("x" * 1024) # 1 KB each + paths.append(p) + return paths + + def test_maybe_run_mine_prompt_accepted_runs_mine(tmp_path): """Empty / 'y' / 'yes' on the prompt triggers mine() in-process.""" from mempalace.cli import _maybe_run_mine_after_init - args = _init_args(tmp_path, yes=False) + args = _init_args(tmp_path, yes=False, auto_mine=False) cfg = _fake_cfg(tmp_path) + scanned = _fake_scanned(tmp_path, n=3) with ( patch("mempalace.miner.mine") as mock_mine, - patch("mempalace.miner.scan_project", return_value=["a", "b", "c"]), + patch("mempalace.miner.scan_project", return_value=scanned), patch("builtins.input", return_value=""), ): _maybe_run_mine_after_init(args, cfg) - mock_mine.assert_called_once_with(project_dir=str(tmp_path), palace_path=cfg.palace_path) + mock_mine.assert_called_once_with( + project_dir=str(tmp_path), + palace_path=cfg.palace_path, + files=scanned, + ) def test_maybe_run_mine_prompt_yes_accepted_runs_mine(tmp_path): """Explicit 'y' answer also runs mine().""" from mempalace.cli import _maybe_run_mine_after_init - args = _init_args(tmp_path, yes=False) + args = _init_args(tmp_path, yes=False, auto_mine=False) cfg = _fake_cfg(tmp_path) with ( patch("mempalace.miner.mine") as mock_mine, @@ -196,7 +211,7 @@ def test_maybe_run_mine_prompt_declined_prints_hint(tmp_path, capsys): """'n' answer skips mine() and prints the resume hint.""" from mempalace.cli import _maybe_run_mine_after_init - args = _init_args(tmp_path, yes=False) + args = _init_args(tmp_path, yes=False, auto_mine=False) cfg = _fake_cfg(tmp_path) with ( patch("mempalace.miner.mine") as mock_mine, @@ -210,11 +225,52 @@ def test_maybe_run_mine_prompt_declined_prints_hint(tmp_path, capsys): assert "Skipped" in out -def test_maybe_run_mine_yes_flag_skips_prompt_and_mines(tmp_path): - """`--yes` runs mine() automatically without calling input().""" +def test_maybe_run_mine_yes_alone_still_prompts(tmp_path): + """`--yes` is scoped to entity auto-accept and MUST still prompt for mine. + + Regression guard for the flag-overload review feedback on #1183: extending + `--yes` to also auto-mine would silently change behaviour for scripted + callers and turn a fast command into a minutes-long ChromaDB write. + """ from mempalace.cli import _maybe_run_mine_after_init - args = _init_args(tmp_path, yes=True) + args = _init_args(tmp_path, yes=True, auto_mine=False) + cfg = _fake_cfg(tmp_path) + with ( + patch("mempalace.miner.mine") as mock_mine, + patch("mempalace.miner.scan_project", return_value=[]), + patch("builtins.input", return_value="n") as mock_input, + ): + _maybe_run_mine_after_init(args, cfg) + mock_input.assert_called_once() # the prompt MUST fire + mock_mine.assert_not_called() + + +def test_maybe_run_mine_auto_mine_skips_prompt(tmp_path): + """`--auto-mine` runs mine() automatically without calling input().""" + from mempalace.cli import _maybe_run_mine_after_init + + args = _init_args(tmp_path, yes=False, auto_mine=True) + cfg = _fake_cfg(tmp_path) + scanned = _fake_scanned(tmp_path, n=2) + with ( + patch("mempalace.miner.mine") as mock_mine, + patch("mempalace.miner.scan_project", return_value=scanned), + patch("builtins.input", side_effect=AssertionError("input() must not be called")), + ): + _maybe_run_mine_after_init(args, cfg) + mock_mine.assert_called_once_with( + project_dir=str(tmp_path), + palace_path=cfg.palace_path, + files=scanned, + ) + + +def test_maybe_run_mine_yes_and_auto_mine_fully_noninteractive(tmp_path): + """`--yes --auto-mine` together: never call input(), always mine.""" + from mempalace.cli import _maybe_run_mine_after_init + + args = _init_args(tmp_path, yes=True, auto_mine=True) cfg = _fake_cfg(tmp_path) with ( patch("mempalace.miner.mine") as mock_mine, @@ -222,14 +278,14 @@ def test_maybe_run_mine_yes_flag_skips_prompt_and_mines(tmp_path): patch("builtins.input", side_effect=AssertionError("input() must not be called")), ): _maybe_run_mine_after_init(args, cfg) - mock_mine.assert_called_once_with(project_dir=str(tmp_path), palace_path=cfg.palace_path) + mock_mine.assert_called_once() def test_maybe_run_mine_eof_on_stdin_treated_as_decline(tmp_path, capsys): """Piped / non-interactive stdin (EOFError) declines without crashing.""" from mempalace.cli import _maybe_run_mine_after_init - args = _init_args(tmp_path, yes=False) + args = _init_args(tmp_path, yes=False, auto_mine=False) cfg = _fake_cfg(tmp_path) with ( patch("mempalace.miner.mine") as mock_mine, @@ -245,7 +301,7 @@ def test_maybe_run_mine_failure_surfaces_via_exit(tmp_path, capsys): """Mine errors are not swallowed — they exit non-zero with an error line.""" from mempalace.cli import _maybe_run_mine_after_init - args = _init_args(tmp_path, yes=True) + args = _init_args(tmp_path, yes=False, auto_mine=True) cfg = _fake_cfg(tmp_path) with ( patch("mempalace.miner.mine", side_effect=RuntimeError("boom")), @@ -258,6 +314,38 @@ def test_maybe_run_mine_failure_surfaces_via_exit(tmp_path, capsys): assert "boom" in err +def test_maybe_run_mine_estimate_appears_before_prompt(tmp_path, capsys): + """The file-count + size estimate line MUST render BEFORE the prompt. + + Required by the spec: hitting Enter on a default-Y prompt with no size + info is a footgun on a real corpus where mine takes minutes. The user + must see scope before being asked to confirm. + """ + from mempalace.cli import _maybe_run_mine_after_init + + args = _init_args(tmp_path, yes=False, auto_mine=False) + cfg = _fake_cfg(tmp_path) + scanned = _fake_scanned(tmp_path, n=4) # 4 files * 1 KB each + captured_when_prompted = {} + + def fake_input(prompt): + # Snapshot what stdout looked like at the moment the prompt fires. + captured_when_prompted["stdout"] = capsys.readouterr().out + return "n" + + with ( + patch("mempalace.miner.mine"), + patch("mempalace.miner.scan_project", return_value=scanned), + patch("builtins.input", side_effect=fake_input), + ): + _maybe_run_mine_after_init(args, cfg) + + pre_prompt = captured_when_prompted["stdout"] + assert "4 files" in pre_prompt, f"file count missing from pre-prompt output: {pre_prompt!r}" + assert "MB" in pre_prompt, f"size estimate missing from pre-prompt output: {pre_prompt!r}" + assert "would be mined" in pre_prompt + + # ── cmd_mine ───────────────────────────────────────────────────────────