From b7f0a8af01b902d0288869e28505d0fb94e36a99 Mon Sep 17 00:00:00 2001 From: bensig <1872138+bensig@users.noreply.github.com> Date: Sat, 25 Apr 2026 02:47:15 -0700 Subject: [PATCH 1/3] fix(graph): normalize wing slug at init so topic tunnels fire for hyphenated dirs (#1194) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `init` was keying `topics_by_wing` by the raw directory name while `mempalace.yaml` got the lower-cased separator-collapsed slug. At mine time the miner read the slug from the yaml and missed the registry key, so `_compute_topic_tunnels_for_wing` returned 0 silently for every project whose folder contained a `-` or a space — the most common shape in the wild. Extracted the rule into `config.normalize_wing_name()` and routed both `cli.cmd_init` (registry write) and `room_detector_local.detect_rooms_local` (yaml write) through it. Added a regression test in `test_cli.py` asserting the registry call uses the normalized slug, plus four direct unit tests for the helper. Refs #1180. Co-Authored-By: Claude Opus 4.7 (1M context) --- CHANGELOG.md | 1 + mempalace/cli.py | 11 ++++++----- mempalace/config.py | 10 ++++++++++ mempalace/room_detector_local.py | 4 +++- tests/test_cli.py | 32 ++++++++++++++++++++++++++++++++ tests/test_config.py | 21 ++++++++++++++++++++- 6 files changed, 72 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 25b7853..11b9497 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), ### Bug Fixes +- **Cross-wing topic tunnels for hyphenated dir names.** `mempalace init` recorded the `topics_by_wing` registry key under the raw directory name (e.g. `mempalace-public`), while `mempalace.yaml`'s `wing` field used the lower-cased + separator-collapsed slug (`mempalace_public`). 
At mine time the miner read the slug from the yaml and missed the registry, so `_compute_topic_tunnels_for_wing` returned `0` silently. Real-world: any project whose folder contained a hyphen or space lost every topic tunnel. Now both call sites route through a shared `normalize_wing_name()` in `config.py`. (#1194, follow-up to #1180) - **CLI `mempalace search` retrieval quality.** The CLI was using pure ChromaDB cosine distance with no BM25 rerank, so drawers containing every query term but embedding as noise (directory listings, diff output, shell logs) scored `Match: 0.0` alongside genuinely irrelevant results with no way to tell them apart. Wired the CLI through the same `_hybrid_rank` the `mempalace_search` MCP tool already used, and surfaced both `cosine=` and `bm25=` scores in the output so users see which component of the match is firing. MCP search was unaffected; this fixes the human-facing CLI parity gap. - **Legacy-palace distance-metric warning.** CLI search now detects palaces created before `hnsw:space=cosine` was consistently set and prints a one-line notice pointing at `mempalace repair`. Without the warning such palaces silently used L2 distance, under which the similarity display floored every result to `Match: 0.0`. New palaces mined today already set cosine correctly and now have invariant tests pinning that behavior so future refactors can't silently regress it. (#1179) - **Graceful Ctrl-C during `mempalace mine`.** Interrupting a long mine no longer dumps a multi-frame `KeyboardInterrupt` traceback. The main file-processing loop now catches the signal, prints `files_processed: N/M`, `drawers_filed: K`, and `last_file:` so the user knows what landed, then exits with code 130 (standard SIGINT). Already-filed drawers are upserted idempotently on re-mine via deterministic IDs, so resuming is safe. 
The hooks PID lock at `~/.mempalace/hook_state/mine.pid` is now also actively cleaned up in a `finally` when its entry points at us — clean exit, error, or interrupt — preventing the next hook fire from briefly waiting on a stale PID. (#1182) diff --git a/mempalace/cli.py b/mempalace/cli.py index 80ac9b0..5efa075 100644 --- a/mempalace/cli.py +++ b/mempalace/cli.py @@ -329,13 +329,14 @@ def cmd_init(args): json.dump(confirmed, f, indent=2, ensure_ascii=False) print(f" Entities saved: {entities_path}") + from .config import normalize_wing_name from .miner import add_to_known_entities - # Wing matches the default produced by ``room_detector_local`` - # (folder basename) and the miner fallback in ``load_config``. - # Used by the topics_by_wing map so cross-wing tunnels can be - # computed at mine time. - wing = project_path.name + # Match the slug ``room_detector_local`` writes into + # ``mempalace.yaml`` so the miner's tunnel lookup hits the + # same key in ``topics_by_wing`` at mine time (issue #1194 — + # without this, hyphenated dirnames silently lose tunnels). + wing = normalize_wing_name(project_path.name) registry_path = add_to_known_entities(confirmed, wing=wing) print(f" Registry updated: {registry_path}") else: diff --git a/mempalace/config.py b/mempalace/config.py index 8e12b6b..cacd1f9 100644 --- a/mempalace/config.py +++ b/mempalace/config.py @@ -19,6 +19,16 @@ MAX_NAME_LENGTH = 128 _SAFE_NAME_RE = re.compile(r"^(?:[^\W_]|[^\W_][\w .'-]{0,126}[^\W_])$") +def normalize_wing_name(name: str) -> str: + """Lower-case + collapse separators (`-`, ` `) to `_` for wing slugs. + + The same rule is applied by ``init`` when persisting `topics_by_wing` + and when writing `mempalace.yaml`, so the miner's lookup matches at + mine time regardless of the source dirname. + """ + return name.lower().replace(" ", "_").replace("-", "_") + + def sanitize_name(value: str, field_name: str = "name") -> str: """Validate and sanitize a wing/room/entity name. 
diff --git a/mempalace/room_detector_local.py b/mempalace/room_detector_local.py index 32e75c3..31d5b05 100644 --- a/mempalace/room_detector_local.py +++ b/mempalace/room_detector_local.py @@ -303,8 +303,10 @@ def save_config(project_dir: str, project_name: str, rooms: list): def detect_rooms_local(project_dir: str, yes: bool = False): """Main entry point for local setup.""" + from .config import normalize_wing_name + project_path = Path(project_dir).expanduser().resolve() - project_name = project_path.name.lower().replace(" ", "_").replace("-", "_") + project_name = normalize_wing_name(project_path.name) if not project_path.exists(): print(f"ERROR: Directory not found: {project_dir}") diff --git a/tests/test_cli.py b/tests/test_cli.py index b9427d5..02dca19 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -138,6 +138,38 @@ def test_cmd_init_with_entities(mock_config_cls, tmp_path): cmd_init(args) +@patch("mempalace.cli.MempalaceConfig") +def test_cmd_init_normalizes_wing_name_for_topics_registry(mock_config_cls, tmp_path): + """Regression for #1194: hyphenated dir names must be normalized to the + same slug ``mempalace.yaml`` uses, otherwise ``topics_by_wing`` keys + miss the miner's lookup at mine time and tunnels are silently dropped. 
+ """ + project = tmp_path / "my-cool-app" + project.mkdir() + fake_files = [project / "a.txt"] + detected = { + "people": [{"name": "Alice"}], + "projects": [], + "topics": [{"name": "Bun"}], + "uncertain": [], + } + confirmed = {"people": ["Alice"], "projects": [], "topics": ["Bun"]} + args = argparse.Namespace(dir=str(project), yes=True) + with ( + patch("mempalace.entity_detector.scan_for_detection", return_value=fake_files), + patch("mempalace.entity_detector.detect_entities", return_value=detected), + patch("mempalace.entity_detector.confirm_entities", return_value=confirmed), + patch("mempalace.miner.add_to_known_entities") as mock_register, + patch("mempalace.room_detector_local.detect_rooms_local"), + patch("builtins.open", MagicMock()), + patch("mempalace.cli._maybe_run_mine_after_init"), + ): + mock_register.return_value = "/tmp/known_entities.json" + cmd_init(args) + mock_register.assert_called_once() + assert mock_register.call_args.kwargs["wing"] == "my_cool_app" + + @patch("mempalace.cli.MempalaceConfig") def test_cmd_init_with_entities_zero_total(mock_config_cls, tmp_path, capsys): """When entities detected but total is 0, prints 'No entities' message.""" diff --git a/tests/test_config.py b/tests/test_config.py index 8d9753b..d7707d9 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -3,7 +3,7 @@ import json import tempfile import pytest -from mempalace.config import MempalaceConfig, sanitize_kg_value, sanitize_name +from mempalace.config import MempalaceConfig, normalize_wing_name, sanitize_kg_value, sanitize_name def test_default_config(): @@ -110,6 +110,25 @@ def test_init(): assert os.path.exists(os.path.join(tmpdir, "config.json")) +# --- normalize_wing_name --- + + +def test_normalize_wing_name_hyphen(): + assert normalize_wing_name("mempal-private") == "mempal_private" + + +def test_normalize_wing_name_space(): + assert normalize_wing_name("My Project") == "my_project" + + +def test_normalize_wing_name_already_clean(): + assert 
normalize_wing_name("memorymark") == "memorymark" + + +def test_normalize_wing_name_mixed(): + assert normalize_wing_name("My-Cool App") == "my_cool_app" + + # --- sanitize_name --- From 3bebef1503ab4c48ea68e9a924d3b11783de603b Mon Sep 17 00:00:00 2001 From: Igor Lins e Silva <4753812+igorls@users.noreply.github.com> Date: Mon, 27 Apr 2026 02:50:12 -0300 Subject: [PATCH 2/3] fix(miner,convo_miner): close remaining wing-name normalization gaps (#1194) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two follow-ups against the review on this PR: 1. ``miner.load_config`` no-yaml fallback was returning the raw dirname as the wing, while ``cmd_init`` writes ``topics_by_wing`` under the normalized slug. A hyphenated project mined without a ``mempalace.yaml`` file silently lost every topic tunnel — same key-miss class as #1194, just down the no-yaml branch (raised by Qodo on this PR). 2. ``convo_miner`` was applying the lower/replace rule inline at one call site. Now folded through ``normalize_wing_name`` so all wing-slug producers — ``cmd_init``, ``room_detector_local``, ``miner.load_config`` fallback, ``convo_miner`` — share a single source of truth. No behavior change for any input; pure consolidation. Added ``test_load_config_no_yaml_normalizes_hyphenated_wing`` to lock the fallback path to the normalized slug — fails on develop without the miner change. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- mempalace/convo_miner.py | 4 +++- mempalace/miner.py | 10 +++++++++- tests/test_miner.py | 18 ++++++++++++++++++ 3 files changed, 30 insertions(+), 2 deletions(-) diff --git a/mempalace/convo_miner.py b/mempalace/convo_miner.py index a91cf33..2cf57e4 100644 --- a/mempalace/convo_miner.py +++ b/mempalace/convo_miner.py @@ -394,7 +394,9 @@ def mine_convos( convo_path = Path(convo_dir).expanduser().resolve() if not wing: - wing = convo_path.name.lower().replace(" ", "_").replace("-", "_") + from .config import normalize_wing_name + + wing = normalize_wing_name(convo_path.name) files = scan_convos(convo_dir) if limit > 0: diff --git a/mempalace/miner.py b/mempalace/miner.py index 2d610ea..ba0c630 100644 --- a/mempalace/miner.py +++ b/mempalace/miner.py @@ -286,7 +286,15 @@ def load_config(project_dir: str) -> dict: if legacy_path.exists(): config_path = legacy_path else: - wing_name = resolved_project_dir.name + from .config import normalize_wing_name + + # Normalize the dirname-derived fallback wing the same way + # ``cmd_init`` and ``room_detector_local`` do — otherwise a + # hyphenated project mined without a yaml file lands under a + # raw-name wing while ``topics_by_wing`` was keyed under the + # normalized slug, silently dropping every topic tunnel + # (the no-yaml branch of issue #1194). + wing_name = normalize_wing_name(resolved_project_dir.name) print( f" No mempalace.yaml found in {resolved_project_dir} " f"— using auto-detected defaults (wing='{wing_name}'). " diff --git a/tests/test_miner.py b/tests/test_miner.py index 0619dbb..10124ee 100644 --- a/tests/test_miner.py +++ b/tests/test_miner.py @@ -67,6 +67,24 @@ def test_load_config_uses_defaults_when_yaml_missing(): shutil.rmtree(tmpdir) +def test_load_config_no_yaml_normalizes_hyphenated_wing(): + """Fallback wing name is normalized so it matches topics_by_wing keys. 
+ + Regression for the no-yaml branch of #1194: ``cmd_init`` writes + ``topics_by_wing`` under the normalized slug, so the miner's + fallback wing must use the same normalization or the tunnel lookup + misses every key for hyphenated dirnames. + """ + parent = tempfile.mkdtemp() + try: + project_root = Path(parent) / "my-cool-app" + project_root.mkdir() + config = load_config(str(project_root)) + assert config["wing"] == "my_cool_app" + finally: + shutil.rmtree(parent) + + def test_scan_project_skips_mempalace_generated_files(): with tempfile.TemporaryDirectory() as tmpdir: project_root = Path(tmpdir).resolve() From cfca40c5ec1d0e3de848d72d2d09d416cd3a3aa8 Mon Sep 17 00:00:00 2001 From: igorls <4753812+igorls@users.noreply.github.com> Date: Mon, 27 Apr 2026 03:14:02 -0300 Subject: [PATCH 3/3] test(cli): mock _run_pass_zero so wing-name test survives corpus-origin cmd_init now invokes ``_run_pass_zero`` unconditionally (#1221, #1223 landed on develop after this PR's branch point). The pass reads sample content via ``builtins.open``; with that mocked to MagicMock, the downstream ``"\\n\\n".join(samples)`` in ``corpus_origin.detect_origin_heuristic`` raises ``TypeError: expected str instance, MagicMock found``. This test only cares about the wing-slug write to the registry, so stub the pass-zero call directly rather than try to satisfy its full sample-gathering contract. --- tests/test_cli.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/test_cli.py b/tests/test_cli.py index 02dca19..af7b39d 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -163,6 +163,11 @@ def test_cmd_init_normalizes_wing_name_for_topics_registry(mock_config_cls, tmp_ patch("mempalace.room_detector_local.detect_rooms_local"), patch("builtins.open", MagicMock()), patch("mempalace.cli._maybe_run_mine_after_init"), + # Pass-zero corpus-origin detection runs unconditionally inside + # cmd_init now (#1221 / #1223). 
It reads sample content via + # ``builtins.open``, which this test mocks with MagicMock, so stub it out — + # this test only cares about the wing-slug write to the registry. + patch("mempalace.cli._run_pass_zero", return_value=None), ): mock_register.return_value = "/tmp/known_entities.json" cmd_init(args)