Merge pull request #1195 from MemPalace/fix/wing-name-normalization-tunnels

fix(graph): normalize wing slug at init so topic tunnels fire for hyphenated dirs (#1194)
This commit is contained in:
Igor Lins e Silva
2026-04-27 03:20:46 -03:00
committed by GitHub
9 changed files with 107 additions and 9 deletions
+1
View File
@@ -19,6 +19,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
### Bug Fixes ### Bug Fixes
- **Cross-wing topic tunnels for hyphenated dir names.** `mempalace init` recorded the `topics_by_wing` registry key under the raw directory name (e.g. `mempalace-public`), while `mempalace.yaml`'s `wing` field used the lower-cased + separator-collapsed slug (`mempalace_public`). At mine time the miner read the slug from the yaml and missed the registry, so `_compute_topic_tunnels_for_wing` returned `0` silently. Real-world: any project whose folder contained a hyphen or space lost every topic tunnel. Now both call sites route through a shared `normalize_wing_name()` in `config.py`. (#1194, follow-up to #1180)
- **CLI `mempalace search` retrieval quality.** The CLI was using pure ChromaDB cosine distance with no BM25 rerank, so drawers containing every query term but embedding as noise (directory listings, diff output, shell logs) scored `Match: 0.0` alongside genuinely irrelevant results with no way to tell them apart. Wired the CLI through the same `_hybrid_rank` the `mempalace_search` MCP tool already used, and surfaced both `cosine=` and `bm25=` scores in the output so users see which component of the match is firing. MCP search was unaffected; this fixes the human-facing CLI parity gap. - **CLI `mempalace search` retrieval quality.** The CLI was using pure ChromaDB cosine distance with no BM25 rerank, so drawers containing every query term but embedding as noise (directory listings, diff output, shell logs) scored `Match: 0.0` alongside genuinely irrelevant results with no way to tell them apart. Wired the CLI through the same `_hybrid_rank` the `mempalace_search` MCP tool already used, and surfaced both `cosine=` and `bm25=` scores in the output so users see which component of the match is firing. MCP search was unaffected; this fixes the human-facing CLI parity gap.
- **Legacy-palace distance-metric warning.** CLI search now detects palaces created before `hnsw:space=cosine` was consistently set and prints a one-line notice pointing at `mempalace repair`. Without the warning such palaces silently used L2 distance, under which the similarity display floored every result to `Match: 0.0`. New palaces mined today already set cosine correctly and now have invariant tests pinning that behavior so future refactors can't silently regress it. (#1179) - **Legacy-palace distance-metric warning.** CLI search now detects palaces created before `hnsw:space=cosine` was consistently set and prints a one-line notice pointing at `mempalace repair`. Without the warning such palaces silently used L2 distance, under which the similarity display floored every result to `Match: 0.0`. New palaces mined today already set cosine correctly and now have invariant tests pinning that behavior so future refactors can't silently regress it. (#1179)
- **Graceful Ctrl-C during `mempalace mine`.** Interrupting a long mine no longer dumps a multi-frame `KeyboardInterrupt` traceback. The main file-processing loop now catches the signal, prints `files_processed: N/M`, `drawers_filed: K`, and `last_file:` so the user knows what landed, then exits with code 130 (standard SIGINT). Already-filed drawers are upserted idempotently on re-mine via deterministic IDs, so resuming is safe. The hooks PID lock at `~/.mempalace/hook_state/mine.pid` is now also actively cleaned up in a `finally` when its entry points at us — clean exit, error, or interrupt — preventing the next hook fire from briefly waiting on a stale PID. (#1182) - **Graceful Ctrl-C during `mempalace mine`.** Interrupting a long mine no longer dumps a multi-frame `KeyboardInterrupt` traceback. The main file-processing loop now catches the signal, prints `files_processed: N/M`, `drawers_filed: K`, and `last_file:` so the user knows what landed, then exits with code 130 (standard SIGINT). Already-filed drawers are upserted idempotently on re-mine via deterministic IDs, so resuming is safe. The hooks PID lock at `~/.mempalace/hook_state/mine.pid` is now also actively cleaned up in a `finally` when its entry points at us — clean exit, error, or interrupt — preventing the next hook fire from briefly waiting on a stale PID. (#1182)
+6 -5
View File
@@ -329,13 +329,14 @@ def cmd_init(args):
json.dump(confirmed, f, indent=2, ensure_ascii=False) json.dump(confirmed, f, indent=2, ensure_ascii=False)
print(f" Entities saved: {entities_path}") print(f" Entities saved: {entities_path}")
from .config import normalize_wing_name
from .miner import add_to_known_entities from .miner import add_to_known_entities
# Wing matches the default produced by ``room_detector_local`` # Match the slug ``room_detector_local`` writes into
# (folder basename) and the miner fallback in ``load_config``. # ``mempalace.yaml`` so the miner's tunnel lookup hits the
# Used by the topics_by_wing map so cross-wing tunnels can be # same key in ``topics_by_wing`` at mine time (issue #1194 —
# computed at mine time. # without this, hyphenated dirnames silently lose tunnels).
wing = project_path.name wing = normalize_wing_name(project_path.name)
registry_path = add_to_known_entities(confirmed, wing=wing) registry_path = add_to_known_entities(confirmed, wing=wing)
print(f" Registry updated: {registry_path}") print(f" Registry updated: {registry_path}")
else: else:
+10
View File
@@ -19,6 +19,16 @@ MAX_NAME_LENGTH = 128
_SAFE_NAME_RE = re.compile(r"^(?:[^\W_]|[^\W_][\w .'-]{0,126}[^\W_])$") _SAFE_NAME_RE = re.compile(r"^(?:[^\W_]|[^\W_][\w .'-]{0,126}[^\W_])$")
def normalize_wing_name(name: str) -> str:
"""Lower-case + collapse separators (`-`, ` `) to `_` for wing slugs.
The same rule is applied by ``init`` when persisting `topics_by_wing`
and when writing `mempalace.yaml`, so the miner's lookup matches at
mine time regardless of the source dirname.
"""
return name.lower().replace(" ", "_").replace("-", "_")
def sanitize_name(value: str, field_name: str = "name") -> str: def sanitize_name(value: str, field_name: str = "name") -> str:
"""Validate and sanitize a wing/room/entity name. """Validate and sanitize a wing/room/entity name.
+3 -1
View File
@@ -394,7 +394,9 @@ def mine_convos(
convo_path = Path(convo_dir).expanduser().resolve() convo_path = Path(convo_dir).expanduser().resolve()
if not wing: if not wing:
wing = convo_path.name.lower().replace(" ", "_").replace("-", "_") from .config import normalize_wing_name
wing = normalize_wing_name(convo_path.name)
files = scan_convos(convo_dir) files = scan_convos(convo_dir)
if limit > 0: if limit > 0:
+9 -1
View File
@@ -286,7 +286,15 @@ def load_config(project_dir: str) -> dict:
if legacy_path.exists(): if legacy_path.exists():
config_path = legacy_path config_path = legacy_path
else: else:
wing_name = resolved_project_dir.name from .config import normalize_wing_name
# Normalize the dirname-derived fallback wing the same way
# ``cmd_init`` and ``room_detector_local`` do — otherwise a
# hyphenated project mined without a yaml file lands under a
# raw-name wing while ``topics_by_wing`` was keyed under the
# normalized slug, silently dropping every topic tunnel
# (the no-yaml branch of issue #1194).
wing_name = normalize_wing_name(resolved_project_dir.name)
print( print(
f" No mempalace.yaml found in {resolved_project_dir} " f" No mempalace.yaml found in {resolved_project_dir} "
f"— using auto-detected defaults (wing='{wing_name}'). " f"— using auto-detected defaults (wing='{wing_name}'). "
+3 -1
View File
@@ -303,8 +303,10 @@ def save_config(project_dir: str, project_name: str, rooms: list):
def detect_rooms_local(project_dir: str, yes: bool = False): def detect_rooms_local(project_dir: str, yes: bool = False):
"""Main entry point for local setup.""" """Main entry point for local setup."""
from .config import normalize_wing_name
project_path = Path(project_dir).expanduser().resolve() project_path = Path(project_dir).expanduser().resolve()
project_name = project_path.name.lower().replace(" ", "_").replace("-", "_") project_name = normalize_wing_name(project_path.name)
if not project_path.exists(): if not project_path.exists():
print(f"ERROR: Directory not found: {project_dir}") print(f"ERROR: Directory not found: {project_dir}")
+37
View File
@@ -138,6 +138,43 @@ def test_cmd_init_with_entities(mock_config_cls, tmp_path):
cmd_init(args) cmd_init(args)
@patch("mempalace.cli.MempalaceConfig")
def test_cmd_init_normalizes_wing_name_for_topics_registry(mock_config_cls, tmp_path):
"""Regression for #1194: hyphenated dir names must be normalized to the
same slug ``mempalace.yaml`` uses, otherwise ``topics_by_wing`` keys
miss the miner's lookup at mine time and tunnels are silently dropped.
"""
project = tmp_path / "my-cool-app"
project.mkdir()
fake_files = [project / "a.txt"]
detected = {
"people": [{"name": "Alice"}],
"projects": [],
"topics": [{"name": "Bun"}],
"uncertain": [],
}
confirmed = {"people": ["Alice"], "projects": [], "topics": ["Bun"]}
args = argparse.Namespace(dir=str(project), yes=True)
with (
patch("mempalace.entity_detector.scan_for_detection", return_value=fake_files),
patch("mempalace.entity_detector.detect_entities", return_value=detected),
patch("mempalace.entity_detector.confirm_entities", return_value=confirmed),
patch("mempalace.miner.add_to_known_entities") as mock_register,
patch("mempalace.room_detector_local.detect_rooms_local"),
patch("builtins.open", MagicMock()),
patch("mempalace.cli._maybe_run_mine_after_init"),
# Pass-zero corpus-origin detection runs unconditionally inside
# cmd_init now (#1221 / #1223). It accesses MempalaceConfig fields
# that don't survive MagicMock stringification, so stub it out —
# this test only cares about the wing-slug write to the registry.
patch("mempalace.cli._run_pass_zero", return_value=None),
):
mock_register.return_value = "/tmp/known_entities.json"
cmd_init(args)
mock_register.assert_called_once()
assert mock_register.call_args.kwargs["wing"] == "my_cool_app"
@patch("mempalace.cli.MempalaceConfig") @patch("mempalace.cli.MempalaceConfig")
def test_cmd_init_with_entities_zero_total(mock_config_cls, tmp_path, capsys): def test_cmd_init_with_entities_zero_total(mock_config_cls, tmp_path, capsys):
"""When entities detected but total is 0, prints 'No entities' message.""" """When entities detected but total is 0, prints 'No entities' message."""
+20 -1
View File
@@ -3,7 +3,7 @@ import json
import tempfile import tempfile
import pytest import pytest
from mempalace.config import MempalaceConfig, sanitize_kg_value, sanitize_name from mempalace.config import MempalaceConfig, normalize_wing_name, sanitize_kg_value, sanitize_name
def test_default_config(): def test_default_config():
@@ -110,6 +110,25 @@ def test_init():
assert os.path.exists(os.path.join(tmpdir, "config.json")) assert os.path.exists(os.path.join(tmpdir, "config.json"))
# --- normalize_wing_name ---
def test_normalize_wing_name_hyphen():
assert normalize_wing_name("mempal-private") == "mempal_private"
def test_normalize_wing_name_space():
assert normalize_wing_name("My Project") == "my_project"
def test_normalize_wing_name_already_clean():
assert normalize_wing_name("memorymark") == "memorymark"
def test_normalize_wing_name_mixed():
assert normalize_wing_name("My-Cool App") == "my_cool_app"
# --- sanitize_name --- # --- sanitize_name ---
+18
View File
@@ -67,6 +67,24 @@ def test_load_config_uses_defaults_when_yaml_missing():
shutil.rmtree(tmpdir) shutil.rmtree(tmpdir)
def test_load_config_no_yaml_normalizes_hyphenated_wing():
"""Fallback wing name is normalized so it matches topics_by_wing keys.
Regression for the no-yaml branch of #1194: ``cmd_init`` writes
``topics_by_wing`` under the normalized slug, so the miner's
fallback wing must use the same normalization or the tunnel lookup
misses every key for hyphenated dirnames.
"""
parent = tempfile.mkdtemp()
try:
project_root = Path(parent) / "my-cool-app"
project_root.mkdir()
config = load_config(str(project_root))
assert config["wing"] == "my_cool_app"
finally:
shutil.rmtree(parent)
def test_scan_project_skips_mempalace_generated_files(): def test_scan_project_skips_mempalace_generated_files():
with tempfile.TemporaryDirectory() as tmpdir: with tempfile.TemporaryDirectory() as tmpdir:
project_root = Path(tmpdir).resolve() project_root = Path(tmpdir).resolve()