Merge pull request #1195 from MemPalace/fix/wing-name-normalization-tunnels
fix(graph): normalize wing slug at init so topic tunnels fire for hyphenated dirs (#1194)
This commit is contained in:
@@ -19,6 +19,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
||||
|
||||
### Bug Fixes
|
||||
|
||||
- **Cross-wing topic tunnels for hyphenated dir names.** `mempalace init` recorded the `topics_by_wing` registry key under the raw directory name (e.g. `mempalace-public`), while `mempalace.yaml`'s `wing` field used the lower-cased + separator-collapsed slug (`mempalace_public`). At mine time the miner read the slug from the yaml and missed the registry, so `_compute_topic_tunnels_for_wing` returned `0` silently. Real-world: any project whose folder contained a hyphen or space lost every topic tunnel. Now both call sites route through a shared `normalize_wing_name()` in `config.py`. (#1194, follow-up to #1180)
|
||||
- **CLI `mempalace search` retrieval quality.** The CLI was using pure ChromaDB cosine distance with no BM25 rerank, so drawers containing every query term but embedding as noise (directory listings, diff output, shell logs) scored `Match: 0.0` alongside genuinely irrelevant results with no way to tell them apart. Wired the CLI through the same `_hybrid_rank` the `mempalace_search` MCP tool already used, and surfaced both `cosine=` and `bm25=` scores in the output so users see which component of the match is firing. MCP search was unaffected; this fixes the human-facing CLI parity gap.
|
||||
- **Legacy-palace distance-metric warning.** CLI search now detects palaces created before `hnsw:space=cosine` was consistently set and prints a one-line notice pointing at `mempalace repair`. Without the warning such palaces silently used L2 distance, under which the similarity display floored every result to `Match: 0.0`. New palaces mined today already set cosine correctly and now have invariant tests pinning that behavior so future refactors can't silently regress it. (#1179)
|
||||
- **Graceful Ctrl-C during `mempalace mine`.** Interrupting a long mine no longer dumps a multi-frame `KeyboardInterrupt` traceback. The main file-processing loop now catches the signal, prints `files_processed: N/M`, `drawers_filed: K`, and `last_file:` so the user knows what landed, then exits with code 130 (standard SIGINT). Already-filed drawers are upserted idempotently on re-mine via deterministic IDs, so resuming is safe. The hooks PID lock at `~/.mempalace/hook_state/mine.pid` is now also actively cleaned up in a `finally` when its entry points at us — clean exit, error, or interrupt — preventing the next hook fire from briefly waiting on a stale PID. (#1182)
|
||||
|
||||
+6
-5
@@ -329,13 +329,14 @@ def cmd_init(args):
|
||||
json.dump(confirmed, f, indent=2, ensure_ascii=False)
|
||||
print(f" Entities saved: {entities_path}")
|
||||
|
||||
from .config import normalize_wing_name
|
||||
from .miner import add_to_known_entities
|
||||
|
||||
# Wing matches the default produced by ``room_detector_local``
|
||||
# (folder basename) and the miner fallback in ``load_config``.
|
||||
# Used by the topics_by_wing map so cross-wing tunnels can be
|
||||
# computed at mine time.
|
||||
wing = project_path.name
|
||||
# Match the slug ``room_detector_local`` writes into
|
||||
# ``mempalace.yaml`` so the miner's tunnel lookup hits the
|
||||
# same key in ``topics_by_wing`` at mine time (issue #1194 —
|
||||
# without this, hyphenated dirnames silently lose tunnels).
|
||||
wing = normalize_wing_name(project_path.name)
|
||||
registry_path = add_to_known_entities(confirmed, wing=wing)
|
||||
print(f" Registry updated: {registry_path}")
|
||||
else:
|
||||
|
||||
@@ -19,6 +19,16 @@ MAX_NAME_LENGTH = 128
|
||||
_SAFE_NAME_RE = re.compile(r"^(?:[^\W_]|[^\W_][\w .'-]{0,126}[^\W_])$")
|
||||
|
||||
|
||||
def normalize_wing_name(name: str) -> str:
|
||||
"""Lower-case + collapse separators (`-`, ` `) to `_` for wing slugs.
|
||||
|
||||
The same rule is applied by ``init`` when persisting `topics_by_wing`
|
||||
and when writing `mempalace.yaml`, so the miner's lookup matches at
|
||||
mine time regardless of the source dirname.
|
||||
"""
|
||||
return name.lower().replace(" ", "_").replace("-", "_")
|
||||
|
||||
|
||||
def sanitize_name(value: str, field_name: str = "name") -> str:
|
||||
"""Validate and sanitize a wing/room/entity name.
|
||||
|
||||
|
||||
@@ -394,7 +394,9 @@ def mine_convos(
|
||||
|
||||
convo_path = Path(convo_dir).expanduser().resolve()
|
||||
if not wing:
|
||||
wing = convo_path.name.lower().replace(" ", "_").replace("-", "_")
|
||||
from .config import normalize_wing_name
|
||||
|
||||
wing = normalize_wing_name(convo_path.name)
|
||||
|
||||
files = scan_convos(convo_dir)
|
||||
if limit > 0:
|
||||
|
||||
+9
-1
@@ -286,7 +286,15 @@ def load_config(project_dir: str) -> dict:
|
||||
if legacy_path.exists():
|
||||
config_path = legacy_path
|
||||
else:
|
||||
wing_name = resolved_project_dir.name
|
||||
from .config import normalize_wing_name
|
||||
|
||||
# Normalize the dirname-derived fallback wing the same way
|
||||
# ``cmd_init`` and ``room_detector_local`` do — otherwise a
|
||||
# hyphenated project mined without a yaml file lands under a
|
||||
# raw-name wing while ``topics_by_wing`` was keyed under the
|
||||
# normalized slug, silently dropping every topic tunnel
|
||||
# (the no-yaml branch of issue #1194).
|
||||
wing_name = normalize_wing_name(resolved_project_dir.name)
|
||||
print(
|
||||
f" No mempalace.yaml found in {resolved_project_dir} "
|
||||
f"— using auto-detected defaults (wing='{wing_name}'). "
|
||||
|
||||
@@ -303,8 +303,10 @@ def save_config(project_dir: str, project_name: str, rooms: list):
|
||||
|
||||
def detect_rooms_local(project_dir: str, yes: bool = False):
|
||||
"""Main entry point for local setup."""
|
||||
from .config import normalize_wing_name
|
||||
|
||||
project_path = Path(project_dir).expanduser().resolve()
|
||||
project_name = project_path.name.lower().replace(" ", "_").replace("-", "_")
|
||||
project_name = normalize_wing_name(project_path.name)
|
||||
|
||||
if not project_path.exists():
|
||||
print(f"ERROR: Directory not found: {project_dir}")
|
||||
|
||||
@@ -138,6 +138,43 @@ def test_cmd_init_with_entities(mock_config_cls, tmp_path):
|
||||
cmd_init(args)
|
||||
|
||||
|
||||
@patch("mempalace.cli.MempalaceConfig")
|
||||
def test_cmd_init_normalizes_wing_name_for_topics_registry(mock_config_cls, tmp_path):
|
||||
"""Regression for #1194: hyphenated dir names must be normalized to the
|
||||
same slug ``mempalace.yaml`` uses, otherwise ``topics_by_wing`` keys
|
||||
miss the miner's lookup at mine time and tunnels are silently dropped.
|
||||
"""
|
||||
project = tmp_path / "my-cool-app"
|
||||
project.mkdir()
|
||||
fake_files = [project / "a.txt"]
|
||||
detected = {
|
||||
"people": [{"name": "Alice"}],
|
||||
"projects": [],
|
||||
"topics": [{"name": "Bun"}],
|
||||
"uncertain": [],
|
||||
}
|
||||
confirmed = {"people": ["Alice"], "projects": [], "topics": ["Bun"]}
|
||||
args = argparse.Namespace(dir=str(project), yes=True)
|
||||
with (
|
||||
patch("mempalace.entity_detector.scan_for_detection", return_value=fake_files),
|
||||
patch("mempalace.entity_detector.detect_entities", return_value=detected),
|
||||
patch("mempalace.entity_detector.confirm_entities", return_value=confirmed),
|
||||
patch("mempalace.miner.add_to_known_entities") as mock_register,
|
||||
patch("mempalace.room_detector_local.detect_rooms_local"),
|
||||
patch("builtins.open", MagicMock()),
|
||||
patch("mempalace.cli._maybe_run_mine_after_init"),
|
||||
# Pass-zero corpus-origin detection runs unconditionally inside
|
||||
# cmd_init now (#1221 / #1223). It accesses MempalaceConfig fields
|
||||
# that don't survive MagicMock stringification, so stub it out —
|
||||
# this test only cares about the wing-slug write to the registry.
|
||||
patch("mempalace.cli._run_pass_zero", return_value=None),
|
||||
):
|
||||
mock_register.return_value = "/tmp/known_entities.json"
|
||||
cmd_init(args)
|
||||
mock_register.assert_called_once()
|
||||
assert mock_register.call_args.kwargs["wing"] == "my_cool_app"
|
||||
|
||||
|
||||
@patch("mempalace.cli.MempalaceConfig")
|
||||
def test_cmd_init_with_entities_zero_total(mock_config_cls, tmp_path, capsys):
|
||||
"""When entities detected but total is 0, prints 'No entities' message."""
|
||||
|
||||
+20
-1
@@ -3,7 +3,7 @@ import json
|
||||
import tempfile
|
||||
|
||||
import pytest
|
||||
from mempalace.config import MempalaceConfig, sanitize_kg_value, sanitize_name
|
||||
from mempalace.config import MempalaceConfig, normalize_wing_name, sanitize_kg_value, sanitize_name
|
||||
|
||||
|
||||
def test_default_config():
|
||||
@@ -110,6 +110,25 @@ def test_init():
|
||||
assert os.path.exists(os.path.join(tmpdir, "config.json"))
|
||||
|
||||
|
||||
# --- normalize_wing_name ---
|
||||
|
||||
|
||||
def test_normalize_wing_name_hyphen():
|
||||
assert normalize_wing_name("mempal-private") == "mempal_private"
|
||||
|
||||
|
||||
def test_normalize_wing_name_space():
|
||||
assert normalize_wing_name("My Project") == "my_project"
|
||||
|
||||
|
||||
def test_normalize_wing_name_already_clean():
|
||||
assert normalize_wing_name("memorymark") == "memorymark"
|
||||
|
||||
|
||||
def test_normalize_wing_name_mixed():
|
||||
assert normalize_wing_name("My-Cool App") == "my_cool_app"
|
||||
|
||||
|
||||
# --- sanitize_name ---
|
||||
|
||||
|
||||
|
||||
@@ -67,6 +67,24 @@ def test_load_config_uses_defaults_when_yaml_missing():
|
||||
shutil.rmtree(tmpdir)
|
||||
|
||||
|
||||
def test_load_config_no_yaml_normalizes_hyphenated_wing():
|
||||
"""Fallback wing name is normalized so it matches topics_by_wing keys.
|
||||
|
||||
Regression for the no-yaml branch of #1194: ``cmd_init`` writes
|
||||
``topics_by_wing`` under the normalized slug, so the miner's
|
||||
fallback wing must use the same normalization or the tunnel lookup
|
||||
misses every key for hyphenated dirnames.
|
||||
"""
|
||||
parent = tempfile.mkdtemp()
|
||||
try:
|
||||
project_root = Path(parent) / "my-cool-app"
|
||||
project_root.mkdir()
|
||||
config = load_config(str(project_root))
|
||||
assert config["wing"] == "my_cool_app"
|
||||
finally:
|
||||
shutil.rmtree(parent)
|
||||
|
||||
|
||||
def test_scan_project_skips_mempalace_generated_files():
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
project_root = Path(tmpdir).resolve()
|
||||
|
||||
Reference in New Issue
Block a user