Merge pull request #1195 from MemPalace/fix/wing-name-normalization-tunnels
fix(graph): normalize wing slug at init so topic tunnels fire for hyphenated dirs (#1194)
This commit is contained in:
@@ -19,6 +19,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
|||||||
|
|
||||||
### Bug Fixes
|
### Bug Fixes
|
||||||
|
|
||||||
|
- **Cross-wing topic tunnels for hyphenated dir names.** `mempalace init` recorded the `topics_by_wing` registry key under the raw directory name (e.g. `mempalace-public`), while `mempalace.yaml`'s `wing` field used the lower-cased + separator-collapsed slug (`mempalace_public`). At mine time the miner read the slug from the yaml and missed the registry, so `_compute_topic_tunnels_for_wing` returned `0` silently. Real-world: any project whose folder contained a hyphen or space lost every topic tunnel. Now both call sites route through a shared `normalize_wing_name()` in `config.py`. (#1194, follow-up to #1180)
|
||||||
- **CLI `mempalace search` retrieval quality.** The CLI was using pure ChromaDB cosine distance with no BM25 rerank, so drawers containing every query term but embedding as noise (directory listings, diff output, shell logs) scored `Match: 0.0` alongside genuinely irrelevant results with no way to tell them apart. Wired the CLI through the same `_hybrid_rank` the `mempalace_search` MCP tool already used, and surfaced both `cosine=` and `bm25=` scores in the output so users see which component of the match is firing. MCP search was unaffected; this fixes the human-facing CLI parity gap.
|
- **CLI `mempalace search` retrieval quality.** The CLI was using pure ChromaDB cosine distance with no BM25 rerank, so drawers containing every query term but embedding as noise (directory listings, diff output, shell logs) scored `Match: 0.0` alongside genuinely irrelevant results with no way to tell them apart. Wired the CLI through the same `_hybrid_rank` the `mempalace_search` MCP tool already used, and surfaced both `cosine=` and `bm25=` scores in the output so users see which component of the match is firing. MCP search was unaffected; this fixes the human-facing CLI parity gap.
|
||||||
- **Legacy-palace distance-metric warning.** CLI search now detects palaces created before `hnsw:space=cosine` was consistently set and prints a one-line notice pointing at `mempalace repair`. Without the warning such palaces silently used L2 distance, under which the similarity display floored every result to `Match: 0.0`. New palaces mined today already set cosine correctly and now have invariant tests pinning that behavior so future refactors can't silently regress it. (#1179)
|
- **Legacy-palace distance-metric warning.** CLI search now detects palaces created before `hnsw:space=cosine` was consistently set and prints a one-line notice pointing at `mempalace repair`. Without the warning such palaces silently used L2 distance, under which the similarity display floored every result to `Match: 0.0`. New palaces mined today already set cosine correctly and now have invariant tests pinning that behavior so future refactors can't silently regress it. (#1179)
|
||||||
- **Graceful Ctrl-C during `mempalace mine`.** Interrupting a long mine no longer dumps a multi-frame `KeyboardInterrupt` traceback. The main file-processing loop now catches the signal, prints `files_processed: N/M`, `drawers_filed: K`, and `last_file:` so the user knows what landed, then exits with code 130 (standard SIGINT). Already-filed drawers are upserted idempotently on re-mine via deterministic IDs, so resuming is safe. The hooks PID lock at `~/.mempalace/hook_state/mine.pid` is now also actively cleaned up in a `finally` when its entry points at us — clean exit, error, or interrupt — preventing the next hook fire from briefly waiting on a stale PID. (#1182)
|
- **Graceful Ctrl-C during `mempalace mine`.** Interrupting a long mine no longer dumps a multi-frame `KeyboardInterrupt` traceback. The main file-processing loop now catches the signal, prints `files_processed: N/M`, `drawers_filed: K`, and `last_file:` so the user knows what landed, then exits with code 130 (standard SIGINT). Already-filed drawers are upserted idempotently on re-mine via deterministic IDs, so resuming is safe. The hooks PID lock at `~/.mempalace/hook_state/mine.pid` is now also actively cleaned up in a `finally` when its entry points at us — clean exit, error, or interrupt — preventing the next hook fire from briefly waiting on a stale PID. (#1182)
|
||||||
|
|||||||
+6
-5
@@ -329,13 +329,14 @@ def cmd_init(args):
|
|||||||
json.dump(confirmed, f, indent=2, ensure_ascii=False)
|
json.dump(confirmed, f, indent=2, ensure_ascii=False)
|
||||||
print(f" Entities saved: {entities_path}")
|
print(f" Entities saved: {entities_path}")
|
||||||
|
|
||||||
|
from .config import normalize_wing_name
|
||||||
from .miner import add_to_known_entities
|
from .miner import add_to_known_entities
|
||||||
|
|
||||||
# Wing matches the default produced by ``room_detector_local``
|
# Match the slug ``room_detector_local`` writes into
|
||||||
# (folder basename) and the miner fallback in ``load_config``.
|
# ``mempalace.yaml`` so the miner's tunnel lookup hits the
|
||||||
# Used by the topics_by_wing map so cross-wing tunnels can be
|
# same key in ``topics_by_wing`` at mine time (issue #1194 —
|
||||||
# computed at mine time.
|
# without this, hyphenated dirnames silently lose tunnels).
|
||||||
wing = project_path.name
|
wing = normalize_wing_name(project_path.name)
|
||||||
registry_path = add_to_known_entities(confirmed, wing=wing)
|
registry_path = add_to_known_entities(confirmed, wing=wing)
|
||||||
print(f" Registry updated: {registry_path}")
|
print(f" Registry updated: {registry_path}")
|
||||||
else:
|
else:
|
||||||
|
|||||||
@@ -19,6 +19,16 @@ MAX_NAME_LENGTH = 128
|
|||||||
_SAFE_NAME_RE = re.compile(r"^(?:[^\W_]|[^\W_][\w .'-]{0,126}[^\W_])$")
|
_SAFE_NAME_RE = re.compile(r"^(?:[^\W_]|[^\W_][\w .'-]{0,126}[^\W_])$")
|
||||||
|
|
||||||
|
|
||||||
|
def normalize_wing_name(name: str) -> str:
|
||||||
|
"""Lower-case + collapse separators (`-`, ` `) to `_` for wing slugs.
|
||||||
|
|
||||||
|
The same rule is applied by ``init`` when persisting `topics_by_wing`
|
||||||
|
and when writing `mempalace.yaml`, so the miner's lookup matches at
|
||||||
|
mine time regardless of the source dirname.
|
||||||
|
"""
|
||||||
|
return name.lower().replace(" ", "_").replace("-", "_")
|
||||||
|
|
||||||
|
|
||||||
def sanitize_name(value: str, field_name: str = "name") -> str:
|
def sanitize_name(value: str, field_name: str = "name") -> str:
|
||||||
"""Validate and sanitize a wing/room/entity name.
|
"""Validate and sanitize a wing/room/entity name.
|
||||||
|
|
||||||
|
|||||||
@@ -394,7 +394,9 @@ def mine_convos(
|
|||||||
|
|
||||||
convo_path = Path(convo_dir).expanduser().resolve()
|
convo_path = Path(convo_dir).expanduser().resolve()
|
||||||
if not wing:
|
if not wing:
|
||||||
wing = convo_path.name.lower().replace(" ", "_").replace("-", "_")
|
from .config import normalize_wing_name
|
||||||
|
|
||||||
|
wing = normalize_wing_name(convo_path.name)
|
||||||
|
|
||||||
files = scan_convos(convo_dir)
|
files = scan_convos(convo_dir)
|
||||||
if limit > 0:
|
if limit > 0:
|
||||||
|
|||||||
+9
-1
@@ -286,7 +286,15 @@ def load_config(project_dir: str) -> dict:
|
|||||||
if legacy_path.exists():
|
if legacy_path.exists():
|
||||||
config_path = legacy_path
|
config_path = legacy_path
|
||||||
else:
|
else:
|
||||||
wing_name = resolved_project_dir.name
|
from .config import normalize_wing_name
|
||||||
|
|
||||||
|
# Normalize the dirname-derived fallback wing the same way
|
||||||
|
# ``cmd_init`` and ``room_detector_local`` do — otherwise a
|
||||||
|
# hyphenated project mined without a yaml file lands under a
|
||||||
|
# raw-name wing while ``topics_by_wing`` was keyed under the
|
||||||
|
# normalized slug, silently dropping every topic tunnel
|
||||||
|
# (the no-yaml branch of issue #1194).
|
||||||
|
wing_name = normalize_wing_name(resolved_project_dir.name)
|
||||||
print(
|
print(
|
||||||
f" No mempalace.yaml found in {resolved_project_dir} "
|
f" No mempalace.yaml found in {resolved_project_dir} "
|
||||||
f"— using auto-detected defaults (wing='{wing_name}'). "
|
f"— using auto-detected defaults (wing='{wing_name}'). "
|
||||||
|
|||||||
@@ -303,8 +303,10 @@ def save_config(project_dir: str, project_name: str, rooms: list):
|
|||||||
|
|
||||||
def detect_rooms_local(project_dir: str, yes: bool = False):
|
def detect_rooms_local(project_dir: str, yes: bool = False):
|
||||||
"""Main entry point for local setup."""
|
"""Main entry point for local setup."""
|
||||||
|
from .config import normalize_wing_name
|
||||||
|
|
||||||
project_path = Path(project_dir).expanduser().resolve()
|
project_path = Path(project_dir).expanduser().resolve()
|
||||||
project_name = project_path.name.lower().replace(" ", "_").replace("-", "_")
|
project_name = normalize_wing_name(project_path.name)
|
||||||
|
|
||||||
if not project_path.exists():
|
if not project_path.exists():
|
||||||
print(f"ERROR: Directory not found: {project_dir}")
|
print(f"ERROR: Directory not found: {project_dir}")
|
||||||
|
|||||||
@@ -138,6 +138,43 @@ def test_cmd_init_with_entities(mock_config_cls, tmp_path):
|
|||||||
cmd_init(args)
|
cmd_init(args)
|
||||||
|
|
||||||
|
|
||||||
|
@patch("mempalace.cli.MempalaceConfig")
|
||||||
|
def test_cmd_init_normalizes_wing_name_for_topics_registry(mock_config_cls, tmp_path):
|
||||||
|
"""Regression for #1194: hyphenated dir names must be normalized to the
|
||||||
|
same slug ``mempalace.yaml`` uses, otherwise ``topics_by_wing`` keys
|
||||||
|
miss the miner's lookup at mine time and tunnels are silently dropped.
|
||||||
|
"""
|
||||||
|
project = tmp_path / "my-cool-app"
|
||||||
|
project.mkdir()
|
||||||
|
fake_files = [project / "a.txt"]
|
||||||
|
detected = {
|
||||||
|
"people": [{"name": "Alice"}],
|
||||||
|
"projects": [],
|
||||||
|
"topics": [{"name": "Bun"}],
|
||||||
|
"uncertain": [],
|
||||||
|
}
|
||||||
|
confirmed = {"people": ["Alice"], "projects": [], "topics": ["Bun"]}
|
||||||
|
args = argparse.Namespace(dir=str(project), yes=True)
|
||||||
|
with (
|
||||||
|
patch("mempalace.entity_detector.scan_for_detection", return_value=fake_files),
|
||||||
|
patch("mempalace.entity_detector.detect_entities", return_value=detected),
|
||||||
|
patch("mempalace.entity_detector.confirm_entities", return_value=confirmed),
|
||||||
|
patch("mempalace.miner.add_to_known_entities") as mock_register,
|
||||||
|
patch("mempalace.room_detector_local.detect_rooms_local"),
|
||||||
|
patch("builtins.open", MagicMock()),
|
||||||
|
patch("mempalace.cli._maybe_run_mine_after_init"),
|
||||||
|
# Pass-zero corpus-origin detection runs unconditionally inside
|
||||||
|
# cmd_init now (#1221 / #1223). It accesses MempalaceConfig fields
|
||||||
|
# that don't survive MagicMock stringification, so stub it out —
|
||||||
|
# this test only cares about the wing-slug write to the registry.
|
||||||
|
patch("mempalace.cli._run_pass_zero", return_value=None),
|
||||||
|
):
|
||||||
|
mock_register.return_value = "/tmp/known_entities.json"
|
||||||
|
cmd_init(args)
|
||||||
|
mock_register.assert_called_once()
|
||||||
|
assert mock_register.call_args.kwargs["wing"] == "my_cool_app"
|
||||||
|
|
||||||
|
|
||||||
@patch("mempalace.cli.MempalaceConfig")
|
@patch("mempalace.cli.MempalaceConfig")
|
||||||
def test_cmd_init_with_entities_zero_total(mock_config_cls, tmp_path, capsys):
|
def test_cmd_init_with_entities_zero_total(mock_config_cls, tmp_path, capsys):
|
||||||
"""When entities detected but total is 0, prints 'No entities' message."""
|
"""When entities detected but total is 0, prints 'No entities' message."""
|
||||||
|
|||||||
+20
-1
@@ -3,7 +3,7 @@ import json
|
|||||||
import tempfile
|
import tempfile
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
from mempalace.config import MempalaceConfig, sanitize_kg_value, sanitize_name
|
from mempalace.config import MempalaceConfig, normalize_wing_name, sanitize_kg_value, sanitize_name
|
||||||
|
|
||||||
|
|
||||||
def test_default_config():
|
def test_default_config():
|
||||||
@@ -110,6 +110,25 @@ def test_init():
|
|||||||
assert os.path.exists(os.path.join(tmpdir, "config.json"))
|
assert os.path.exists(os.path.join(tmpdir, "config.json"))
|
||||||
|
|
||||||
|
|
||||||
|
# --- normalize_wing_name ---
|
||||||
|
|
||||||
|
|
||||||
|
def test_normalize_wing_name_hyphen():
|
||||||
|
assert normalize_wing_name("mempal-private") == "mempal_private"
|
||||||
|
|
||||||
|
|
||||||
|
def test_normalize_wing_name_space():
|
||||||
|
assert normalize_wing_name("My Project") == "my_project"
|
||||||
|
|
||||||
|
|
||||||
|
def test_normalize_wing_name_already_clean():
|
||||||
|
assert normalize_wing_name("memorymark") == "memorymark"
|
||||||
|
|
||||||
|
|
||||||
|
def test_normalize_wing_name_mixed():
|
||||||
|
assert normalize_wing_name("My-Cool App") == "my_cool_app"
|
||||||
|
|
||||||
|
|
||||||
# --- sanitize_name ---
|
# --- sanitize_name ---
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -67,6 +67,24 @@ def test_load_config_uses_defaults_when_yaml_missing():
|
|||||||
shutil.rmtree(tmpdir)
|
shutil.rmtree(tmpdir)
|
||||||
|
|
||||||
|
|
||||||
|
def test_load_config_no_yaml_normalizes_hyphenated_wing():
|
||||||
|
"""Fallback wing name is normalized so it matches topics_by_wing keys.
|
||||||
|
|
||||||
|
Regression for the no-yaml branch of #1194: ``cmd_init`` writes
|
||||||
|
``topics_by_wing`` under the normalized slug, so the miner's
|
||||||
|
fallback wing must use the same normalization or the tunnel lookup
|
||||||
|
misses every key for hyphenated dirnames.
|
||||||
|
"""
|
||||||
|
parent = tempfile.mkdtemp()
|
||||||
|
try:
|
||||||
|
project_root = Path(parent) / "my-cool-app"
|
||||||
|
project_root.mkdir()
|
||||||
|
config = load_config(str(project_root))
|
||||||
|
assert config["wing"] == "my_cool_app"
|
||||||
|
finally:
|
||||||
|
shutil.rmtree(parent)
|
||||||
|
|
||||||
|
|
||||||
def test_scan_project_skips_mempalace_generated_files():
|
def test_scan_project_skips_mempalace_generated_files():
|
||||||
with tempfile.TemporaryDirectory() as tmpdir:
|
with tempfile.TemporaryDirectory() as tmpdir:
|
||||||
project_root = Path(tmpdir).resolve()
|
project_root = Path(tmpdir).resolve()
|
||||||
|
|||||||
Reference in New Issue
Block a user