Merge pull request #1184 from MemPalace/feat/cross-wing-topic-tunnels
feat(graph): cross-wing tunnels by shared topics (#1180)
This commit is contained in:
@@ -6,6 +6,14 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
|||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
## [3.3.4] — unreleased
|
||||||
|
|
||||||
|
### Added
|
||||||
|
|
||||||
|
- **Cross-wing topic tunnels.** When two wings have confirmed `TOPIC` labels in common (the LLM-refine bucket from `mempalace init --llm`), the miner now drops a symmetric tunnel between them at mine time — one tunnel per shared topic — so the palace graph reflects shared themes (frameworks, vendors, recurring concepts). Tunnels are routed through the existing `create_tunnel` storage so they share dedup and persistence with explicit tunnels. Topic tunnels are stored under a synthetic `topic:<name>` room and tagged with `kind: "topic"` on the stored dict — this keeps them distinct from literal folder-derived rooms of the same name (a wing with both an `Angular` folder room and an `Angular` topic tunnel no longer collides at `follow_tunnels` read time) and gives LLMs scanning `list_tunnels` a visible discriminator. The threshold — the minimum number of topics two wings must share before any tunnel is created between them — is configurable via the `MEMPALACE_TOPIC_TUNNEL_MIN_COUNT` env var or `topic_tunnel_min_count` in `~/.mempalace/config.json` (default `1`). Manifest-dependency overlap and per-topic allow/deny lists remain out of scope. (#1180)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
## [3.3.3] — 2026-04-23
|
## [3.3.3] — 2026-04-23
|
||||||
|
|
||||||
### Bug Fixes
|
### Bug Fixes
|
||||||
|
|||||||
+18
-5
@@ -117,21 +117,34 @@ def cmd_init(args):
|
|||||||
if languages_tuple != ("en",):
|
if languages_tuple != ("en",):
|
||||||
print(f" Languages: {', '.join(languages_tuple)}")
|
print(f" Languages: {', '.join(languages_tuple)}")
|
||||||
detected = discover_entities(args.dir, languages=languages_tuple, llm_provider=llm_provider)
|
detected = discover_entities(args.dir, languages=languages_tuple, llm_provider=llm_provider)
|
||||||
total = len(detected["people"]) + len(detected["projects"]) + len(detected["uncertain"])
|
total = (
|
||||||
|
len(detected["people"])
|
||||||
|
+ len(detected["projects"])
|
||||||
|
+ len(detected.get("topics", []))
|
||||||
|
+ len(detected["uncertain"])
|
||||||
|
)
|
||||||
if total > 0:
|
if total > 0:
|
||||||
confirmed = confirm_entities(detected, yes=getattr(args, "yes", False))
|
confirmed = confirm_entities(detected, yes=getattr(args, "yes", False))
|
||||||
# Save confirmed entities to <project>/entities.json (per-project
|
# Save confirmed entities to <project>/entities.json (per-project
|
||||||
# audit trail — user can inspect or hand-edit) AND merge into the
|
# audit trail — user can inspect or hand-edit) AND merge into the
|
||||||
# global registry the miner reads at mine time.
|
# global registry the miner reads at mine time. Topics are kept
|
||||||
if confirmed["people"] or confirmed["projects"]:
|
# separately so the miner can later compute cross-wing tunnels
|
||||||
entities_path = Path(args.dir).expanduser().resolve() / "entities.json"
|
# from shared topics (see palace_graph.compute_topic_tunnels).
|
||||||
|
if confirmed["people"] or confirmed["projects"] or confirmed.get("topics"):
|
||||||
|
project_path = Path(args.dir).expanduser().resolve()
|
||||||
|
entities_path = project_path / "entities.json"
|
||||||
with open(entities_path, "w", encoding="utf-8") as f:
|
with open(entities_path, "w", encoding="utf-8") as f:
|
||||||
json.dump(confirmed, f, indent=2, ensure_ascii=False)
|
json.dump(confirmed, f, indent=2, ensure_ascii=False)
|
||||||
print(f" Entities saved: {entities_path}")
|
print(f" Entities saved: {entities_path}")
|
||||||
|
|
||||||
from .miner import add_to_known_entities
|
from .miner import add_to_known_entities
|
||||||
|
|
||||||
registry_path = add_to_known_entities(confirmed)
|
# Wing matches the default produced by ``room_detector_local``
|
||||||
|
# (folder basename) and the miner fallback in ``load_config``.
|
||||||
|
# Used by the topics_by_wing map so cross-wing tunnels can be
|
||||||
|
# computed at mine time.
|
||||||
|
wing = project_path.name
|
||||||
|
registry_path = add_to_known_entities(confirmed, wing=wing)
|
||||||
print(f" Registry updated: {registry_path}")
|
print(f" Registry updated: {registry_path}")
|
||||||
else:
|
else:
|
||||||
print(" No entities detected — proceeding with directory-based rooms.")
|
print(" No entities detected — proceeding with directory-based rooms.")
|
||||||
|
|||||||
@@ -253,6 +253,32 @@ class MempalaceConfig:
|
|||||||
return env_val.strip().lower()
|
return env_val.strip().lower()
|
||||||
return str(self._file_config.get("embedding_device", "auto")).strip().lower()
|
return str(self._file_config.get("embedding_device", "auto")).strip().lower()
|
||||||
|
|
||||||
|
@property
def topic_tunnel_min_count(self):
    """How many confirmed topics two wings must share before they are linked.

    Resolution order:

    1. ``MEMPALACE_TOPIC_TUNNEL_MIN_COUNT`` from the environment, but only
       when it is non-empty and parses to an integer ``>= 1``.
    2. ``topic_tunnel_min_count`` from the file config, clamped to a
       minimum of ``1``.
    3. ``1`` when neither source yields a usable integer.

    A value of ``1`` lets any single shared topic produce a tunnel; higher
    values require that many overlapping topics per wing pair.
    """
    raw_env = os.environ.get("MEMPALACE_TOPIC_TUNNEL_MIN_COUNT")
    if raw_env:
        try:
            from_env = int(raw_env)
        except ValueError:
            from_env = None
        # An unparseable or sub-1 env value is ignored, not clamped: the
        # config file still gets its say.
        if from_env is not None and from_env >= 1:
            return from_env

    raw_cfg = self._file_config.get("topic_tunnel_min_count")
    if raw_cfg is None:
        return 1
    try:
        from_cfg = int(raw_cfg)
    except (TypeError, ValueError):
        return 1
    return from_cfg if from_cfg > 1 else 1
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def hook_silent_save(self):
|
def hook_silent_save(self):
|
||||||
"""Whether the stop hook saves directly (True) or blocks for MCP calls (False)."""
|
"""Whether the stop hook saves directly (True) or blocks for MCP calls (False)."""
|
||||||
|
|||||||
@@ -440,7 +440,7 @@ def detect_entities(file_paths: list, max_files: int = 10, languages=("en",)) ->
|
|||||||
candidates = extract_candidates(combined_text, languages=langs)
|
candidates = extract_candidates(combined_text, languages=langs)
|
||||||
|
|
||||||
if not candidates:
|
if not candidates:
|
||||||
return {"people": [], "projects": [], "uncertain": []}
|
return {"people": [], "projects": [], "topics": [], "uncertain": []}
|
||||||
|
|
||||||
# Score and classify each candidate
|
# Score and classify each candidate
|
||||||
people = []
|
people = []
|
||||||
@@ -467,6 +467,7 @@ def detect_entities(file_paths: list, max_files: int = 10, languages=("en",)) ->
|
|||||||
return {
|
return {
|
||||||
"people": people[:15],
|
"people": people[:15],
|
||||||
"projects": projects[:10],
|
"projects": projects[:10],
|
||||||
|
"topics": [],
|
||||||
"uncertain": uncertain[:8],
|
"uncertain": uncertain[:8],
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -489,7 +490,13 @@ def confirm_entities(detected: dict, yes: bool = False) -> dict:
|
|||||||
"""
|
"""
|
||||||
Interactive confirmation step.
|
Interactive confirmation step.
|
||||||
User reviews detected entities, removes wrong ones, adds missing ones.
|
User reviews detected entities, removes wrong ones, adds missing ones.
|
||||||
Returns confirmed {people: [names], projects: [names]}
|
Returns confirmed {people: [names], projects: [names], topics: [names]}.
|
||||||
|
|
||||||
|
Topics are not surfaced for interactive review — they come from the
|
||||||
|
LLM-refined ``TOPIC`` bucket and are passed through verbatim. They
|
||||||
|
feed cross-wing tunnel computation at mine time (see
|
||||||
|
``palace_graph.compute_topic_tunnels``); a wrong topic at worst adds
|
||||||
|
a low-traffic tunnel and never alters drawer storage.
|
||||||
|
|
||||||
Pass yes=True to auto-accept all detected entities without prompting.
|
Pass yes=True to auto-accept all detected entities without prompting.
|
||||||
"""
|
"""
|
||||||
@@ -501,18 +508,28 @@ def confirm_entities(detected: dict, yes: bool = False) -> dict:
|
|||||||
_print_entity_list(detected["people"], "PEOPLE")
|
_print_entity_list(detected["people"], "PEOPLE")
|
||||||
_print_entity_list(detected["projects"], "PROJECTS")
|
_print_entity_list(detected["projects"], "PROJECTS")
|
||||||
|
|
||||||
|
if detected.get("topics"):
|
||||||
|
_print_entity_list(detected["topics"], "TOPICS (cross-wing tunnel signal)")
|
||||||
|
|
||||||
if detected["uncertain"]:
|
if detected["uncertain"]:
|
||||||
_print_entity_list(detected["uncertain"], "UNCERTAIN (need your call)")
|
_print_entity_list(detected["uncertain"], "UNCERTAIN (need your call)")
|
||||||
|
|
||||||
confirmed_people = [e["name"] for e in detected["people"]]
|
confirmed_people = [e["name"] for e in detected["people"]]
|
||||||
confirmed_projects = [e["name"] for e in detected["projects"]]
|
confirmed_projects = [e["name"] for e in detected["projects"]]
|
||||||
|
confirmed_topics = [e["name"] for e in detected.get("topics", [])]
|
||||||
|
|
||||||
if yes:
|
if yes:
|
||||||
# Auto-accept: include all detected (skip uncertain — ambiguous without user input)
|
# Auto-accept: include all detected (skip uncertain — ambiguous without user input)
|
||||||
print(
|
print(
|
||||||
f"\n Auto-accepting {len(confirmed_people)} people, {len(confirmed_projects)} projects."
|
f"\n Auto-accepting {len(confirmed_people)} people, "
|
||||||
|
f"{len(confirmed_projects)} projects, "
|
||||||
|
f"{len(confirmed_topics)} topics."
|
||||||
)
|
)
|
||||||
return {"people": confirmed_people, "projects": confirmed_projects}
|
return {
|
||||||
|
"people": confirmed_people,
|
||||||
|
"projects": confirmed_projects,
|
||||||
|
"topics": confirmed_topics,
|
||||||
|
}
|
||||||
|
|
||||||
print(f"\n{'─' * 58}")
|
print(f"\n{'─' * 58}")
|
||||||
print(" Options:")
|
print(" Options:")
|
||||||
@@ -570,11 +587,14 @@ def confirm_entities(detected: dict, yes: bool = False) -> dict:
|
|||||||
print(" Confirmed:")
|
print(" Confirmed:")
|
||||||
print(f" People: {', '.join(confirmed_people) or '(none)'}")
|
print(f" People: {', '.join(confirmed_people) or '(none)'}")
|
||||||
print(f" Projects: {', '.join(confirmed_projects) or '(none)'}")
|
print(f" Projects: {', '.join(confirmed_projects) or '(none)'}")
|
||||||
|
if confirmed_topics:
|
||||||
|
print(f" Topics: {', '.join(confirmed_topics)}")
|
||||||
print(f"{'=' * 58}\n")
|
print(f"{'=' * 58}\n")
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"people": confirmed_people,
|
"people": confirmed_people,
|
||||||
"projects": confirmed_projects,
|
"projects": confirmed_projects,
|
||||||
|
"topics": confirmed_topics,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
+14
-9
@@ -197,13 +197,23 @@ def _apply_classifications(
|
|||||||
"""Merge LLM decisions back into the detected dict.
|
"""Merge LLM decisions back into the detected dict.
|
||||||
|
|
||||||
Returns (new_detected, reclassified_count, dropped_count).
|
Returns (new_detected, reclassified_count, dropped_count).
|
||||||
|
|
||||||
|
Topics get their own bucket so the caller can persist them as
|
||||||
|
cross-wing tunnel signal. ``AMBIGUOUS`` still falls back to
|
||||||
|
``uncertain`` for human review.
|
||||||
"""
|
"""
|
||||||
label_to_bucket = {
|
label_to_bucket = {
|
||||||
"PERSON": "people",
|
"PERSON": "people",
|
||||||
"PROJECT": "projects",
|
"PROJECT": "projects",
|
||||||
"TOPIC": "uncertain",
|
"TOPIC": "topics",
|
||||||
"AMBIGUOUS": "uncertain",
|
"AMBIGUOUS": "uncertain",
|
||||||
}
|
}
|
||||||
|
bucket_to_type = {
|
||||||
|
"people": "person",
|
||||||
|
"projects": "project",
|
||||||
|
"topics": "topic",
|
||||||
|
"uncertain": "uncertain",
|
||||||
|
}
|
||||||
|
|
||||||
# Index every entity by name for in-place update
|
# Index every entity by name for in-place update
|
||||||
all_entries: list[tuple[str, dict]] = []
|
all_entries: list[tuple[str, dict]] = []
|
||||||
@@ -216,6 +226,7 @@ def _apply_classifications(
|
|||||||
new_detected: dict[str, list[dict]] = {
|
new_detected: dict[str, list[dict]] = {
|
||||||
"people": [],
|
"people": [],
|
||||||
"projects": [],
|
"projects": [],
|
||||||
|
"topics": [],
|
||||||
"uncertain": [],
|
"uncertain": [],
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -223,7 +234,7 @@ def _apply_classifications(
|
|||||||
decision = decisions.get(entry["name"])
|
decision = decisions.get(entry["name"])
|
||||||
if decision is None:
|
if decision is None:
|
||||||
# No LLM opinion — keep as-is
|
# No LLM opinion — keep as-is
|
||||||
new_detected[old_bucket].append(entry)
|
new_detected.setdefault(old_bucket, []).append(entry)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
label, reason = decision
|
label, reason = decision
|
||||||
@@ -245,13 +256,7 @@ def _apply_classifications(
|
|||||||
updated["signals"] = signals
|
updated["signals"] = signals
|
||||||
if target_bucket != old_bucket:
|
if target_bucket != old_bucket:
|
||||||
reclassified += 1
|
reclassified += 1
|
||||||
updated["type"] = (
|
updated["type"] = bucket_to_type.get(target_bucket, "uncertain")
|
||||||
"person"
|
|
||||||
if target_bucket == "people"
|
|
||||||
else "project"
|
|
||||||
if target_bucket == "projects"
|
|
||||||
else "uncertain"
|
|
||||||
)
|
|
||||||
new_detected[target_bucket].append(updated)
|
new_detected[target_bucket].append(updated)
|
||||||
|
|
||||||
return new_detected, reclassified, dropped
|
return new_detected, reclassified, dropped
|
||||||
|
|||||||
+118
-2
@@ -439,7 +439,16 @@ def _refresh_known_entities_cache() -> None:
|
|||||||
data = json.load(f)
|
data = json.load(f)
|
||||||
if isinstance(data, dict):
|
if isinstance(data, dict):
|
||||||
raw = data
|
raw = data
|
||||||
for cat in data.values():
|
for cat_key, cat in data.items():
|
||||||
|
# Special wing-keyed map — its inner values are topic
|
||||||
|
# names but its outer keys are wings, which must NOT be
|
||||||
|
# surfaced as known entities. Pull the topic names out
|
||||||
|
# explicitly instead of treating it as a generic category.
|
||||||
|
if cat_key == "topics_by_wing" and isinstance(cat, dict):
|
||||||
|
for topic_list in cat.values():
|
||||||
|
if isinstance(topic_list, list):
|
||||||
|
names.update(str(n) for n in topic_list if n)
|
||||||
|
continue
|
||||||
if isinstance(cat, list):
|
if isinstance(cat, list):
|
||||||
names.update(str(n) for n in cat if n)
|
names.update(str(n) for n in cat if n)
|
||||||
elif isinstance(cat, dict):
|
elif isinstance(cat, dict):
|
||||||
@@ -474,7 +483,39 @@ def _load_known_entities_raw() -> dict:
|
|||||||
return dict(_ENTITY_REGISTRY_CACHE["raw"])
|
return dict(_ENTITY_REGISTRY_CACHE["raw"])
|
||||||
|
|
||||||
|
|
||||||
def add_to_known_entities(entities_by_category: dict) -> str:
|
def _set_wing_topics(existing: dict, wing_key: str, topics_for_wing: list, coerce) -> None:
    """Overwrite ``existing['topics_by_wing'][wing_key]`` with a deduped list.

    The wing's previous topics are replaced rather than merged, so the
    stored list always mirrors the most recent call.

    Args:
        existing: Registry dict, mutated in place.
        wing_key: Key under ``topics_by_wing`` to write.
        topics_for_wing: Raw topic values; each is passed through ``coerce``.
        coerce: Callable turning a raw value into a name; falsy results are
            dropped.

    Names are deduplicated case-insensitively, keeping the first casing
    seen and the original order. If nothing survives, the wing entry is
    removed; if that leaves the map empty, the ``topics_by_wing`` key is
    removed as well. A non-dict value already stored under
    ``topics_by_wing`` is discarded and replaced by a fresh map.
    """
    registry = existing.get("topics_by_wing")
    if not isinstance(registry, dict):
        registry = {}

    # dicts keep insertion order, so keying on the lowercase form gives
    # "first casing wins" dedup while preserving input order.
    first_seen: dict = {}
    for raw in topics_for_wing:
        label = coerce(raw)
        if label:
            first_seen.setdefault(label.lower(), label)
    deduped = list(first_seen.values())

    if deduped:
        registry[wing_key] = deduped
    else:
        registry.pop(wing_key, None)

    if registry:
        existing["topics_by_wing"] = registry
    else:
        existing.pop("topics_by_wing", None)
|
||||||
|
|
||||||
|
|
||||||
|
def add_to_known_entities(entities_by_category: dict, wing: str = None) -> str:
|
||||||
"""Union ``entities_by_category`` into ``~/.mempalace/known_entities.json``.
|
"""Union ``entities_by_category`` into ``~/.mempalace/known_entities.json``.
|
||||||
|
|
||||||
Accepts ``{category: [names]}`` shape as produced by ``mempalace init``
|
Accepts ``{category: [names]}`` shape as produced by ``mempalace init``
|
||||||
@@ -488,6 +529,15 @@ def add_to_known_entities(entities_by_category: dict) -> str:
|
|||||||
added as keys with ``None`` values so existing code mappings aren't
|
added as keys with ``None`` values so existing code mappings aren't
|
||||||
overwritten. A later compress pass can assign codes.
|
overwritten. A later compress pass can assign codes.
|
||||||
|
|
||||||
|
When ``wing`` is provided AND ``entities_by_category`` contains a
|
||||||
|
``topics`` list, those topics are also recorded under
|
||||||
|
``topics_by_wing[wing]`` (case-insensitive dedup, preserving the
|
||||||
|
casing of the first observed name). This is the signal source for
|
||||||
|
``palace_graph.compute_topic_tunnels`` at mine time. Topics for a
|
||||||
|
wing are *replaced*, not unioned, so a re-run of ``init`` reflects
|
||||||
|
the user's latest confirmation rather than accumulating stale labels
|
||||||
|
indefinitely.
|
||||||
|
|
||||||
The in-process cache is invalidated on write so same-process callers
|
The in-process cache is invalidated on write so same-process callers
|
||||||
(notably ``cmd_init`` → ``cmd_mine`` in sequence) see the update
|
(notably ``cmd_init`` → ``cmd_mine`` in sequence) see the update
|
||||||
immediately instead of waiting for a mtime re-check.
|
immediately instead of waiting for a mtime re-check.
|
||||||
@@ -515,7 +565,16 @@ def add_to_known_entities(entities_by_category: dict) -> str:
|
|||||||
name = str(value)
|
name = str(value)
|
||||||
return name if name else None
|
return name if name else None
|
||||||
|
|
||||||
|
# Separate the topics_by_wing key from regular categories so we don't
|
||||||
|
# treat it as a flat name-list elsewhere in this function.
|
||||||
|
topics_for_wing = None
|
||||||
|
if wing and isinstance(wing, str) and wing.strip():
|
||||||
|
topics_for_wing = entities_by_category.get("topics") or []
|
||||||
|
|
||||||
for category, names in entities_by_category.items():
|
for category, names in entities_by_category.items():
|
||||||
|
if category == "topics_by_wing":
|
||||||
|
# Reserved key — managed separately below.
|
||||||
|
continue
|
||||||
if not isinstance(names, list) or not names:
|
if not isinstance(names, list) or not names:
|
||||||
continue
|
continue
|
||||||
current = existing.get(category)
|
current = existing.get(category)
|
||||||
@@ -551,6 +610,9 @@ def add_to_known_entities(entities_by_category: dict) -> str:
|
|||||||
ordered.append(name)
|
ordered.append(name)
|
||||||
existing[category] = ordered
|
existing[category] = ordered
|
||||||
|
|
||||||
|
if topics_for_wing is not None:
|
||||||
|
_set_wing_topics(existing, wing.strip(), topics_for_wing, _coerce_name)
|
||||||
|
|
||||||
registry_path.write_text(_json.dumps(existing, indent=2, ensure_ascii=False), encoding="utf-8")
|
registry_path.write_text(_json.dumps(existing, indent=2, ensure_ascii=False), encoding="utf-8")
|
||||||
try:
|
try:
|
||||||
registry_path.chmod(0o600)
|
registry_path.chmod(0o600)
|
||||||
@@ -565,6 +627,28 @@ def add_to_known_entities(entities_by_category: dict) -> str:
|
|||||||
return str(registry_path)
|
return str(registry_path)
|
||||||
|
|
||||||
|
|
||||||
|
def get_topics_by_wing() -> dict:
    """Read the ``topics_by_wing`` map out of the known-entities registry.

    Returns:
        ``{wing: [topic, ...]}`` with wing names stripped of surrounding
        whitespace. Only string wing keys that are non-blank, and only
        string topics that are non-blank, are kept; wings left with no
        topics are omitted. ``{}`` is returned when the stored value is
        not a dict. Topic casing is returned exactly as stored.
    """
    stored = _load_known_entities_raw().get("topics_by_wing")
    if not isinstance(stored, dict):
        return {}

    result: dict = {}
    for wing_name, labels in stored.items():
        if not (isinstance(wing_name, str) and wing_name.strip()):
            continue
        if not isinstance(labels, list):
            continue
        kept = [str(label) for label in labels if isinstance(label, str) and label.strip()]
        if kept:
            result[wing_name.strip()] = kept
    return result
|
||||||
|
|
||||||
|
|
||||||
_HALL_KEYWORDS_CACHE = None
|
_HALL_KEYWORDS_CACHE = None
|
||||||
|
|
||||||
|
|
||||||
@@ -962,6 +1046,19 @@ def mine(
|
|||||||
if not dry_run:
|
if not dry_run:
|
||||||
print(f" + [{i:4}/{len(files)}] {filepath.name[:50]:50} +{drawers}")
|
print(f" + [{i:4}/{len(files)}] {filepath.name[:50]:50} +{drawers}")
|
||||||
|
|
||||||
|
if not dry_run:
|
||||||
|
# Cross-wing topic tunnels: after every file in this wing has been
|
||||||
|
# processed, link this wing to any other wing that shares a
|
||||||
|
# confirmed TOPIC label. Out of scope for v1: manifest-dependency
|
||||||
|
# overlap, per-topic allow/deny lists, search-result surfacing.
|
||||||
|
try:
|
||||||
|
tunnels_added = _compute_topic_tunnels_for_wing(wing)
|
||||||
|
if tunnels_added:
|
||||||
|
print(f"\n Topic tunnels: +{tunnels_added} cross-wing link(s)")
|
||||||
|
except Exception as e:
|
||||||
|
# Tunnel computation must never fail a mine — degrade quietly.
|
||||||
|
print(f"\n WARNING: topic tunnel computation skipped — {e}", file=sys.stderr)
|
||||||
|
|
||||||
print(f"\n{'=' * 55}")
|
print(f"\n{'=' * 55}")
|
||||||
print(" Done.")
|
print(" Done.")
|
||||||
print(f" Files processed: {len(files) - files_skipped}")
|
print(f" Files processed: {len(files) - files_skipped}")
|
||||||
@@ -974,6 +1071,25 @@ def mine(
|
|||||||
print(f"{'=' * 55}\n")
|
print(f"{'=' * 55}\n")
|
||||||
|
|
||||||
|
|
||||||
|
def _compute_topic_tunnels_for_wing(wing: str) -> int:
    """Link ``wing`` to the other wings it shares confirmed topics with.

    Loads the ``topics_by_wing`` map via ``get_topics_by_wing`` and, if
    ``wing`` is present in it, delegates to
    ``palace_graph.topic_tunnels_for_wing`` using the configured
    ``topic_tunnel_min_count`` threshold.

    Returns:
        The number of tunnel dicts returned by the delegate; ``0`` when the
        map is empty or has no entry for ``wing``.
    """
    # Imported lazily inside the function, as in the rest of this module's
    # helpers that reach into sibling modules.
    from .config import MempalaceConfig
    from .palace_graph import topic_tunnels_for_wing

    registry = get_topics_by_wing()
    if not registry or wing not in registry:
        return 0

    threshold = MempalaceConfig().topic_tunnel_min_count
    return len(topic_tunnels_for_wing(wing, registry, min_count=threshold))
|
||||||
|
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# STATUS
|
# STATUS
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
|
|||||||
@@ -362,6 +362,7 @@ def create_tunnel(
|
|||||||
label: str = "",
|
label: str = "",
|
||||||
source_drawer_id: str = None,
|
source_drawer_id: str = None,
|
||||||
target_drawer_id: str = None,
|
target_drawer_id: str = None,
|
||||||
|
kind: str = "explicit",
|
||||||
):
|
):
|
||||||
"""Create an explicit (symmetric) tunnel between two locations in the palace.
|
"""Create an explicit (symmetric) tunnel between two locations in the palace.
|
||||||
|
|
||||||
@@ -382,6 +383,11 @@ def create_tunnel(
|
|||||||
label: Description of the connection.
|
label: Description of the connection.
|
||||||
source_drawer_id: Optional specific drawer ID.
|
source_drawer_id: Optional specific drawer ID.
|
||||||
target_drawer_id: Optional specific drawer ID.
|
target_drawer_id: Optional specific drawer ID.
|
||||||
|
kind: Tunnel category — ``"explicit"`` (default, user-created link
|
||||||
|
between real rooms) or ``"topic"`` (auto-generated cross-wing
|
||||||
|
topical link where rooms are synthetic ``topic:<name>``
|
||||||
|
identifiers). Preserved on the stored dict so readers can
|
||||||
|
distinguish real-room traversals from topic connections.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
The stored tunnel dict.
|
The stored tunnel dict.
|
||||||
@@ -401,6 +407,7 @@ def create_tunnel(
|
|||||||
"source": {"wing": source_wing, "room": source_room},
|
"source": {"wing": source_wing, "room": source_room},
|
||||||
"target": {"wing": target_wing, "room": target_room},
|
"target": {"wing": target_wing, "room": target_room},
|
||||||
"label": label,
|
"label": label,
|
||||||
|
"kind": kind,
|
||||||
"created_at": datetime.now(timezone.utc).isoformat(),
|
"created_at": datetime.now(timezone.utc).isoformat(),
|
||||||
}
|
}
|
||||||
if source_drawer_id:
|
if source_drawer_id:
|
||||||
@@ -499,3 +506,159 @@ def follow_tunnels(wing: str, room: str, col=None, config=None):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
return connections
|
return connections
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# TOPIC TUNNELS — auto-link wings that share confirmed TOPIC labels
|
||||||
|
# =============================================================================
|
||||||
|
# When two wings have one or more confirmed topics in common (e.g. both
|
||||||
|
# discuss "Angular" or "OpenAPI"), drop a symmetric tunnel between them.
|
||||||
|
# Topics come from the LLM-refined ``TOPIC`` bucket in the per-project
|
||||||
|
# ``entities.json`` and are persisted by wing in
|
||||||
|
# ``~/.mempalace/known_entities.json`` under ``topics_by_wing``.
|
||||||
|
#
|
||||||
|
# Tunnels are created via the existing ``create_tunnel`` API so they share
|
||||||
|
# storage and dedup with explicit tunnels. The room is a synthetic
|
||||||
|
# ``topic:<original-casing>`` identifier — the ``topic:`` prefix namespaces
|
||||||
|
# these tunnels away from literal folder-derived rooms so a wing with an
|
||||||
|
# auto-detected "Angular" folder room and a "shared topic: Angular" tunnel
|
||||||
|
# remain distinct at ``follow_tunnels`` / ``list_tunnels`` time. The prefix
|
||||||
|
# is also visible to any LLM scanning the tunnel list. The ``kind: "topic"``
|
||||||
|
# field on the stored dict gives callers a machine-readable discriminator.
|
||||||
|
|
||||||
|
TOPIC_ROOM_PREFIX = "topic:"
|
||||||
|
|
||||||
|
|
||||||
|
def _normalize_topic(name: str) -> str:
    """Return the comparison key for a topic: stringified, trimmed, lowercased."""
    trimmed = str(name).strip()
    return trimmed.lower()
|
||||||
|
|
||||||
|
|
||||||
|
def topic_room(name: str) -> str:
    """Build the synthetic room identifier used for a topic tunnel.

    The result is ``TOPIC_ROOM_PREFIX`` followed by ``name`` unchanged, so a
    topic called "Angular" gets a room name distinct from a literal
    "Angular" room in the same wing.
    """
    return "{}{}".format(TOPIC_ROOM_PREFIX, name)
|
||||||
|
|
||||||
|
|
||||||
|
def compute_topic_tunnels(
    topics_by_wing: dict,
    min_count: int = 1,
    label_prefix: str = "shared topic",
) -> list[dict]:
    """Create one tunnel per shared topic for each qualifying pair of wings.

    Args:
        topics_by_wing: ``{wing_name: [topic_name, ...]}``. Wing keys that
            are not non-blank strings, topic collections that are not a
            list/tuple, and topic entries that are not non-blank strings
            are all ignored. Topics are matched case-insensitively.
        min_count: Number of shared topics a wing pair needs before any
            tunnel is created for it. Values below ``1`` are treated as
            ``1``.
        label_prefix: Text placed before ``": <topic>"`` in each tunnel
            label.

    Returns:
        The values returned by ``create_tunnel``, in order: wing pairs in
        sorted order, and within a pair, topics sorted by their normalized
        key. A pair below the threshold contributes nothing. An empty or
        ``None`` ``topics_by_wing`` yields ``[]``.

    Each tunnel uses the same synthetic ``topic_room`` on both ends and is
    created with ``kind="topic"``. The displayed topic casing comes from
    the wing that sorts first in the pair.
    """
    if not topics_by_wing:
        return []

    threshold = max(1, int(min_count))

    # Per wing: normalized topic key -> first-seen (trimmed) display casing.
    casing_by_wing: dict[str, dict[str, str]] = {}
    for raw_wing, raw_names in topics_by_wing.items():
        if not (isinstance(raw_wing, str) and raw_wing.strip()):
            continue
        if not isinstance(raw_names, (list, tuple)):
            continue
        casing: dict[str, str] = {}
        for raw_name in raw_names:
            if isinstance(raw_name, str):
                folded = _normalize_topic(raw_name)
                if folded:
                    casing.setdefault(folded, raw_name.strip())
        if casing:
            casing_by_wing[raw_wing.strip()] = casing

    # Every unordered pair exactly once, in sorted order for determinism.
    ordered = sorted(casing_by_wing)
    pairs = [(a, b) for idx, a in enumerate(ordered) for b in ordered[idx + 1 :]]

    tunnels: list[dict] = []
    for first, second in pairs:
        names_first = casing_by_wing[first]
        names_second = casing_by_wing[second]
        overlap = names_first.keys() & names_second.keys()
        if len(overlap) < threshold:
            continue
        for folded in sorted(overlap):
            display = names_first[folded] or names_second[folded]
            synthetic_room = topic_room(display)
            tunnels.append(
                create_tunnel(
                    source_wing=first,
                    source_room=synthetic_room,
                    target_wing=second,
                    target_room=synthetic_room,
                    label=f"{label_prefix}: {display}",
                    kind="topic",
                )
            )
    return tunnels
|
||||||
|
|
||||||
|
|
||||||
|
def topic_tunnels_for_wing(
    wing: str,
    topics_by_wing: dict,
    min_count: int = 1,
    label_prefix: str = "shared topic",
) -> list[dict]:
    """Create topic tunnels for the pairs that involve one specific wing.

    Args:
        wing: Wing to link; surrounding whitespace is stripped before the
            lookup in ``topics_by_wing``.
        topics_by_wing: ``{wing_name: [topic_name, ...]}`` for all wings.
        min_count: Forwarded to ``compute_topic_tunnels``.
        label_prefix: Forwarded to ``compute_topic_tunnels``.

    Returns:
        The concatenated results of ``compute_topic_tunnels`` run on each
        two-wing ``{wing, other}`` slice. ``[]`` when the map is empty,
        ``wing`` is not a non-blank string, or ``wing`` has no non-empty
        list/tuple of topics.
    """
    if not topics_by_wing:
        return []
    if not isinstance(wing, str) or not wing.strip():
        return []

    wing = wing.strip()
    own = topics_by_wing.get(wing)
    if not (isinstance(own, (list, tuple)) and own):
        return []

    # Delegating each (wing, peer) pair to compute_topic_tunnels keeps the
    # threshold and casing rules defined in a single place.
    results: list[dict] = []
    for peer, peer_topics in topics_by_wing.items():
        if not isinstance(peer, str) or not peer.strip() or peer == wing:
            continue
        if not isinstance(peer_topics, (list, tuple)) or not peer_topics:
            continue
        pair_map = {wing: list(own), peer: list(peer_topics)}
        results += compute_topic_tunnels(
            pair_map,
            min_count=min_count,
            label_prefix=label_prefix,
        )
    return results
|
||||||
|
|||||||
@@ -558,6 +558,7 @@ def to_detected_dict(
|
|||||||
return {
|
return {
|
||||||
"people": people_entries,
|
"people": people_entries,
|
||||||
"projects": proj_entries,
|
"projects": proj_entries,
|
||||||
|
"topics": [],
|
||||||
"uncertain": [],
|
"uncertain": [],
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -577,7 +578,7 @@ def _merge_detected(primary: dict, secondary: dict, drop_secondary_uncertain: bo
|
|||||||
"""
|
"""
|
||||||
seen = {e["name"].lower() for cat in primary.values() for e in cat}
|
seen = {e["name"].lower() for cat in primary.values() for e in cat}
|
||||||
merged = {k: list(v) for k, v in primary.items()}
|
merged = {k: list(v) for k, v in primary.items()}
|
||||||
for cat_key in ("people", "projects", "uncertain"):
|
for cat_key in ("people", "projects", "topics", "uncertain"):
|
||||||
if cat_key == "uncertain" and drop_secondary_uncertain:
|
if cat_key == "uncertain" and drop_secondary_uncertain:
|
||||||
continue
|
continue
|
||||||
for e in secondary.get(cat_key, []):
|
for e in secondary.get(cat_key, []):
|
||||||
@@ -654,7 +655,7 @@ def discover_entities(
|
|||||||
prose_detected = (
|
prose_detected = (
|
||||||
detect_entities(prose_files, languages=languages)
|
detect_entities(prose_files, languages=languages)
|
||||||
if prose_files
|
if prose_files
|
||||||
else {"people": [], "projects": [], "uncertain": []}
|
else {"people": [], "projects": [], "topics": [], "uncertain": []}
|
||||||
)
|
)
|
||||||
|
|
||||||
# Without LLM refinement, suppress regex "uncertain" noise when real
|
# Without LLM refinement, suppress regex "uncertain" noise when real
|
||||||
|
|||||||
@@ -235,13 +235,13 @@ def test_detect_entities_empty_files(tmp_path):
|
|||||||
f = tmp_path / "empty.txt"
|
f = tmp_path / "empty.txt"
|
||||||
f.write_text("")
|
f.write_text("")
|
||||||
result = detect_entities([f])
|
result = detect_entities([f])
|
||||||
assert result == {"people": [], "projects": [], "uncertain": []}
|
assert result == {"people": [], "projects": [], "topics": [], "uncertain": []}
|
||||||
|
|
||||||
|
|
||||||
def test_detect_entities_handles_missing_file(tmp_path):
|
def test_detect_entities_handles_missing_file(tmp_path):
|
||||||
missing = tmp_path / "nonexistent.txt"
|
missing = tmp_path / "nonexistent.txt"
|
||||||
result = detect_entities([missing])
|
result = detect_entities([missing])
|
||||||
assert result == {"people": [], "projects": [], "uncertain": []}
|
assert result == {"people": [], "projects": [], "topics": [], "uncertain": []}
|
||||||
|
|
||||||
|
|
||||||
def test_detect_entities_respects_max_files(tmp_path):
|
def test_detect_entities_respects_max_files(tmp_path):
|
||||||
|
|||||||
@@ -206,3 +206,71 @@ def test_populated_registry_improves_miner_recall(temp_registry):
|
|||||||
# All four registered entities should land in the metadata string
|
# All four registered entities should land in the metadata string
|
||||||
for expected in ("Julia Grib", "Kevin Heifner", "hyperion-history", "mempalace"):
|
for expected in ("Julia Grib", "Kevin Heifner", "hyperion-history", "mempalace"):
|
||||||
assert expected in tagged, f"expected '{expected}' in metadata {tagged!r}"
|
assert expected in tagged, f"expected '{expected}' in metadata {tagged!r}"
|
||||||
|
|
||||||
|
|
||||||
|
# ── topics_by_wing — cross-wing tunnel signal source (issue #1180) ──
|
||||||
|
|
||||||
|
|
||||||
|
def test_topics_persisted_under_topics_by_wing(temp_registry):
    """Topics are written to the flat ``topics`` list and to the per-wing map."""
    entities = {"people": ["Alice"], "topics": ["Angular", "OpenAPI"]}
    miner.add_to_known_entities(entities, wing="wing_alpha")

    stored = json.loads(temp_registry.read_text())
    # The flat aggregate list (existing-style storage) still gets the topic.
    assert "Angular" in stored["topics"]
    # The per-wing record is kept for tunnel computation.
    assert stored["topics_by_wing"]["wing_alpha"] == ["Angular", "OpenAPI"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_topics_by_wing_replaces_on_reinit(temp_registry):
    """A second init for the same wing overwrites its topic list instead of
    appending, so stale topics do not accumulate."""
    for topics in (["Angular", "OpenAPI"], ["OpenAPI", "Postgres"]):
        miner.add_to_known_entities({"topics": topics}, wing="wing_alpha")

    stored = json.loads(temp_registry.read_text())
    assert stored["topics_by_wing"]["wing_alpha"] == ["OpenAPI", "Postgres"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_topics_by_wing_multiple_wings_coexist(temp_registry):
    """Registering topics for two wings keeps both per-wing entries."""
    expected = {"wing_a": ["foo"], "wing_b": ["foo", "bar"]}
    for wing_name, topics in expected.items():
        # Pass a copy so the expectation cannot be altered by the callee.
        miner.add_to_known_entities({"topics": list(topics)}, wing=wing_name)

    stored = json.loads(temp_registry.read_text())
    assert stored["topics_by_wing"] == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_topics_by_wing_skipped_without_wing(temp_registry):
    """Without a wing, only the flat topics list is written."""
    miner.add_to_known_entities({"topics": ["foo"]})

    stored = json.loads(temp_registry.read_text())
    # The flat list is still saved ...
    assert stored["topics"] == ["foo"]
    # ... but no wing means no ``topics_by_wing`` entry at all.
    assert "topics_by_wing" not in stored
|
||||||
|
|
||||||
|
|
||||||
|
def test_topics_by_wing_dedupes_case_insensitive(temp_registry):
    """Case variants of one topic collapse to a single per-wing entry."""
    variants = ["OpenAPI", "openapi", "OPENAPI"]
    miner.add_to_known_entities({"topics": variants}, wing="wing_a")

    stored = json.loads(temp_registry.read_text())
    # A single entry survives, keeping the casing of the first name seen.
    assert stored["topics_by_wing"]["wing_a"] == ["OpenAPI"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_get_topics_by_wing_reads_registry(temp_registry):
    """``get_topics_by_wing`` returns the per-wing topics that were registered."""
    expected = {"wing_a": ["foo"], "wing_b": ["foo", "bar"]}
    for wing_name, topics in expected.items():
        # Pass a copy so the expectation cannot be altered by the callee.
        miner.add_to_known_entities({"topics": list(topics)}, wing=wing_name)

    assert miner.get_topics_by_wing() == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_get_topics_by_wing_empty_when_missing(temp_registry):
    """A registry holding only people yields an empty per-wing topic map."""
    only_people = {"people": ["Alice"]}
    miner.add_to_known_entities(only_people)

    topics_by_wing = miner.get_topics_by_wing()
    assert topics_by_wing == {}
|
||||||
|
|
||||||
|
|
||||||
|
def test_topics_by_wing_does_not_pollute_known_names(temp_registry):
    """Only topic strings belong in the flat known-names set used by
    ``_extract_entities_for_metadata``; the wing names that key
    ``topics_by_wing`` must NOT leak into it."""
    wing_name = "wing_super_secret_project"
    miner.add_to_known_entities({"topics": ["Angular"]}, wing=wing_name)

    known_names = miner._load_known_entities()
    assert "Angular" in known_names
    assert wing_name not in known_names
|
||||||
|
|||||||
@@ -272,7 +272,9 @@ def test_apply_classifications_appends_reason_signal():
|
|||||||
assert any("spoken of by name" in s for s in new["people"][0]["signals"])
|
assert any("spoken of by name" in s for s in new["people"][0]["signals"])
|
||||||
|
|
||||||
|
|
||||||
def test_apply_classifications_topic_goes_to_uncertain():
|
def test_apply_classifications_topic_goes_to_topics_bucket():
|
||||||
|
"""TOPIC classifications now route to a dedicated ``topics`` bucket so the
|
||||||
|
miner can use them as cross-wing tunnel signal (issue #1180)."""
|
||||||
detected = {
|
detected = {
|
||||||
"people": [],
|
"people": [],
|
||||||
"projects": [
|
"projects": [
|
||||||
@@ -289,8 +291,32 @@ def test_apply_classifications_topic_goes_to_uncertain():
|
|||||||
decisions = {"Paris": ("TOPIC", "city, not a project")}
|
decisions = {"Paris": ("TOPIC", "city, not a project")}
|
||||||
new, reclass, _ = _apply_classifications(detected, decisions)
|
new, reclass, _ = _apply_classifications(detected, decisions)
|
||||||
assert len(new["projects"]) == 0
|
assert len(new["projects"]) == 0
|
||||||
|
assert len(new["uncertain"]) == 0
|
||||||
|
assert len(new["topics"]) == 1
|
||||||
|
assert new["topics"][0]["name"] == "Paris"
|
||||||
|
assert new["topics"][0]["type"] == "topic"
|
||||||
|
assert reclass == 1
|
||||||
|
|
||||||
|
|
||||||
|
def test_apply_classifications_ambiguous_still_goes_to_uncertain():
|
||||||
|
detected = {
|
||||||
|
"people": [],
|
||||||
|
"projects": [
|
||||||
|
{
|
||||||
|
"name": "Foo",
|
||||||
|
"type": "project",
|
||||||
|
"confidence": 0.7,
|
||||||
|
"frequency": 5,
|
||||||
|
"signals": ["regex"],
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"uncertain": [],
|
||||||
|
}
|
||||||
|
decisions = {"Foo": ("AMBIGUOUS", "context insufficient")}
|
||||||
|
new, reclass, _ = _apply_classifications(detected, decisions)
|
||||||
|
assert len(new["projects"]) == 0
|
||||||
assert len(new["uncertain"]) == 1
|
assert len(new["uncertain"]) == 1
|
||||||
assert new["uncertain"][0]["name"] == "Paris"
|
assert new["uncertain"][0]["name"] == "Foo"
|
||||||
assert reclass == 1
|
assert reclass == 1
|
||||||
|
|
||||||
|
|
||||||
@@ -469,7 +495,9 @@ def test_refine_entities_refines_high_confidence_regex_projects():
|
|||||||
assert provider.call_count == 1
|
assert provider.call_count == 1
|
||||||
assert result.reclassified == 1
|
assert result.reclassified == 1
|
||||||
assert result.merged["projects"] == []
|
assert result.merged["projects"] == []
|
||||||
assert result.merged["uncertain"][0]["name"] == "OpenAPI"
|
# TOPIC labels go to the dedicated ``topics`` bucket so the miner can
|
||||||
|
# use them for cross-wing tunnel computation (issue #1180).
|
||||||
|
assert result.merged["topics"][0]["name"] == "OpenAPI"
|
||||||
|
|
||||||
|
|
||||||
def test_refine_entities_refines_regex_people_but_skips_git_people():
|
def test_refine_entities_refines_regex_people_but_skips_git_people():
|
||||||
|
|||||||
@@ -496,3 +496,107 @@ def test_add_drawer_stamps_normalize_version(tmp_path):
|
|||||||
assert meta["normalize_version"] == NORMALIZE_VERSION
|
assert meta["normalize_version"] == NORMALIZE_VERSION
|
||||||
finally:
|
finally:
|
||||||
del col, client
|
del col, client
|
||||||
|
|
||||||
|
|
||||||
|
def test_mine_creates_topic_tunnels_for_shared_topics(tmp_path, monkeypatch):
    """End-to-end check for issue #1180.

    Two wings already have confirmed topics that overlap; mining one of
    them must leave a cross-wing tunnel between the pair.
    """
    from mempalace import miner, palace_graph

    # Point the registry and the tunnel storage at tmp_path so the
    # developer's real ~/.mempalace directory is never touched.
    registry_path = tmp_path / "known_entities.json"
    monkeypatch.setattr(miner, "_ENTITY_REGISTRY_PATH", str(registry_path))
    miner._ENTITY_REGISTRY_CACHE.update({"mtime": None, "names": frozenset(), "raw": {}})
    tunnel_store = tmp_path / "tunnels.json"
    monkeypatch.setattr(palace_graph, "_TUNNEL_FILE", str(tunnel_store))

    # Simulate a prior init for two wings that share the topic "foo".
    miner.add_to_known_entities({"topics": ["foo", "bar"]}, wing="wing_one")
    miner.add_to_known_entities({"topics": ["foo", "baz"]}, wing="wing_two")

    # Mining wing_two should tunnel to wing_one once per shared topic,
    # which is exactly one tunnel here.
    project_dir = tmp_path / "wing_two_project"
    project_dir.mkdir()
    write_file(
        project_dir / "notes.md",
        "Some prose long enough to make a chunk. " * 20,
    )
    with open(project_dir / "mempalace.yaml", "w") as config_file:
        yaml.dump({"wing": "wing_two", "rooms": [{"name": "general"}]}, config_file)

    palace_dir = tmp_path / "palace"
    mine(str(project_dir), str(palace_dir))

    tunnels = palace_graph.list_tunnels()
    assert len(tunnels) == 1
    tunnel = tunnels[0]
    # Topic tunnels live in a synthetic ``topic:<name>`` room, so they
    # cannot collide with a literal folder-derived room of the same name.
    endpoint_rooms = {tunnel["source"]["room"], tunnel["target"]["room"]}
    assert endpoint_rooms == {"topic:foo"}
    assert tunnel["kind"] == "topic"
    endpoint_wings = {tunnel["source"]["wing"], tunnel["target"]["wing"]}
    assert endpoint_wings == {"wing_one", "wing_two"}
|
||||||
|
|
||||||
|
|
||||||
|
def test_mine_no_tunnel_when_threshold_blocks_overlap(tmp_path, monkeypatch):
    """Setting ``MEMPALACE_TOPIC_TUNNEL_MIN_COUNT`` higher than the real
    overlap prevents any tunnel from being created."""
    from mempalace import miner, palace_graph

    registry_path = tmp_path / "known_entities.json"
    monkeypatch.setattr(miner, "_ENTITY_REGISTRY_PATH", str(registry_path))
    miner._ENTITY_REGISTRY_CACHE.update({"mtime": None, "names": frozenset(), "raw": {}})
    tunnel_store = tmp_path / "tunnels.json"
    monkeypatch.setattr(palace_graph, "_TUNNEL_FILE", str(tunnel_store))
    monkeypatch.setenv("MEMPALACE_TOPIC_TUNNEL_MIN_COUNT", "2")

    # Both wings share exactly one topic.
    for wing_name in ("wing_one", "wing_two"):
        miner.add_to_known_entities({"topics": ["foo"]}, wing=wing_name)

    project_dir = tmp_path / "wing_two_project"
    project_dir.mkdir()
    write_file(
        project_dir / "notes.md",
        "Some prose long enough to make a chunk. " * 20,
    )
    with open(project_dir / "mempalace.yaml", "w") as config_file:
        yaml.dump({"wing": "wing_two", "rooms": [{"name": "general"}]}, config_file)

    palace_dir = tmp_path / "palace"
    mine(str(project_dir), str(palace_dir))

    # The threshold is 2 and only 1 topic is shared, so no tunnel exists.
    assert palace_graph.list_tunnels() == []
|
||||||
|
|
||||||
|
|
||||||
|
def test_mine_no_tunnel_when_only_one_wing_has_topics(tmp_path, monkeypatch):
    """Mining a wing when no other wing has confirmed topics creates no tunnels."""
    from mempalace import miner, palace_graph

    registry_path = tmp_path / "known_entities.json"
    monkeypatch.setattr(miner, "_ENTITY_REGISTRY_PATH", str(registry_path))
    miner._ENTITY_REGISTRY_CACHE.update({"mtime": None, "names": frozenset(), "raw": {}})
    tunnel_store = tmp_path / "tunnels.json"
    monkeypatch.setattr(palace_graph, "_TUNNEL_FILE", str(tunnel_store))

    # Only a single wing ever registers topics.
    miner.add_to_known_entities({"topics": ["foo"]}, wing="wing_one")

    project_dir = tmp_path / "wing_one_project"
    project_dir.mkdir()
    write_file(
        project_dir / "notes.md",
        "Some prose long enough to make a chunk. " * 20,
    )
    with open(project_dir / "mempalace.yaml", "w") as config_file:
        yaml.dump({"wing": "wing_one", "rooms": [{"name": "general"}]}, config_file)

    palace_dir = tmp_path / "palace"
    mine(str(project_dir), str(palace_dir))

    assert palace_graph.list_tunnels() == []
|
||||||
|
|||||||
@@ -135,3 +135,167 @@ class TestExplicitTunnels:
|
|||||||
connections = palace_graph.follow_tunnels("wing_code", "auth", col=col)
|
connections = palace_graph.follow_tunnels("wing_code", "auth", col=col)
|
||||||
assert len(connections) == 1
|
assert len(connections) == 1
|
||||||
assert "drawer_preview" not in connections[0]
|
assert "drawer_preview" not in connections[0]
|
||||||
|
|
||||||
|
|
||||||
|
class TestTopicTunnels:
    """Cross-wing topic tunnels (issue #1180).

    Wings that share confirmed TOPIC labels at or above a configurable
    threshold get a symmetric tunnel between them. The tunnels go through
    the existing ``create_tunnel`` storage, so dedup and persistence are
    shared with explicit tunnels.
    """

    def test_compute_topic_tunnels_creates_link_for_shared_topic(self, tmp_path, monkeypatch):
        _use_tmp_tunnel_file(monkeypatch, tmp_path)
        wing_topics = {
            "wing_alpha": ["Angular", "OpenAPI"],
            "wing_beta": ["OpenAPI", "Kubernetes"],
        }
        created = palace_graph.compute_topic_tunnels(wing_topics, min_count=1)
        assert len(created) == 1
        tunnel = created[0]
        for endpoint in ("source", "target"):
            assert tunnel[endpoint]["wing"] in {"wing_alpha", "wing_beta"}
            # The ``topic:`` prefix namespaces the room so it cannot clash
            # with a literal folder-derived room of the same name; the
            # topic's casing is kept for display.
            assert tunnel[endpoint]["room"] == "topic:OpenAPI"
        assert tunnel["kind"] == "topic"
        # The label shows the human-readable topic, without the prefix.
        assert "OpenAPI" in tunnel["label"]
        assert "topic:OpenAPI" not in tunnel["label"]

        # The standard list_tunnels API returns the same tunnel.
        stored = palace_graph.list_tunnels()
        assert len(stored) == 1
        assert stored[0]["id"] == tunnel["id"]

    def test_compute_topic_tunnels_no_link_below_threshold(self, tmp_path, monkeypatch):
        _use_tmp_tunnel_file(monkeypatch, tmp_path)
        wing_topics = {
            "wing_alpha": ["Angular", "OpenAPI"],
            "wing_beta": ["OpenAPI", "Kubernetes"],
        }
        # Only one topic overlaps, but min_count=2 demands two.
        created = palace_graph.compute_topic_tunnels(wing_topics, min_count=2)
        assert created == []
        assert palace_graph.list_tunnels() == []

    def test_compute_topic_tunnels_above_threshold_creates_per_topic_links(
        self, tmp_path, monkeypatch
    ):
        _use_tmp_tunnel_file(monkeypatch, tmp_path)
        wing_topics = {
            "wing_alpha": ["Angular", "OpenAPI", "Postgres"],
            "wing_beta": ["Angular", "OpenAPI", "Redis"],
        }
        created = palace_graph.compute_topic_tunnels(wing_topics, min_count=2)
        # One wing pair sharing two topics yields two tunnels.
        source_rooms = sorted(tunnel["source"]["room"] for tunnel in created)
        assert source_rooms == ["topic:Angular", "topic:OpenAPI"]

    def test_compute_topic_tunnels_case_insensitive_overlap(self, tmp_path, monkeypatch):
        _use_tmp_tunnel_file(monkeypatch, tmp_path)
        wing_topics = {
            "wing_alpha": ["openapi"],
            "wing_beta": ["OpenAPI"],
        }
        created = palace_graph.compute_topic_tunnels(wing_topics, min_count=1)
        assert len(created) == 1

    def test_compute_topic_tunnels_empty_input_is_noop(self, tmp_path, monkeypatch):
        _use_tmp_tunnel_file(monkeypatch, tmp_path)
        for empty_input in ({}, {"wing_a": []}):
            assert palace_graph.compute_topic_tunnels(empty_input) == []
        assert palace_graph.list_tunnels() == []

    def test_compute_topic_tunnels_three_wings_pairwise(self, tmp_path, monkeypatch):
        _use_tmp_tunnel_file(monkeypatch, tmp_path)
        wing_topics = {
            "wing_a": ["foo"],
            "wing_b": ["foo"],
            "wing_c": ["foo"],
        }
        created = palace_graph.compute_topic_tunnels(wing_topics, min_count=1)
        # Three wings with one common topic give C(3,2) = 3 pairs, hence
        # 3 tunnels.
        assert len(created) == 3
        linked_pairs = {
            tuple(sorted([tunnel["source"]["wing"], tunnel["target"]["wing"]]))
            for tunnel in created
        }
        assert linked_pairs == {
            ("wing_a", "wing_b"),
            ("wing_a", "wing_c"),
            ("wing_b", "wing_c"),
        }

    def test_topic_tunnels_for_wing_only_links_that_wing(self, tmp_path, monkeypatch):
        _use_tmp_tunnel_file(monkeypatch, tmp_path)
        wing_topics = {
            "wing_a": ["foo", "bar"],
            "wing_b": ["foo"],
            "wing_c": ["bar"],
        }
        # wing_a reaches wing_b through "foo" and wing_c through "bar".
        created = palace_graph.topic_tunnels_for_wing("wing_a", wing_topics)
        linked_pairs = {
            tuple(sorted([tunnel["source"]["wing"], tunnel["target"]["wing"]]))
            for tunnel in created
        }
        assert linked_pairs == {("wing_a", "wing_b"), ("wing_a", "wing_c")}
        # No b-c tunnel: the incremental pass for wing_a only computes
        # pairs that include wing_a.
        assert len(palace_graph.list_tunnels()) == 2

    def test_topic_tunnels_for_wing_unknown_wing_is_noop(self, tmp_path, monkeypatch):
        _use_tmp_tunnel_file(monkeypatch, tmp_path)
        wing_topics = {"wing_a": ["foo"], "wing_b": ["foo"]}
        created = palace_graph.topic_tunnels_for_wing("wing_missing", wing_topics)
        assert created == []
        assert palace_graph.list_tunnels() == []

    def test_compute_topic_tunnels_dedupe_on_recompute(self, tmp_path, monkeypatch):
        _use_tmp_tunnel_file(monkeypatch, tmp_path)
        wing_topics = {
            "wing_alpha": ["OpenAPI"],
            "wing_beta": ["OpenAPI"],
        }
        first_pass = palace_graph.compute_topic_tunnels(wing_topics, min_count=1)
        second_pass = palace_graph.compute_topic_tunnels(wing_topics, min_count=1)
        # create_tunnel is symmetric and dedupes, so recomputing must not
        # multiply the stored tunnels.
        assert first_pass[0]["id"] == second_pass[0]["id"]
        assert len(palace_graph.list_tunnels()) == 1

    def test_topic_tunnel_room_does_not_collide_with_literal_room(self, tmp_path, monkeypatch):
        """Regression (raised in review of #1184): a literal "Angular"
        folder-room and an "Angular" topic tunnel must be distinct
        endpoints, so ``follow_tunnels`` from the real room does not
        surface topic connections."""
        _use_tmp_tunnel_file(monkeypatch, tmp_path)

        # An explicit tunnel anchored at the literal "Angular" room of
        # wing_alpha.
        palace_graph.create_tunnel(
            "wing_alpha", "Angular", "wing_gamma", "frontend", label="explicit"
        )
        # A topic tunnel for wings that share the "Angular" topic.
        shared_topic = {"wing_alpha": ["Angular"], "wing_beta": ["Angular"]}
        palace_graph.compute_topic_tunnels(shared_topic, min_count=1)

        # Following the literal room surfaces only the explicit link.
        from_literal = palace_graph.follow_tunnels("wing_alpha", "Angular")
        assert len(from_literal) == 1
        assert from_literal[0]["connected_wing"] == "wing_gamma"

        # The topic tunnel is reachable through the namespaced room.
        from_topic = palace_graph.follow_tunnels("wing_alpha", "topic:Angular")
        assert len(from_topic) == 1
        assert from_topic[0]["connected_wing"] == "wing_beta"

    def test_topic_tunnels_carry_kind_field(self, tmp_path, monkeypatch):
        _use_tmp_tunnel_file(monkeypatch, tmp_path)
        palace_graph.create_tunnel("wing_a", "auth", "wing_b", "users", label="x")
        palace_graph.compute_topic_tunnels({"wing_a": ["Redis"], "wing_b": ["Redis"]}, min_count=1)

        stored = palace_graph.list_tunnels()
        observed_kinds = sorted(tunnel["kind"] for tunnel in stored)
        assert observed_kinds == ["explicit", "topic"]
|
||||||
|
|||||||
@@ -363,11 +363,14 @@ def test_to_detected_dict_shape():
|
|||||||
projects = [ProjectInfo(name="p", repo_root=Path("."), is_mine=True, manifest="package.json")]
|
projects = [ProjectInfo(name="p", repo_root=Path("."), is_mine=True, manifest="package.json")]
|
||||||
people = [PersonInfo(name="Jane Doe", total_commits=5, repos={"r"})]
|
people = [PersonInfo(name="Jane Doe", total_commits=5, repos={"r"})]
|
||||||
d = to_detected_dict(projects, people)
|
d = to_detected_dict(projects, people)
|
||||||
assert set(d.keys()) == {"people", "projects", "uncertain"}
|
# ``topics`` is the LLM-refine bucket for cross-wing tunnel signal —
|
||||||
|
# always present even when empty so callers can rely on the shape.
|
||||||
|
assert set(d.keys()) == {"people", "projects", "topics", "uncertain"}
|
||||||
assert d["projects"][0]["name"] == "p"
|
assert d["projects"][0]["name"] == "p"
|
||||||
assert d["projects"][0]["type"] == "project"
|
assert d["projects"][0]["type"] == "project"
|
||||||
assert d["people"][0]["name"] == "Jane Doe"
|
assert d["people"][0]["name"] == "Jane Doe"
|
||||||
assert d["people"][0]["type"] == "person"
|
assert d["people"][0]["type"] == "person"
|
||||||
|
assert d["topics"] == []
|
||||||
assert d["uncertain"] == []
|
assert d["uncertain"] == []
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user