feat(graph): cross-wing tunnels by shared topics (#1180)
When two wings have one or more confirmed TOPIC labels in common, the miner now drops a symmetric tunnel between them at mine time so the palace graph reflects shared themes (frameworks, vendors, recurring concepts). - llm_refine: TOPIC label routes to a dedicated `topics` bucket so the signal survives confirmation instead of getting collapsed into `uncertain` and dropped. - entity_detector / project_scanner: bucket plumbed through the detection pipeline; `confirm_entities` returns confirmed topics alongside people/projects. - miner.add_to_known_entities: optional `wing` parameter records the confirmed topics under `topics_by_wing` in `~/.mempalace/known_entities.json`. Wing names do NOT leak into the flat known-name set used by drawer-tagging. - palace_graph: `compute_topic_tunnels` and `topic_tunnels_for_wing` create symmetric tunnels via the existing `create_tunnel` API so they share dedup and persistence with explicit tunnels. - miner.mine: post-file-loop pass calls `topic_tunnels_for_wing` for the freshly-mined wing. Failures are logged but never abort the mine. - config: `topic_tunnel_min_count` knob (env `MEMPALACE_TOPIC_TUNNEL_MIN_COUNT` or `~/.mempalace/config.json`), default 1. Tests cover topic persistence through init->mine, tunnel creation when wings share a topic, no tunnel below threshold, cross-wing tunnel retrieval via `list_tunnels`, dedup on recompute, case-insensitive overlap, and the end-to-end mine-time wiring. Out of scope for this PR (called out in the PR body): manifest- dependency overlap, per-topic allow/deny lists, search-result surfacing.
This commit is contained in:
@@ -499,3 +499,141 @@ def follow_tunnels(wing: str, room: str, col=None, config=None):
|
||||
pass
|
||||
|
||||
return connections
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# TOPIC TUNNELS — auto-link wings that share confirmed TOPIC labels
|
||||
# =============================================================================
|
||||
# When two wings have one or more confirmed topics in common (e.g. both
|
||||
# discuss "Angular" or "OpenAPI"), drop a symmetric tunnel between them.
|
||||
# Topics come from the LLM-refined ``TOPIC`` bucket in the per-project
|
||||
# ``entities.json`` and are persisted by wing in
|
||||
# ``~/.mempalace/known_entities.json`` under ``topics_by_wing``.
|
||||
#
|
||||
# Tunnels are created via the existing ``create_tunnel`` API so they share
|
||||
# storage and dedup with explicit tunnels. The room is the topic name —
|
||||
# this matches the "two wings share an idea" mental model and keeps the
|
||||
# graph homogeneous.
|
||||
|
||||
|
||||
def _normalize_topic(name: str) -> str:
|
||||
"""Lowercase + strip topics for case-insensitive overlap detection."""
|
||||
return str(name).strip().lower()
|
||||
|
||||
|
||||
def compute_topic_tunnels(
|
||||
topics_by_wing: dict,
|
||||
min_count: int = 1,
|
||||
label_prefix: str = "shared topic",
|
||||
) -> list[dict]:
|
||||
"""Create tunnels for every pair of wings that share >= ``min_count`` topics.
|
||||
|
||||
Args:
|
||||
topics_by_wing: ``{wing_name: [topic_name, ...]}`` mapping. Topic
|
||||
names are compared case-insensitively; the first observed
|
||||
casing is used for the tunnel room name.
|
||||
min_count: minimum number of overlapping topics required to drop
|
||||
any tunnel between a wing pair. ``1`` means a single shared
|
||||
topic is enough; bumping to e.g. ``2`` requires multiple
|
||||
overlaps and filters out coincidental single-topic links.
|
||||
label_prefix: human-readable string prefixed to the tunnel label.
|
||||
|
||||
Returns:
|
||||
List of tunnel dicts as returned by ``create_tunnel`` — one per
|
||||
(wing_a, wing_b, topic) triple that crossed the threshold. A
|
||||
wing-pair below ``min_count`` produces no tunnels at all (not
|
||||
even for its single shared topic).
|
||||
|
||||
No-op semantics:
|
||||
- empty/None ``topics_by_wing`` returns ``[]``.
|
||||
- wings whose topic list is empty are skipped.
|
||||
- ``min_count <= 0`` is clamped to 1.
|
||||
"""
|
||||
if not topics_by_wing:
|
||||
return []
|
||||
|
||||
min_count = max(1, int(min_count))
|
||||
|
||||
# Build a normalized-topic -> first-seen casing map per wing so we
|
||||
# preserve display casing while still doing case-insensitive overlap.
|
||||
wing_topics: dict[str, dict[str, str]] = {}
|
||||
for wing, names in topics_by_wing.items():
|
||||
if not isinstance(wing, str) or not wing.strip():
|
||||
continue
|
||||
if not isinstance(names, (list, tuple)):
|
||||
continue
|
||||
bucket: dict[str, str] = {}
|
||||
for n in names:
|
||||
if not isinstance(n, str):
|
||||
continue
|
||||
key = _normalize_topic(n)
|
||||
if not key:
|
||||
continue
|
||||
bucket.setdefault(key, n.strip())
|
||||
if bucket:
|
||||
wing_topics[wing.strip()] = bucket
|
||||
|
||||
wings = sorted(wing_topics.keys())
|
||||
created: list[dict] = []
|
||||
for i, wa in enumerate(wings):
|
||||
topics_a = wing_topics[wa]
|
||||
for wb in wings[i + 1 :]:
|
||||
topics_b = wing_topics[wb]
|
||||
shared_keys = set(topics_a.keys()) & set(topics_b.keys())
|
||||
if len(shared_keys) < min_count:
|
||||
continue
|
||||
# Stable sort for deterministic tunnel ordering across runs.
|
||||
for key in sorted(shared_keys):
|
||||
# Prefer the casing from whichever wing sorts first — both
|
||||
# are valid; this just keeps the displayed room consistent.
|
||||
room = topics_a[key] if topics_a[key] else topics_b[key]
|
||||
tunnel = create_tunnel(
|
||||
source_wing=wa,
|
||||
source_room=room,
|
||||
target_wing=wb,
|
||||
target_room=room,
|
||||
label=f"{label_prefix}: {room}",
|
||||
)
|
||||
created.append(tunnel)
|
||||
return created
|
||||
|
||||
|
||||
def topic_tunnels_for_wing(
|
||||
wing: str,
|
||||
topics_by_wing: dict,
|
||||
min_count: int = 1,
|
||||
label_prefix: str = "shared topic",
|
||||
) -> list[dict]:
|
||||
"""Compute topic tunnels involving a single wing.
|
||||
|
||||
Used by the miner to incrementally update tunnels for the wing that
|
||||
just finished mining without recomputing pairs that don't involve it.
|
||||
Returns the list of tunnels created or refreshed.
|
||||
"""
|
||||
if not topics_by_wing or not isinstance(wing, str) or not wing.strip():
|
||||
return []
|
||||
|
||||
wing = wing.strip()
|
||||
own = topics_by_wing.get(wing)
|
||||
if not isinstance(own, (list, tuple)) or not own:
|
||||
return []
|
||||
|
||||
# Restrict the pair-wise computation to (wing, other) pairs only by
|
||||
# building a 2-wing slice for each other wing. Reusing
|
||||
# ``compute_topic_tunnels`` keeps the threshold and casing logic in
|
||||
# one place.
|
||||
created: list[dict] = []
|
||||
for other, other_topics in topics_by_wing.items():
|
||||
if not isinstance(other, str) or not other.strip() or other == wing:
|
||||
continue
|
||||
if not isinstance(other_topics, (list, tuple)) or not other_topics:
|
||||
continue
|
||||
slice_map = {wing: list(own), other: list(other_topics)}
|
||||
created.extend(
|
||||
compute_topic_tunnels(
|
||||
slice_map,
|
||||
min_count=min_count,
|
||||
label_prefix=label_prefix,
|
||||
)
|
||||
)
|
||||
return created
|
||||
|
||||
Reference in New Issue
Block a user