feat(graph): cross-wing tunnels by shared topics (#1180)
When two wings have one or more confirmed TOPIC labels in common, the miner now drops a symmetric tunnel between them at mine time so the palace graph reflects shared themes (frameworks, vendors, recurring concepts).

- llm_refine: TOPIC label routes to a dedicated `topics` bucket so the signal survives confirmation instead of getting collapsed into `uncertain` and dropped.
- entity_detector / project_scanner: bucket plumbed through the detection pipeline; `confirm_entities` returns confirmed topics alongside people/projects.
- miner.add_to_known_entities: optional `wing` parameter records the confirmed topics under `topics_by_wing` in `~/.mempalace/known_entities.json`. Wing names do NOT leak into the flat known-name set used by drawer-tagging.
- palace_graph: `compute_topic_tunnels` and `topic_tunnels_for_wing` create symmetric tunnels via the existing `create_tunnel` API so they share dedup and persistence with explicit tunnels.
- miner.mine: post-file-loop pass calls `topic_tunnels_for_wing` for the freshly-mined wing. Failures are logged but never abort the mine.
- config: `topic_tunnel_min_count` knob (env `MEMPALACE_TOPIC_TUNNEL_MIN_COUNT` or `~/.mempalace/config.json`), default 1.

Tests cover topic persistence through init->mine, tunnel creation when wings share a topic, no tunnel below threshold, cross-wing tunnel retrieval via `list_tunnels`, dedup on recompute, case-insensitive overlap, and the end-to-end mine-time wiring.

Out of scope for this PR (called out in the PR body): manifest-dependency overlap, per-topic allow/deny lists, search-result surfacing.
This commit is contained in:
@@ -235,13 +235,13 @@ def test_detect_entities_empty_files(tmp_path):
|
||||
f = tmp_path / "empty.txt"
|
||||
f.write_text("")
|
||||
result = detect_entities([f])
|
||||
assert result == {"people": [], "projects": [], "uncertain": []}
|
||||
assert result == {"people": [], "projects": [], "topics": [], "uncertain": []}
|
||||
|
||||
|
||||
def test_detect_entities_handles_missing_file(tmp_path):
|
||||
missing = tmp_path / "nonexistent.txt"
|
||||
result = detect_entities([missing])
|
||||
assert result == {"people": [], "projects": [], "uncertain": []}
|
||||
assert result == {"people": [], "projects": [], "topics": [], "uncertain": []}
|
||||
|
||||
|
||||
def test_detect_entities_respects_max_files(tmp_path):
|
||||
|
||||
@@ -206,3 +206,71 @@ def test_populated_registry_improves_miner_recall(temp_registry):
|
||||
# All four registered entities should land in the metadata string
|
||||
for expected in ("Julia Grib", "Kevin Heifner", "hyperion-history", "mempalace"):
|
||||
assert expected in tagged, f"expected '{expected}' in metadata {tagged!r}"
|
||||
|
||||
|
||||
# ── topics_by_wing — cross-wing tunnel signal source (issue #1180) ──
|
||||
|
||||
|
||||
def test_topics_persisted_under_topics_by_wing(temp_registry):
|
||||
miner.add_to_known_entities(
|
||||
{"people": ["Alice"], "topics": ["Angular", "OpenAPI"]},
|
||||
wing="wing_alpha",
|
||||
)
|
||||
data = json.loads(temp_registry.read_text())
|
||||
# Topics also stored as a flat list (existing-style aggregate).
|
||||
assert "Angular" in data["topics"]
|
||||
# And recorded by wing for tunnel computation.
|
||||
assert data["topics_by_wing"]["wing_alpha"] == ["Angular", "OpenAPI"]
|
||||
|
||||
|
||||
def test_topics_by_wing_replaces_on_reinit(temp_registry):
|
||||
"""Re-running init for the same wing should reflect the latest list,
|
||||
not accumulate stale topics indefinitely."""
|
||||
miner.add_to_known_entities({"topics": ["Angular", "OpenAPI"]}, wing="wing_alpha")
|
||||
miner.add_to_known_entities({"topics": ["OpenAPI", "Postgres"]}, wing="wing_alpha")
|
||||
data = json.loads(temp_registry.read_text())
|
||||
assert data["topics_by_wing"]["wing_alpha"] == ["OpenAPI", "Postgres"]
|
||||
|
||||
|
||||
def test_topics_by_wing_multiple_wings_coexist(temp_registry):
|
||||
miner.add_to_known_entities({"topics": ["foo"]}, wing="wing_a")
|
||||
miner.add_to_known_entities({"topics": ["foo", "bar"]}, wing="wing_b")
|
||||
data = json.loads(temp_registry.read_text())
|
||||
assert data["topics_by_wing"] == {"wing_a": ["foo"], "wing_b": ["foo", "bar"]}
|
||||
|
||||
|
||||
def test_topics_by_wing_skipped_without_wing(temp_registry):
|
||||
miner.add_to_known_entities({"topics": ["foo"]})
|
||||
data = json.loads(temp_registry.read_text())
|
||||
# No wing → no topics_by_wing entry, but topics list still saved.
|
||||
assert "topics_by_wing" not in data
|
||||
assert data["topics"] == ["foo"]
|
||||
|
||||
|
||||
def test_topics_by_wing_dedupes_case_insensitive(temp_registry):
|
||||
miner.add_to_known_entities({"topics": ["OpenAPI", "openapi", "OPENAPI"]}, wing="wing_a")
|
||||
data = json.loads(temp_registry.read_text())
|
||||
# Only one entry, casing of the first observed name preserved.
|
||||
assert data["topics_by_wing"]["wing_a"] == ["OpenAPI"]
|
||||
|
||||
|
||||
def test_get_topics_by_wing_reads_registry(temp_registry):
|
||||
miner.add_to_known_entities({"topics": ["foo"]}, wing="wing_a")
|
||||
miner.add_to_known_entities({"topics": ["foo", "bar"]}, wing="wing_b")
|
||||
result = miner.get_topics_by_wing()
|
||||
assert result == {"wing_a": ["foo"], "wing_b": ["foo", "bar"]}
|
||||
|
||||
|
||||
def test_get_topics_by_wing_empty_when_missing(temp_registry):
|
||||
miner.add_to_known_entities({"people": ["Alice"]})
|
||||
assert miner.get_topics_by_wing() == {}
|
||||
|
||||
|
||||
def test_topics_by_wing_does_not_pollute_known_names(temp_registry):
|
||||
"""Wing names in topics_by_wing must NOT leak into the flat known-names
|
||||
set used by ``_extract_entities_for_metadata`` — only the topic strings
|
||||
themselves should be recognized."""
|
||||
miner.add_to_known_entities({"topics": ["Angular"]}, wing="wing_super_secret_project")
|
||||
known = miner._load_known_entities()
|
||||
assert "Angular" in known
|
||||
assert "wing_super_secret_project" not in known
|
||||
|
||||
@@ -272,7 +272,9 @@ def test_apply_classifications_appends_reason_signal():
|
||||
assert any("spoken of by name" in s for s in new["people"][0]["signals"])
|
||||
|
||||
|
||||
def test_apply_classifications_topic_goes_to_uncertain():
|
||||
def test_apply_classifications_topic_goes_to_topics_bucket():
|
||||
"""TOPIC classifications now route to a dedicated ``topics`` bucket so the
|
||||
miner can use them as cross-wing tunnel signal (issue #1180)."""
|
||||
detected = {
|
||||
"people": [],
|
||||
"projects": [
|
||||
@@ -289,8 +291,32 @@ def test_apply_classifications_topic_goes_to_uncertain():
|
||||
decisions = {"Paris": ("TOPIC", "city, not a project")}
|
||||
new, reclass, _ = _apply_classifications(detected, decisions)
|
||||
assert len(new["projects"]) == 0
|
||||
assert len(new["uncertain"]) == 0
|
||||
assert len(new["topics"]) == 1
|
||||
assert new["topics"][0]["name"] == "Paris"
|
||||
assert new["topics"][0]["type"] == "topic"
|
||||
assert reclass == 1
|
||||
|
||||
|
||||
def test_apply_classifications_ambiguous_still_goes_to_uncertain():
|
||||
detected = {
|
||||
"people": [],
|
||||
"projects": [
|
||||
{
|
||||
"name": "Foo",
|
||||
"type": "project",
|
||||
"confidence": 0.7,
|
||||
"frequency": 5,
|
||||
"signals": ["regex"],
|
||||
}
|
||||
],
|
||||
"uncertain": [],
|
||||
}
|
||||
decisions = {"Foo": ("AMBIGUOUS", "context insufficient")}
|
||||
new, reclass, _ = _apply_classifications(detected, decisions)
|
||||
assert len(new["projects"]) == 0
|
||||
assert len(new["uncertain"]) == 1
|
||||
assert new["uncertain"][0]["name"] == "Paris"
|
||||
assert new["uncertain"][0]["name"] == "Foo"
|
||||
assert reclass == 1
|
||||
|
||||
|
||||
@@ -469,7 +495,9 @@ def test_refine_entities_refines_high_confidence_regex_projects():
|
||||
assert provider.call_count == 1
|
||||
assert result.reclassified == 1
|
||||
assert result.merged["projects"] == []
|
||||
assert result.merged["uncertain"][0]["name"] == "OpenAPI"
|
||||
# TOPIC labels go to the dedicated ``topics`` bucket so the miner can
|
||||
# use them for cross-wing tunnel computation (issue #1180).
|
||||
assert result.merged["topics"][0]["name"] == "OpenAPI"
|
||||
|
||||
|
||||
def test_refine_entities_refines_regex_people_but_skips_git_people():
|
||||
|
||||
@@ -496,3 +496,104 @@ def test_add_drawer_stamps_normalize_version(tmp_path):
|
||||
assert meta["normalize_version"] == NORMALIZE_VERSION
|
||||
finally:
|
||||
del col, client
|
||||
|
||||
|
||||
def test_mine_creates_topic_tunnels_for_shared_topics(tmp_path, monkeypatch):
|
||||
"""End-to-end: when two wings have already-confirmed topics that overlap,
|
||||
the miner's mine-time pass drops a cross-wing tunnel between them.
|
||||
|
||||
Issue #1180.
|
||||
"""
|
||||
from mempalace import miner, palace_graph
|
||||
|
||||
# Redirect both the registry and tunnel-storage paths into tmp_path
|
||||
# so we never touch the developer's real ~/.mempalace directory.
|
||||
registry = tmp_path / "known_entities.json"
|
||||
monkeypatch.setattr(miner, "_ENTITY_REGISTRY_PATH", str(registry))
|
||||
miner._ENTITY_REGISTRY_CACHE.update({"mtime": None, "names": frozenset(), "raw": {}})
|
||||
tunnels_file = tmp_path / "tunnels.json"
|
||||
monkeypatch.setattr(palace_graph, "_TUNNEL_FILE", str(tunnels_file))
|
||||
|
||||
# Pre-populate the registry as if init had been run for two wings that
|
||||
# share a topic.
|
||||
miner.add_to_known_entities({"topics": ["foo", "bar"]}, wing="wing_one")
|
||||
miner.add_to_known_entities({"topics": ["foo", "baz"]}, wing="wing_two")
|
||||
|
||||
# Mine wing_two — should drop tunnels between wing_two and wing_one
|
||||
# for every shared topic. Just one in this case.
|
||||
project_root = tmp_path / "wing_two_project"
|
||||
project_root.mkdir()
|
||||
write_file(
|
||||
project_root / "notes.md",
|
||||
"Some prose long enough to make a chunk. " * 20,
|
||||
)
|
||||
with open(project_root / "mempalace.yaml", "w") as f:
|
||||
yaml.dump({"wing": "wing_two", "rooms": [{"name": "general"}]}, f)
|
||||
|
||||
palace_path = tmp_path / "palace"
|
||||
mine(str(project_root), str(palace_path))
|
||||
|
||||
listed = palace_graph.list_tunnels()
|
||||
assert len(listed) == 1
|
||||
rooms = {listed[0]["source"]["room"], listed[0]["target"]["room"]}
|
||||
assert rooms == {"foo"}
|
||||
wings = {listed[0]["source"]["wing"], listed[0]["target"]["wing"]}
|
||||
assert wings == {"wing_one", "wing_two"}
|
||||
|
||||
|
||||
def test_mine_no_tunnel_when_threshold_blocks_overlap(tmp_path, monkeypatch):
|
||||
"""Bumping ``MEMPALACE_TOPIC_TUNNEL_MIN_COUNT`` above the actual overlap
|
||||
suppresses tunnel creation."""
|
||||
from mempalace import miner, palace_graph
|
||||
|
||||
registry = tmp_path / "known_entities.json"
|
||||
monkeypatch.setattr(miner, "_ENTITY_REGISTRY_PATH", str(registry))
|
||||
miner._ENTITY_REGISTRY_CACHE.update({"mtime": None, "names": frozenset(), "raw": {}})
|
||||
tunnels_file = tmp_path / "tunnels.json"
|
||||
monkeypatch.setattr(palace_graph, "_TUNNEL_FILE", str(tunnels_file))
|
||||
monkeypatch.setenv("MEMPALACE_TOPIC_TUNNEL_MIN_COUNT", "2")
|
||||
|
||||
miner.add_to_known_entities({"topics": ["foo"]}, wing="wing_one")
|
||||
miner.add_to_known_entities({"topics": ["foo"]}, wing="wing_two")
|
||||
|
||||
project_root = tmp_path / "wing_two_project"
|
||||
project_root.mkdir()
|
||||
write_file(
|
||||
project_root / "notes.md",
|
||||
"Some prose long enough to make a chunk. " * 20,
|
||||
)
|
||||
with open(project_root / "mempalace.yaml", "w") as f:
|
||||
yaml.dump({"wing": "wing_two", "rooms": [{"name": "general"}]}, f)
|
||||
|
||||
palace_path = tmp_path / "palace"
|
||||
mine(str(project_root), str(palace_path))
|
||||
|
||||
# min_count=2 but only 1 shared topic → no tunnel.
|
||||
assert palace_graph.list_tunnels() == []
|
||||
|
||||
|
||||
def test_mine_no_tunnel_when_only_one_wing_has_topics(tmp_path, monkeypatch):
|
||||
"""A wing in isolation (no other wing has confirmed topics) creates no tunnels."""
|
||||
from mempalace import miner, palace_graph
|
||||
|
||||
registry = tmp_path / "known_entities.json"
|
||||
monkeypatch.setattr(miner, "_ENTITY_REGISTRY_PATH", str(registry))
|
||||
miner._ENTITY_REGISTRY_CACHE.update({"mtime": None, "names": frozenset(), "raw": {}})
|
||||
tunnels_file = tmp_path / "tunnels.json"
|
||||
monkeypatch.setattr(palace_graph, "_TUNNEL_FILE", str(tunnels_file))
|
||||
|
||||
miner.add_to_known_entities({"topics": ["foo"]}, wing="wing_one")
|
||||
|
||||
project_root = tmp_path / "wing_one_project"
|
||||
project_root.mkdir()
|
||||
write_file(
|
||||
project_root / "notes.md",
|
||||
"Some prose long enough to make a chunk. " * 20,
|
||||
)
|
||||
with open(project_root / "mempalace.yaml", "w") as f:
|
||||
yaml.dump({"wing": "wing_one", "rooms": [{"name": "general"}]}, f)
|
||||
|
||||
palace_path = tmp_path / "palace"
|
||||
mine(str(project_root), str(palace_path))
|
||||
|
||||
assert palace_graph.list_tunnels() == []
|
||||
|
||||
@@ -135,3 +135,126 @@ class TestExplicitTunnels:
|
||||
connections = palace_graph.follow_tunnels("wing_code", "auth", col=col)
|
||||
assert len(connections) == 1
|
||||
assert "drawer_preview" not in connections[0]
|
||||
|
||||
|
||||
class TestTopicTunnels:
|
||||
"""Cross-wing topic tunnels (issue #1180).
|
||||
|
||||
When two wings share confirmed TOPIC labels above a configurable
|
||||
threshold, a symmetric tunnel is created between them. Tunnels are
|
||||
routed through the existing ``create_tunnel`` storage so they share
|
||||
dedup and persistence with explicit tunnels.
|
||||
"""
|
||||
|
||||
def test_compute_topic_tunnels_creates_link_for_shared_topic(self, tmp_path, monkeypatch):
|
||||
_use_tmp_tunnel_file(monkeypatch, tmp_path)
|
||||
topics_by_wing = {
|
||||
"wing_alpha": ["Angular", "OpenAPI"],
|
||||
"wing_beta": ["OpenAPI", "Kubernetes"],
|
||||
}
|
||||
created = palace_graph.compute_topic_tunnels(topics_by_wing, min_count=1)
|
||||
assert len(created) == 1
|
||||
assert created[0]["source"]["wing"] in {"wing_alpha", "wing_beta"}
|
||||
assert created[0]["target"]["wing"] in {"wing_alpha", "wing_beta"}
|
||||
# Room is the topic itself (case preserved from the first wing).
|
||||
assert created[0]["source"]["room"] == "OpenAPI"
|
||||
assert "OpenAPI" in created[0]["label"]
|
||||
|
||||
# Tunnel is retrievable via the standard list_tunnels API.
|
||||
listed = palace_graph.list_tunnels()
|
||||
assert len(listed) == 1
|
||||
assert listed[0]["id"] == created[0]["id"]
|
||||
|
||||
def test_compute_topic_tunnels_no_link_below_threshold(self, tmp_path, monkeypatch):
|
||||
_use_tmp_tunnel_file(monkeypatch, tmp_path)
|
||||
topics_by_wing = {
|
||||
"wing_alpha": ["Angular", "OpenAPI"],
|
||||
"wing_beta": ["OpenAPI", "Kubernetes"],
|
||||
}
|
||||
# min_count=2 requires two overlapping topics — only one shared.
|
||||
created = palace_graph.compute_topic_tunnels(topics_by_wing, min_count=2)
|
||||
assert created == []
|
||||
assert palace_graph.list_tunnels() == []
|
||||
|
||||
def test_compute_topic_tunnels_above_threshold_creates_per_topic_links(
|
||||
self, tmp_path, monkeypatch
|
||||
):
|
||||
_use_tmp_tunnel_file(monkeypatch, tmp_path)
|
||||
topics_by_wing = {
|
||||
"wing_alpha": ["Angular", "OpenAPI", "Postgres"],
|
||||
"wing_beta": ["Angular", "OpenAPI", "Redis"],
|
||||
}
|
||||
created = palace_graph.compute_topic_tunnels(topics_by_wing, min_count=2)
|
||||
# Two shared topics × one wing pair = two tunnels.
|
||||
rooms = sorted(t["source"]["room"] for t in created)
|
||||
assert rooms == ["Angular", "OpenAPI"]
|
||||
|
||||
def test_compute_topic_tunnels_case_insensitive_overlap(self, tmp_path, monkeypatch):
|
||||
_use_tmp_tunnel_file(monkeypatch, tmp_path)
|
||||
topics_by_wing = {
|
||||
"wing_alpha": ["openapi"],
|
||||
"wing_beta": ["OpenAPI"],
|
||||
}
|
||||
created = palace_graph.compute_topic_tunnels(topics_by_wing, min_count=1)
|
||||
assert len(created) == 1
|
||||
|
||||
def test_compute_topic_tunnels_empty_input_is_noop(self, tmp_path, monkeypatch):
|
||||
_use_tmp_tunnel_file(monkeypatch, tmp_path)
|
||||
assert palace_graph.compute_topic_tunnels({}) == []
|
||||
assert palace_graph.compute_topic_tunnels({"wing_a": []}) == []
|
||||
assert palace_graph.list_tunnels() == []
|
||||
|
||||
def test_compute_topic_tunnels_three_wings_pairwise(self, tmp_path, monkeypatch):
|
||||
_use_tmp_tunnel_file(monkeypatch, tmp_path)
|
||||
topics_by_wing = {
|
||||
"wing_a": ["foo"],
|
||||
"wing_b": ["foo"],
|
||||
"wing_c": ["foo"],
|
||||
}
|
||||
created = palace_graph.compute_topic_tunnels(topics_by_wing, min_count=1)
|
||||
# 3 wings sharing the same topic → C(3,2) = 3 pairs → 3 tunnels.
|
||||
assert len(created) == 3
|
||||
endpoint_pairs = {
|
||||
tuple(sorted([t["source"]["wing"], t["target"]["wing"]])) for t in created
|
||||
}
|
||||
assert endpoint_pairs == {
|
||||
("wing_a", "wing_b"),
|
||||
("wing_a", "wing_c"),
|
||||
("wing_b", "wing_c"),
|
||||
}
|
||||
|
||||
def test_topic_tunnels_for_wing_only_links_that_wing(self, tmp_path, monkeypatch):
|
||||
_use_tmp_tunnel_file(monkeypatch, tmp_path)
|
||||
topics_by_wing = {
|
||||
"wing_a": ["foo", "bar"],
|
||||
"wing_b": ["foo"],
|
||||
"wing_c": ["bar"],
|
||||
}
|
||||
# wing_a should link to both b (via foo) and c (via bar).
|
||||
created = palace_graph.topic_tunnels_for_wing("wing_a", topics_by_wing)
|
||||
endpoint_pairs = {
|
||||
tuple(sorted([t["source"]["wing"], t["target"]["wing"]])) for t in created
|
||||
}
|
||||
assert endpoint_pairs == {("wing_a", "wing_b"), ("wing_a", "wing_c")}
|
||||
# The b-c pair is NOT created because wing_a's incremental pass
|
||||
# only computes pairs that include wing_a.
|
||||
assert len(palace_graph.list_tunnels()) == 2
|
||||
|
||||
def test_topic_tunnels_for_wing_unknown_wing_is_noop(self, tmp_path, monkeypatch):
|
||||
_use_tmp_tunnel_file(monkeypatch, tmp_path)
|
||||
topics_by_wing = {"wing_a": ["foo"], "wing_b": ["foo"]}
|
||||
assert palace_graph.topic_tunnels_for_wing("wing_missing", topics_by_wing) == []
|
||||
assert palace_graph.list_tunnels() == []
|
||||
|
||||
def test_compute_topic_tunnels_dedupe_on_recompute(self, tmp_path, monkeypatch):
|
||||
_use_tmp_tunnel_file(monkeypatch, tmp_path)
|
||||
topics_by_wing = {
|
||||
"wing_alpha": ["OpenAPI"],
|
||||
"wing_beta": ["OpenAPI"],
|
||||
}
|
||||
first = palace_graph.compute_topic_tunnels(topics_by_wing, min_count=1)
|
||||
second = palace_graph.compute_topic_tunnels(topics_by_wing, min_count=1)
|
||||
# create_tunnel is symmetric/dedupe — repeated computation should
|
||||
# not multiply the stored tunnels.
|
||||
assert first[0]["id"] == second[0]["id"]
|
||||
assert len(palace_graph.list_tunnels()) == 1
|
||||
|
||||
@@ -363,11 +363,14 @@ def test_to_detected_dict_shape():
|
||||
projects = [ProjectInfo(name="p", repo_root=Path("."), is_mine=True, manifest="package.json")]
|
||||
people = [PersonInfo(name="Jane Doe", total_commits=5, repos={"r"})]
|
||||
d = to_detected_dict(projects, people)
|
||||
assert set(d.keys()) == {"people", "projects", "uncertain"}
|
||||
# ``topics`` is the LLM-refine bucket for cross-wing tunnel signal —
|
||||
# always present even when empty so callers can rely on the shape.
|
||||
assert set(d.keys()) == {"people", "projects", "topics", "uncertain"}
|
||||
assert d["projects"][0]["name"] == "p"
|
||||
assert d["projects"][0]["type"] == "project"
|
||||
assert d["people"][0]["name"] == "Jane Doe"
|
||||
assert d["people"][0]["type"] == "person"
|
||||
assert d["topics"] == []
|
||||
assert d["uncertain"] == []
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user