""" test_fact_checker.py — Regression + integration tests for fact_checker. Covers every detection path + the three bugs the original PR silently hid behind ``except Exception: pass``: * ``kg.query()`` doesn't exist — code must use ``query_entity``. * ``KnowledgeGraph(palace_path=...)`` is not a valid kwarg — code must pass ``db_path``. * O(n²) edit-distance over the full registry — must filter to names actually mentioned in the text. Also pins the three feature contracts: * similar_name — "Mila" vs "Milla" in a registry with both. * relationship_mismatch — "Bob is Alice's brother" vs KG "husband". * stale_fact — claim matches a triple whose valid_to is in the past. """ from __future__ import annotations import json from unittest.mock import MagicMock, patch import pytest from mempalace.fact_checker import ( _check_entity_confusion, _edit_distance, _extract_claims, _flatten_names, check_text, ) from mempalace.knowledge_graph import KnowledgeGraph # ── claim extraction ───────────────────────────────────────────────── class TestExtractClaims: def test_parses_x_is_ys_z(self): claims = _extract_claims("Bob is Alice's brother") assert len(claims) == 1 assert claims[0] == { "subject": "Bob", "predicate": "brother", "object": "Alice", "span": "Bob is Alice's brother", } def test_parses_xs_z_is_y(self): claims = _extract_claims("Alice's brother is Bob") assert len(claims) == 1 assert claims[0]["subject"] == "Bob" assert claims[0]["predicate"] == "brother" assert claims[0]["object"] == "Alice" def test_ignores_sentences_without_possessive_role(self): assert _extract_claims("Bob drove to the store today") == [] assert _extract_claims("Just some prose without relationships") == [] def test_multiple_claims_in_one_text(self): claims = _extract_claims("Bob is Alice's brother. Carol is Dave's sister.") subjects = {c["subject"] for c in claims} assert subjects == {"Bob", "Carol"} # ── entity confusion ───────────────────────────────────────────────── class TestEntityConfusion: def test_flags_near_name_when_only_one_mentioned(self): registry = {"people": ["Milla", "Mila"]} issues = _check_entity_confusion("I spoke with Mila today.", registry) # "Mila" mentioned, "Milla" not — registry has both at edit-distance 1, # flag the possible confusion. assert len(issues) == 1 assert issues[0]["type"] == "similar_name" assert set(issues[0]["names"]) == {"Mila", "Milla"} assert issues[0]["distance"] == 1 def test_no_false_positive_when_both_names_mentioned(self): """Regression: a text discussing both Mila and Milla is fine — the user clearly knows they're different. Don't nag.""" registry = {"people": ["Milla", "Mila"]} issues = _check_entity_confusion("Mila and Milla met for lunch.", registry) assert issues == [] def test_no_issues_when_registry_empty(self): assert _check_entity_confusion("Bob said hi", {}) == [] assert _check_entity_confusion("Bob said hi", {"people": []}) == [] def test_no_issues_when_no_mentioned_names(self): registry = {"people": ["Zelda", "Link", "Sheik"]} assert _check_entity_confusion("nothing relevant here", registry) == [] def test_registry_dict_shape_is_supported(self): # Some registries store {"people": {"Alice": {...meta}}}; we still # need to surface the keys as candidate names. registry = {"people": {"Milla": {"role": "creator"}, "Mila": {}}} issues = _check_entity_confusion("I messaged Mila yesterday", registry) assert any("Milla" in (i["names"] or []) for i in issues) class TestEditDistance: def test_basic_distances(self): assert _edit_distance("kitten", "sitting") == 3 assert _edit_distance("mila", "milla") == 1 assert _edit_distance("abc", "abc") == 0 def test_empty_strings(self): assert _edit_distance("", "") == 0 assert _edit_distance("abc", "") == 3 assert _edit_distance("", "abc") == 3 def test_performance_bounded_by_mentioned_names(self): """Regression: an earlier implementation did O(n²) pairwise edit-distance over every registry entry on every check_text call. With 100 names and zero mentions, the call must return in a blink because no edit-distance comparison should even start.""" import time # 500 random names, none of which appear in the text. registry = {"people": [f"Zelda{i:03d}" for i in range(500)]} text = "completely irrelevant prose with no registered names at all" start = time.perf_counter() issues = _check_entity_confusion(text, registry) elapsed = time.perf_counter() - start assert issues == [] # Even an unoptimized implementation should beat this by orders # of magnitude once we've filtered to mentioned names (which is # 0 here) — if it's still doing O(n²), we'll blow past. assert elapsed < 0.2, f"entity confusion took {elapsed:.3f}s on empty mentions" # ── _flatten_names helper ──────────────────────────────────────────── class TestFlattenNames: def test_handles_list_categories(self): assert _flatten_names({"people": ["Ada", "Bob"]}) == {"Ada", "Bob"} def test_handles_dict_categories(self): assert _flatten_names({"people": {"Ada": {}, "Bob": {}}}) == {"Ada", "Bob"} def test_skips_falsy_entries(self): assert _flatten_names({"people": ["Ada", "", None, "Bob"]}) == {"Ada", "Bob"} # ── KG integration (uses a real tmp SQLite palace) ─────────────────── @pytest.fixture def palace_with_kg(tmp_path): """Palace directory with a real KG pre-seeded with a few triples. The KG file lives at ``/knowledge_graph.sqlite3`` — same convention used by the MCP server. Fact-checker must find it via that path, not via a bogus ``palace_path`` kwarg. """ palace = tmp_path / "palace" palace.mkdir() db = str(palace / "knowledge_graph.sqlite3") kg = KnowledgeGraph(db_path=db) yield palace, kg class TestKGContradictions: def test_kg_init_uses_db_path_not_palace_path_kwarg(self): """Regression: the original code passed ``palace_path=`` to a constructor whose only kwarg is ``db_path``. That raised TypeError — silently swallowed — and the KG path became dead code. This test pins the correct call signature.""" # Simply construct via the correct signature; raising means the # KG constructor has changed in a way that fact_checker must too. kg = KnowledgeGraph(db_path=":memory:") # query_entity must exist (this is the method fact_checker calls). assert callable(getattr(kg, "query_entity", None)) # The API that fact_checker used to call does NOT exist. assert not hasattr(kg, "query") def test_relationship_mismatch_detected(self, palace_with_kg): """The feature's headline example: text says brother, KG says husband.""" palace, kg = palace_with_kg kg.add_triple("Bob", "husband_of", "Alice", valid_from="2020-01-01") issues = check_text("Bob is Alice's husband_of", str(palace)) # Exact-predicate + same object → no mismatch. assert all(i["type"] != "relationship_mismatch" for i in issues) issues = check_text("Bob is Alice's brother", str(palace)) mismatches = [i for i in issues if i["type"] == "relationship_mismatch"] assert mismatches, "should flag text/KG mismatch for same (subject, object)" m = mismatches[0] assert m["entity"] == "Bob" assert m["claim"]["predicate"] == "brother" assert m["kg_fact"]["predicate"] == "husband_of" def test_no_false_positive_when_kg_has_no_facts_about_subject(self, palace_with_kg): palace, _ = palace_with_kg # KG is empty → no mismatch should fire. assert check_text("Bob is Alice's brother", str(palace)) == [] def test_stale_fact_detected(self, palace_with_kg): palace, kg = palace_with_kg # An old relationship that was superseded in 2023. Using a # possessive-shape claim so the narrow claim-extraction regex # actually reaches the stale-fact branch. kg.add_triple( "Bob", "brother", "Alice", valid_from="2010-01-01", valid_to="2023-06-01", ) issues = check_text("Bob is Alice's brother", str(palace)) stale = [i for i in issues if i["type"] == "stale_fact"] assert stale, "should flag closed-window fact as stale" assert stale[0]["entity"] == "Bob" assert stale[0]["valid_to"].startswith("2023") def test_current_fact_same_triple_is_not_flagged(self, palace_with_kg): palace, kg = palace_with_kg kg.add_triple("Bob", "brother", "Alice", valid_from="2010-01-01") issues = check_text("Bob is Alice's brother", str(palace)) assert issues == [] def test_missing_palace_does_not_crash(self, tmp_path): """Brand-new palace (no KG file yet) — check_text must return [] rather than raising or hanging.""" nonexistent = str(tmp_path / "never_created") assert check_text("Bob is Alice's brother", nonexistent) == [] # ── end-to-end check_text contract ─────────────────────────────────── class TestCheckTextContract: def test_empty_text_returns_empty_list(self, tmp_path): assert check_text("", str(tmp_path / "palace")) == [] def test_registry_confusion_path_isolated_from_kg(self, tmp_path, monkeypatch): """If the registry file is present but the KG is missing, the similar-name path must still fire. Prior implementations had such entangled state that one failure killed both paths.""" # Bypass the real registry by pointing cache at a temp file. registry = tmp_path / "known_entities.json" registry.write_text(json.dumps({"people": ["Milla", "Mila"]})) from mempalace import miner monkeypatch.setattr(miner, "_ENTITY_REGISTRY_PATH", str(registry)) miner._ENTITY_REGISTRY_CACHE.update({"mtime": None, "names": frozenset(), "raw": {}}) issues = check_text("Chatted with Mila.", str(tmp_path / "nonexistent_palace")) assert any(i["type"] == "similar_name" for i in issues) # ── CLI ────────────────────────────────────────────────────────────── class TestCLI: def test_exits_nonzero_when_issues_found(self, tmp_path, monkeypatch, capsys): """The CLI exit code is how shell scripts / hooks know to act — pin it explicitly.""" registry = tmp_path / "known_entities.json" registry.write_text(json.dumps({"people": ["Milla", "Mila"]})) from mempalace import fact_checker, miner monkeypatch.setattr(miner, "_ENTITY_REGISTRY_PATH", str(registry)) miner._ENTITY_REGISTRY_CACHE.update({"mtime": None, "names": frozenset(), "raw": {}}) # Simulate argv: "Mila said hi" monkeypatch.setattr( "sys.argv", ["fact_checker", "Mila said hi", "--palace", str(tmp_path / "palace")], ) with pytest.raises(SystemExit) as excinfo: # Re-exec the __main__ block via runpy. import runpy runpy.run_module("mempalace.fact_checker", run_name="__main__") # Issues found → exit code 1. assert excinfo.value.code == 1 out = capsys.readouterr().out assert "similar_name" in out # Silence unused import warning. _ = (MagicMock, patch, fact_checker)