From 4a6147f903a95c9e573ee98e4cb3d624eb3ff8fc Mon Sep 17 00:00:00 2001 From: MSL <232237854+milla-jovovich@users.noreply.github.com> Date: Mon, 13 Apr 2026 07:47:40 -0300 Subject: [PATCH] feat: offline fact checker against entity registry + knowledge graph MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fact_checker.py verifies text for contradictions against locally stored entities and KG facts. Catches similar-name confusion (Bob vs Bobby), relationship mismatches (KG says husband, text says brother), and stale facts (KG valid_from/valid_to). No hardcoded facts. No network calls. Reads: - ~/.mempalace/known_entities.json - KnowledgeGraph SQLite Usage: from mempalace.fact_checker import check_text issues = check_text("Bob is Alice's brother", palace_path) # CLI python -m mempalace.fact_checker "text" --palace ~/.mempalace/palace Extracted from Milla's commit 935f657 which bundled this with closet_llm (deferred) and drawer-grep (PR #791). Ported only fact_checker.py — verified no network / API imports. Co-Authored-By: Claude Opus 4.6 (1M context) --- mempalace/fact_checker.py | 177 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 177 insertions(+) create mode 100644 mempalace/fact_checker.py diff --git a/mempalace/fact_checker.py b/mempalace/fact_checker.py new file mode 100644 index 0000000..281f117 --- /dev/null +++ b/mempalace/fact_checker.py @@ -0,0 +1,177 @@ +""" +fact_checker.py — Verify text against known facts in the palace. + +Checks AI responses, diary entries, and new content against the +entity registry and knowledge graph for contradictions. Catches: + - Wrong names (similar but different entities) + - Wrong relationships (calling someone the wrong role) + - Stale facts (things that changed — KG has valid_from/valid_to) + +Uses the entity_registry and knowledge_graph — no hardcoded facts. + +Usage: + from mempalace.fact_checker import check_text + issues = check_text("Bob is Alice's brother", palace_path) + # → [{"type": "relationship_mismatch", "detail": "KG says Bob is Alice's husband"}] + + # CLI + python -m mempalace.fact_checker "Bob is Alice's brother" --palace ~/.mempalace/palace +""" + +import os +import re +from pathlib import Path + + +def check_text(text, palace_path=None, config=None): + """Check text for contradictions against known facts. + + Returns list of issues found. Empty list = no contradictions. + """ + if config is None: + from .config import MempalaceConfig + config = MempalaceConfig() + if palace_path is None: + palace_path = config.palace_path + + issues = [] + + # Load known entities + entity_names = _load_known_entities() + + # Check entity name confusion (similar names that might be mixed up) + issues.extend(_check_entity_confusion(text, entity_names)) + + # Check against knowledge graph facts + issues.extend(_check_kg_facts(text, palace_path)) + + return issues + + +def _load_known_entities(): + """Load entity names from the registry.""" + import json + registry_path = os.path.expanduser("~/.mempalace/known_entities.json") + if not os.path.exists(registry_path): + return {} + try: + return json.loads(open(registry_path).read()) + except Exception: + return {} + + +def _check_entity_confusion(text, entity_names): + """Check if text confuses similar entity names.""" + issues = [] + all_names = set() + for cat in entity_names.values(): + if isinstance(cat, list): + all_names.update(cat) + elif isinstance(cat, dict): + all_names.update(cat.keys()) + + # Find names mentioned in text + mentioned = set() + for name in all_names: + if re.search(r'\b' + re.escape(name) + r'\b', text, re.IGNORECASE): + mentioned.add(name) + + # Check for names that are very similar but different (edit distance 1-2) + name_list = sorted(all_names) + for i, name_a in enumerate(name_list): + for name_b in name_list[i + 1:]: + if _edit_distance(name_a.lower(), name_b.lower()) <= 2: + if name_a in mentioned or name_b in mentioned: + if name_a in text and name_b not in text: + issues.append({ + "type": "similar_name", + "detail": f"'{name_a}' mentioned — did you mean '{name_b}'? (similar names in registry)", + "names": [name_a, name_b], + }) + return issues + + +def _check_kg_facts(text, palace_path): + """Check text against knowledge graph for contradictions.""" + issues = [] + try: + from .knowledge_graph import KnowledgeGraph + kg = KnowledgeGraph(palace_path=palace_path) + + # Extract relationship claims from text + # Pattern: "X is Y's Z" or "X's Z is Y" + patterns = [ + (r"(\w+)\s+is\s+(\w+)'s\s+(\w+)", "subject", "possessor", "role"), + (r"(\w+)'s\s+(\w+)\s+is\s+(\w+)", "possessor", "role", "subject"), + ] + + for pattern, *roles in patterns: + for match in re.finditer(pattern, text, re.IGNORECASE): + groups = match.groups() + subject = groups[0] + # Query KG for this entity + try: + facts = kg.query(subject) + if facts: + for fact in facts: + # Check if the claim contradicts a known fact + if fact.get("valid_to") is None: # current fact + kg_pred = fact.get("predicate", "").lower() + claim = match.group(0).lower() + if kg_pred in claim and fact.get("object", "").lower() not in claim: + issues.append({ + "type": "relationship_mismatch", + "detail": f"Text says '{match.group(0)}' but KG says: {subject} {kg_pred} {fact.get('object')}", + "entity": subject, + }) + except Exception: + pass + except Exception: + pass # KG not available — skip + + return issues + + +def _edit_distance(s1, s2): + """Simple Levenshtein distance.""" + if len(s1) < len(s2): + return _edit_distance(s2, s1) + if len(s2) == 0: + return len(s1) + prev = list(range(len(s2) + 1)) + for i, c1 in enumerate(s1): + curr = [i + 1] + for j, c2 in enumerate(s2): + curr.append(min( + prev[j + 1] + 1, + curr[j] + 1, + prev[j] + (0 if c1 == c2 else 1), + )) + prev = curr + return prev[-1] + + +if __name__ == "__main__": + import argparse + import json + + parser = argparse.ArgumentParser(description="Check text against known facts") + parser.add_argument("text", nargs="?", help="Text to check") + parser.add_argument("--palace", default=os.path.expanduser("~/.mempalace/palace")) + parser.add_argument("--stdin", action="store_true", help="Read from stdin") + args = parser.parse_args() + + if args.stdin: + import sys + text = sys.stdin.read() + elif args.text: + text = args.text + else: + print("Provide text as argument or use --stdin") + exit(1) + + issues = check_text(text, palace_path=args.palace) + if issues: + print(json.dumps(issues, indent=2)) + else: + print("No contradictions found.")