diff --git a/mempalace/entity_detector.py b/mempalace/entity_detector.py index 80fc107..203c0aa 100644 --- a/mempalace/entity_detector.py +++ b/mempalace/entity_detector.py @@ -17,6 +17,7 @@ Usage: import re import os +import functools from pathlib import Path from collections import defaultdict @@ -60,6 +61,8 @@ PRONOUN_PATTERNS = [ r"\btheir\b", ] +PRONOUN_RE = re.compile("|".join(PRONOUN_PATTERNS), re.IGNORECASE) + # Person signals — dialogue markers DIALOGUE_PATTERNS = [ r"^>\s*{name}[:\s]", # > Speaker: ... @@ -466,6 +469,7 @@ def extract_candidates(text: str) -> dict: # ==================== SIGNAL SCORING ==================== +@functools.lru_cache(maxsize=128) def _build_patterns(name: str) -> dict: """Pre-compile all regex patterns for a single entity name.""" n = re.escape(name) @@ -515,11 +519,9 @@ def score_entity(name: str, text: str, lines: list) -> dict: name_line_indices = [i for i, line in enumerate(lines) if name_lower in line.lower()] pronoun_hits = 0 for idx in name_line_indices: - window_text = " ".join(lines[max(0, idx - 2) : idx + 3]).lower() - for pronoun_pattern in PRONOUN_PATTERNS: - if re.search(pronoun_pattern, window_text): - pronoun_hits += 1 - break + window_text = " ".join(lines[max(0, idx - 2) : idx + 3]) + if PRONOUN_RE.search(window_text): + pronoun_hits += 1 if pronoun_hits > 0: person_score += pronoun_hits * 2 person_signals.append(f"pronoun nearby ({pronoun_hits}x)")