diff --git a/mempalace/backends/chroma.py b/mempalace/backends/chroma.py
index 1a171c1..3a0d2c3 100644
--- a/mempalace/backends/chroma.py
+++ b/mempalace/backends/chroma.py
@@ -120,8 +120,7 @@ def quarantine_stale_hnsw(palace_path: str, stale_seconds: float = 3600.0) -> li
             os.rename(seg_dir, target)
             moved.append(target)
             logger.warning(
-                "Quarantined stale HNSW segment %s "
-                "(sqlite %.0fs newer than HNSW); renamed to %s",
+                "Quarantined stale HNSW segment %s (sqlite %.0fs newer than HNSW); renamed to %s",
                 seg_dir,
                 sqlite_mtime - hnsw_mtime,
                 target,
diff --git a/mempalace/convo_scanner.py b/mempalace/convo_scanner.py
index bb8fbef..b592494 100644
--- a/mempalace/convo_scanner.py
+++ b/mempalace/convo_scanner.py
@@ -91,6 +91,14 @@ def _decode_slug_fallback(slug: str) -> str:
     return parts[-1] if parts else slug
 
 
+def _safe_mtime(path: Path) -> float:
+    """Return file mtime, defaulting old on permission or filesystem errors."""
+    try:
+        return path.stat().st_mtime
+    except OSError:
+        return 0.0
+
+
 def _resolve_project_name(project_dir: Path) -> str:
     """Read one session's cwd to recover the original project name.
 
@@ -98,7 +106,7 @@ def _resolve_project_name(project_dir: Path) -> str:
     """
     sessions = sorted(
         (p for p in project_dir.iterdir() if p.is_file() and p.suffix == ".jsonl"),
-        key=lambda p: p.stat().st_mtime,
+        key=_safe_mtime,
         reverse=True,  # newest first — most likely to be well-formed
     )
     for session in sessions:
diff --git a/mempalace/llm_client.py b/mempalace/llm_client.py
index 442cf31..74982ce 100644
--- a/mempalace/llm_client.py
+++ b/mempalace/llm_client.py
@@ -124,7 +124,7 @@ class OllamaProvider(LLMProvider):
         if not names & wanted:
             return (
                 False,
-                f"Model '{self.model}' not loaded in Ollama. " f"Run: ollama pull {self.model}",
+                f"Model '{self.model}' not loaded in Ollama. Run: ollama pull {self.model}",
             )
         return True, "ok"
 
diff --git a/mempalace/llm_refine.py b/mempalace/llm_refine.py
index 91a950c..faa737a 100644
--- a/mempalace/llm_refine.py
+++ b/mempalace/llm_refine.py
@@ -46,6 +46,10 @@ For each candidate, pick exactly ONE label:
 - COMMON_WORD: an English word, verb, or fragment that isn't a named entity at all (e.g. "Created", "Before", "Never")
 - AMBIGUOUS: context is insufficient to decide between two of the above
 
+Frameworks, runtimes, APIs, cloud services, vendors, and third-party products
+(e.g. Angular, OpenAPI, Terraform, Bun, Google) are TOPIC unless the context
+clearly says this is the user's own named codebase, product, or active effort.
+
 Use the provided context lines to disambiguate. A capitalized word that only appears in metadata ("Created: 2026-04-24") is COMMON_WORD. A name that appears with pronouns and dialogue is PERSON.
 
 Respond with JSON only. Schema:
@@ -58,7 +62,7 @@ One entry per candidate, same order as the input."""
 class RefineResult:
     merged: dict  # updated detected dict
     reclassified: int  # entries whose type changed
-    dropped: int  # entries moved out (COMMON_WORD, or AMBIGUOUS sent to uncertain)
+    dropped: int  # entries removed from the merged result (COMMON_WORD only)
     errors: list[str]  # per-batch error messages (transport/parse failures)
     batches_completed: int
     batches_total: int
@@ -70,14 +74,14 @@ def _collect_contexts(
 ) -> list[str]:
     """Return up to `max_lines` distinct lines from the corpus that mention `name`.
 
-    Case-insensitive substring match. Lines are truncated to
+    Case-insensitive token-boundary match. Lines are truncated to
     CONTEXT_WINDOW_CHARS chars to keep token usage bounded.
     """
-    needle = name.lower()
+    needle = re.compile(rf"(?<!\w){re.escape(name)}(?!\w)", re.IGNORECASE)
     seen: set[str] = set()
     out: list[str] = []
     for line in corpus_lines:
-        if needle not in line.lower():
+        if not needle.search(line):
             continue
         trimmed = line.strip()[:CONTEXT_WINDOW_CHARS]
         if not trimmed or trimmed in seen:
@@ -102,20 +106,64 @@ def _build_user_prompt(candidates_with_contexts: list[tuple[str, str, list[str]]
     return "\n".join(parts)
 
 
+def _extract_json_candidates(text: str) -> list[str]:
+    """Return plausible JSON payloads extracted from an LLM response."""
+    text = text.strip()
+    if not text:
+        return []
+
+    candidates: list[str] = [text]
+
+    for match in re.finditer(r"```(?:json)?\s*([\s\S]*?)\s*```", text, re.IGNORECASE):
+        candidate = match.group(1).strip()
+        if candidate and candidate not in candidates:
+            candidates.append(candidate)
+
+    for start, opener in ((i, ch) for i, ch in enumerate(text) if ch in "{["):
+        closer = "}" if opener == "{" else "]"
+        depth = 0
+        in_string = False
+        escaped = False
+        for i in range(start, len(text)):
+            ch = text[i]
+            if in_string:
+                if escaped:
+                    escaped = False
+                elif ch == "\\":
+                    escaped = True
+                elif ch == '"':
+                    in_string = False
+                continue
+
+            if ch == '"':
+                in_string = True
+            elif ch == opener:
+                depth += 1
+            elif ch == closer:
+                depth -= 1
+                if depth == 0:
+                    candidate = text[start : i + 1].strip()
+                    if candidate and candidate not in candidates:
+                        candidates.append(candidate)
+                    break
+
+    return candidates
+
+
 def _parse_response(text: str, expected_names: list[str]) -> dict[str, tuple[str, str]]:
     """Parse the LLM's JSON response into {name: (label, reason)}.
 
     Robust to the model occasionally wrapping JSON in text or returning
     slight schema variations. Falls back to matching by candidate name.
     """
-    # Strip any surrounding fences or prose
-    text = text.strip()
-    if text.startswith("```"):
-        text = re.sub(r"^```(?:json)?\s*", "", text)
-        text = re.sub(r"\s*```\s*$", "", text)
-    try:
-        data = json.loads(text)
-    except json.JSONDecodeError:
+    data = None
+    for candidate in _extract_json_candidates(text):
+        try:
+            data = json.loads(candidate)
+            break
+        except json.JSONDecodeError:
+            continue
+    if data is None:
         return {}
 
     entries = data.get("classifications") if isinstance(data, dict) else data
@@ -142,7 +190,9 @@ def _parse_response(text: str, expected_names: list[str]) -> dict[str, tuple[str
 
 
 def _apply_classifications(
-    detected: dict, decisions: dict[str, tuple[str, str]]
+    detected: dict,
+    decisions: dict[str, tuple[str, str]],
+    allow_project_promotions: bool = True,
 ) -> tuple[dict, int, int]:
     """Merge LLM decisions back into the detected dict.
 
@@ -182,6 +232,12 @@ def _apply_classifications(
             continue
 
         target_bucket = label_to_bucket[label]
+        if (
+            label == "PROJECT"
+            and not allow_project_promotions
+            and not _is_authoritative_project(entry)
+        ):
+            target_bucket = "uncertain"
         updated = dict(entry)
         # Append the LLM's reason as a new signal so the user sees why it moved
         signals = list(updated.get("signals", []))
@@ -201,6 +257,19 @@ def _apply_classifications(
     return new_detected, reclassified, dropped
 
 
+def _is_authoritative_person(entry: dict) -> bool:
+    """Return True for git-author people that should not be second-guessed."""
+    signals = " ".join(entry.get("signals", [])).lower()
+    return "commit" in signals and "repo" in signals
+
+
+def _is_authoritative_project(entry: dict) -> bool:
+    """Return True for manifest/git-backed projects that are already source-backed."""
+    signals = " ".join(entry.get("signals", [])).lower()
+    manifest_markers = ("package.json", "pyproject.toml", "cargo.toml", "go.mod")
+    return any(marker in signals for marker in manifest_markers) or "commit" in signals
+
+
 def _print_progress(batch_idx: int, total: int, current_name: str) -> None:
     """Overwrite-line progress indicator."""
     width = 40
@@ -217,12 +286,13 @@ def refine_entities(
     provider: LLMProvider,
     batch_size: int = BATCH_SIZE,
     show_progress: bool = True,
+    allow_project_promotions: bool = True,
 ) -> RefineResult:
     """Reclassify detected entities using the LLM provider.
 
-    Only candidates in the ``uncertain`` and ``projects`` buckets are sent for
-    refinement — ``people`` entries from git authorship are already
-    high-confidence and don't benefit from LLM second-guessing.
+    Only regex-derived candidates are sent for refinement. Git authors and
+    manifest/git-backed projects are already source-backed and don't benefit
+    from LLM second-guessing.
 
     Ctrl-C during refinement: cancels the remaining batches, returns a
     RefineResult with ``cancelled=True`` and whatever was classified before
@@ -231,16 +301,20 @@ def refine_entities(
 
     Transport or parse failures in individual batches are recorded in
     ``errors`` and do not abort the run.
+
+    ``allow_project_promotions=False`` keeps LLM-only project guesses in the
+    uncertain bucket. This is useful when manifest/git signal already supplied
+    canonical projects and regex/LLM hits are likely tools, vendors, or topics.
     """
-    # Only refine buckets that actually benefit — keep `people` as-is
-    # (git-authored people are already authoritative).
     candidates: list[tuple[str, str]] = []
-    for bucket in ("projects", "uncertain"):
+    current_type = {"people": "person", "projects": "project", "uncertain": "uncertain"}
+    for bucket in ("people", "projects", "uncertain"):
         for e in detected.get(bucket, []):
-            # Skip already-high-confidence entries (manifest-backed projects etc.)
-            if e.get("confidence", 0) >= 0.95 and bucket == "projects":
+            if bucket == "people" and _is_authoritative_person(e):
                 continue
-            candidates.append((e["name"], bucket.rstrip("s")))  # "projects" -> "project"
+            if bucket == "projects" and _is_authoritative_project(e):
+                continue
+            candidates.append((e["name"], current_type[bucket]))
 
     corpus_lines = corpus_text.splitlines() if corpus_text else []
 
@@ -300,7 +374,11 @@ def refine_entities(
         sys.stderr.write("\n")
         sys.stderr.flush()
 
-    merged, reclassified, dropped = _apply_classifications(detected, all_decisions)
+    merged, reclassified, dropped = _apply_classifications(
+        detected,
+        all_decisions,
+        allow_project_promotions=allow_project_promotions,
+    )
 
     return RefineResult(
         merged=merged,
diff --git a/mempalace/project_scanner.py b/mempalace/project_scanner.py
index b5c408e..5b12d5e 100644
--- a/mempalace/project_scanner.py
+++ b/mempalace/project_scanner.py
@@ -632,18 +632,28 @@ def discover_entities(
         else {"people": [], "projects": [], "uncertain": []}
     )
 
-    # If git/manifests gave us real projects, suppress the regex "uncertain" bucket.
-    # That bucket is mostly noise (common words, CamelCase tech terms, etc.) and
-    # adding it to the review flow just makes the user do triage we can skip.
+    # Without LLM refinement, suppress regex "uncertain" noise when real
+    # manifest/git signal exists. With LLM refinement enabled, keep those
+    # candidates so the model can promote real entities or drop common words.
     has_real_signal = bool(projects) or bool(people)
-    merged = _merge_detected(real_signal, prose_detected, drop_secondary_uncertain=has_real_signal)
+    merged = _merge_detected(
+        real_signal,
+        prose_detected,
+        drop_secondary_uncertain=has_real_signal and llm_provider is None,
+    )
 
     # Optional phase 2: LLM refinement.
     if llm_provider is not None:
         from mempalace.llm_refine import collect_corpus_text, refine_entities
 
         corpus = collect_corpus_text(str(project_dir))
-        result = refine_entities(merged, corpus, llm_provider, show_progress=show_progress)
+        result = refine_entities(
+            merged,
+            corpus,
+            llm_provider,
+            show_progress=show_progress,
+            allow_project_promotions=not has_real_signal,
+        )
         if show_progress:
             status_bits = []
             if result.cancelled:
diff --git a/tests/test_convo_scanner.py b/tests/test_convo_scanner.py
index 9fcd339..01e980b 100644
--- a/tests/test_convo_scanner.py
+++ b/tests/test_convo_scanner.py
@@ -1,11 +1,13 @@
 """Tests for mempalace.convo_scanner."""
 
 import json
+from pathlib import Path
 
 from mempalace.convo_scanner import (
     _decode_slug_fallback,
     _extract_cwd_from_session,
     _resolve_project_name,
+    _safe_mtime,
     is_claude_projects_root,
     scan_claude_projects,
 )
@@ -93,6 +95,23 @@ def test_decode_slug_fallback_only_dashes():
     assert _decode_slug_fallback("---") == "---"
 
 
+# ── safe metadata helpers ───────────────────────────────────────────────
+
+
+def test_safe_mtime_returns_zero_on_stat_error(tmp_path, monkeypatch):
+    f = tmp_path / "session.jsonl"
+    f.write_text("{}\n")
+    original_stat = Path.stat
+
+    def fail_stat(self):
+        if self == f:
+            raise OSError("permission denied")
+        return original_stat(self)
+
+    monkeypatch.setattr(Path, "stat", fail_stat)
+    assert _safe_mtime(f) == 0.0
+
+
 # ── _resolve_project_name ───────────────────────────────────────────────
 
 
diff --git a/tests/test_llm_refine.py b/tests/test_llm_refine.py
index 329f91a..b3e7d2d 100644
--- a/tests/test_llm_refine.py
+++ b/tests/test_llm_refine.py
@@ -11,6 +11,9 @@ from mempalace.llm_refine import (
     _apply_classifications,
     _build_user_prompt,
     _collect_contexts,
+    _extract_json_candidates,
+    _is_authoritative_person,
+    _is_authoritative_project,
     _parse_response,
     collect_corpus_text,
     refine_entities,
@@ -62,6 +65,16 @@ def test_collect_contexts_case_insensitive():
     assert out == ["lowercase alice mention"]
 
 
+def test_collect_contexts_uses_token_boundaries():
+    lines = [
+        "forgot should not match",
+        "Go is a language.",
+        "go-v1 shipped.",
+    ]
+    out = _collect_contexts(lines, "Go", max_lines=5)
+    assert out == ["Go is a language.", "go-v1 shipped."]
+
+
 def test_collect_contexts_dedupes_identical_lines():
     lines = ["Alice", "Alice", "Alice was here"]
     out = _collect_contexts(lines, "Alice", max_lines=5)
@@ -131,6 +144,30 @@ def test_parse_response_strips_code_fences():
     assert out["X"][0] == "TOPIC"
 
 
+def test_parse_response_extracts_json_after_prose():
+    text = 'Sure, here is the JSON: {"classifications": [{"name": "X", "label": "TOPIC"}]}'
+    out = _parse_response(text, ["X"])
+    assert out["X"][0] == "TOPIC"
+
+
+def test_parse_response_extracts_fenced_json_after_prose():
+    text = 'Sure:\n```json\n{"classifications": [{"name": "X", "label": "PROJECT"}]}\n```'
+    out = _parse_response(text, ["X"])
+    assert out["X"][0] == "PROJECT"
+
+
+def test_extract_json_candidates_handles_embedded_array():
+    text = 'prefix [{"name": "Y", "label": "PERSON"}] suffix'
+    candidates = _extract_json_candidates(text)
+    assert '[{"name": "Y", "label": "PERSON"}]' in candidates
+
+
+def test_parse_response_ignores_non_json_brackets_before_payload():
+    text = 'See [note] first. JSON: {"classifications": [{"name": "X", "label": "TOPIC"}]}'
+    out = _parse_response(text, ["X"])
+    assert out["X"][0] == "TOPIC"
+
+
 def test_parse_response_malformed_returns_empty():
     out = _parse_response("not json at all", ["X"])
     assert out == {}
@@ -257,6 +294,67 @@ def test_apply_classifications_topic_goes_to_uncertain():
     assert reclass == 1
 
 
+def test_apply_classifications_can_block_llm_only_project_promotion():
+    detected = {
+        "people": [],
+        "projects": [],
+        "uncertain": [
+            {
+                "name": "Terraform",
+                "type": "uncertain",
+                "confidence": 0.4,
+                "frequency": 5,
+                "signals": ["regex"],
+            }
+        ],
+    }
+    decisions = {"Terraform": ("PROJECT", "tool")}
+    new, reclass, _ = _apply_classifications(
+        detected,
+        decisions,
+        allow_project_promotions=False,
+    )
+    assert new["projects"] == []
+    assert new["uncertain"][0]["name"] == "Terraform"
+    assert new["uncertain"][0]["type"] == "uncertain"
+    assert reclass == 0
+
+
+def test_apply_classifications_allows_project_promotion_for_prose_only_mode():
+    detected = {
+        "people": [],
+        "projects": [],
+        "uncertain": [
+            {
+                "name": "Project Aurora",
+                "type": "uncertain",
+                "confidence": 0.4,
+                "frequency": 5,
+                "signals": ["regex"],
+            }
+        ],
+    }
+    decisions = {"Project Aurora": ("PROJECT", "user effort")}
+    new, reclass, _ = _apply_classifications(detected, decisions)
+    assert new["projects"][0]["name"] == "Project Aurora"
+    assert new["projects"][0]["type"] == "project"
+    assert reclass == 1
+
+
+# ── authoritative source filters ────────────────────────────────────────
+
+
+def test_is_authoritative_person_requires_git_signal():
+    assert _is_authoritative_person({"signals": ["5 commits across 2 repos"]})
+    assert not _is_authoritative_person({"signals": ["pronoun nearby (5x)"]})
+
+
+def test_is_authoritative_project_requires_manifest_or_git_signal():
+    assert _is_authoritative_project({"signals": ["package.json, 12 of your commits"]})
+    assert _is_authoritative_project({"signals": ["57 commits (none by you)"]})
+    assert not _is_authoritative_project({"signals": ["code file reference (5x)"]})
+
+
 # ── refine_entities ─────────────────────────────────────────────────────
 
 
@@ -347,6 +445,93 @@ def test_refine_entities_skips_high_confidence_projects():
     assert provider.call_count == 0
 
 
+def test_refine_entities_refines_high_confidence_regex_projects():
+    """High-confidence regex projects still need LLM review without source signal."""
+    detected = {
+        "people": [],
+        "projects": [
+            {
+                "name": "OpenAPI",
+                "type": "project",
+                "confidence": 0.99,
+                "frequency": 5,
+                "signals": ["code file reference (5x)"],
+            }
+        ],
+        "uncertain": [],
+    }
+    provider = FakeProvider(
+        response_text=(
+            '{"classifications": [{"name": "OpenAPI", "label": "TOPIC", "reason": "technology"}]}'
+        )
+    )
+    result = refine_entities(detected, "OpenAPI schemas", provider, show_progress=False)
+    assert provider.call_count == 1
+    assert result.reclassified == 1
+    assert result.merged["projects"] == []
+    assert result.merged["uncertain"][0]["name"] == "OpenAPI"
+
+
+def test_refine_entities_refines_regex_people_but_skips_git_people():
+    detected = {
+        "people": [
+            {
+                "name": "Igor Lins e Silva",
+                "type": "person",
+                "confidence": 0.99,
+                "frequency": 100,
+                "signals": ["100 commits across 3 repos"],
+            },
+            {
+                "name": "Tool",
+                "type": "person",
+                "confidence": 0.99,
+                "frequency": 5,
+                "signals": ["pronoun nearby (5x)"],
+            },
+        ],
+        "projects": [],
+        "uncertain": [],
+    }
+    provider = FakeProvider(
+        response_text='{"classifications": [{"name": "Tool", "label": "COMMON_WORD"}]}'
+    )
+    result = refine_entities(detected, "Tool is a common noun.", provider, show_progress=False)
+    assert provider.call_count == 1
+    names = [e["name"] for e in result.merged["people"]]
+    assert names == ["Igor Lins e Silva"]
+    assert result.dropped == 1
+
+
+def test_refine_entities_can_keep_llm_only_project_in_uncertain():
+    detected = {
+        "people": [],
+        "projects": [],
+        "uncertain": [
+            {
+                "name": "Terraform",
+                "type": "uncertain",
+                "confidence": 0.4,
+                "frequency": 9,
+                "signals": ["regex"],
+            }
+        ],
+    }
+    provider = FakeProvider(
+        response_text='{"classifications": [{"name": "Terraform", "label": "PROJECT"}]}'
+    )
+    result = refine_entities(
+        detected,
+        "Terraform config",
+        provider,
+        show_progress=False,
+        allow_project_promotions=False,
+    )
+    assert result.merged["projects"] == []
+    assert result.merged["uncertain"][0]["name"] == "Terraform"
+    assert any("LLM: project" in s for s in result.merged["uncertain"][0]["signals"])
+
+
 def test_refine_entities_empty_candidates_returns_noop():
     detected = {"people": [], "projects": [], "uncertain": []}
     provider = FakeProvider()
diff --git a/tests/test_project_scanner.py b/tests/test_project_scanner.py
index 3499796..d8c680b 100644
--- a/tests/test_project_scanner.py
+++ b/tests/test_project_scanner.py
@@ -3,6 +3,7 @@
 import json
 import subprocess
 from pathlib import Path
+from types import SimpleNamespace
 
 from mempalace.project_scanner import (
     PersonInfo,
@@ -390,6 +391,49 @@ def test_discover_entities_prefers_real_signal_over_prose(tmp_path):
     assert "realproj" in proj_names
 
 
+def test_discover_entities_keeps_uncertain_for_llm_when_real_signal(tmp_path):
+    """With --llm, regex-uncertain prose candidates should reach refinement."""
+    (tmp_path / "package.json").write_text(json.dumps({"name": "realproj"}))
+    _init_git_repo(tmp_path)
+    (tmp_path / "doc.md").write_text("Noise appeared. Noise repeated. Noise again.")
+
+    class FakeProvider:
+        def __init__(self):
+            self.prompts = []
+
+        def classify(self, _system, user, json_mode=True):
+            self.prompts.append(user)
+            return SimpleNamespace(
+                text='{"classifications": [{"name": "Noise", "label": "COMMON_WORD"}]}'
+            )
+
+    provider = FakeProvider()
+    d = discover_entities(str(tmp_path), llm_provider=provider, show_progress=False)
+
+    assert len(provider.prompts) == 1
+    assert "Noise" in provider.prompts[0]
+    assert "Noise" not in [e["name"] for cat in d.values() for e in cat]
+
+
+def test_discover_entities_keeps_llm_only_project_uncertain_when_real_signal(tmp_path):
+    """Repo roots should not auto-promote LLM-only tools/topics into projects."""
+    (tmp_path / "package.json").write_text(json.dumps({"name": "realproj"}))
+    _init_git_repo(tmp_path)
+    (tmp_path / "doc.md").write_text("Terraform shipped. Terraform changed. Terraform runs.")
+
+    class FakeProvider:
+        def classify(self, _system, _user, json_mode=True):
+            return SimpleNamespace(
+                text='{"classifications": [{"name": "Terraform", "label": "PROJECT"}]}'
+            )
+
+    d = discover_entities(str(tmp_path), llm_provider=FakeProvider(), show_progress=False)
+
+    assert "realproj" in [e["name"] for e in d["projects"]]
+    assert "Terraform" not in [e["name"] for e in d["projects"]]
+    assert "Terraform" in [e["name"] for e in d["uncertain"]]
+
+
 # ── _UnionFind basics ──────────────────────────────────────────────────