From 5e33592ba2f659314542c7db30ad106459068876 Mon Sep 17 00:00:00 2001
From: Igor Lins e Silva <4753812+igorls@users.noreply.github.com>
Date: Sun, 26 Apr 2026 18:43:19 -0300
Subject: [PATCH] chore(corpus-origin): address Copilot review on #1223

- cli.py: stringify each evidence entry exactly once before the
  startswith check (was calling str(e) twice per element).
- tests: replace brittle `confidence != 0.90` assertion with an
  equality check against detect_origin_heuristic on the same samples.
  The original assertion would have failed spuriously if the heuristic
  ever legitimately produced 0.90 for these samples; the new form pins
  the contract (merged confidence == heuristic confidence) directly.
---
 mempalace/cli.py                        |  8 ++++----
 tests/test_corpus_origin_integration.py | 17 +++++++++++------
 2 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/mempalace/cli.py b/mempalace/cli.py
index 92743a2..51e3109 100644
--- a/mempalace/cli.py
+++ b/mempalace/cli.py
@@ -156,12 +156,12 @@ def _run_pass_zero(project_dir, palace_dir, llm_provider) -> dict:
             tier1_prefix = "Tier-1 heuristic: "
             tier2_prefix = "Tier-2 LLM: "
             heuristic_evidence = [
-                str(e) if str(e).startswith(tier1_prefix) else f"{tier1_prefix}{e}"
-                for e in result.evidence
+                s if s.startswith(tier1_prefix) else f"{tier1_prefix}{s}"
+                for s in (str(e) for e in result.evidence)
             ]
             llm_evidence = [
-                str(e) if str(e).startswith(tier2_prefix) else f"{tier2_prefix}{e}"
-                for e in llm_result.evidence
+                s if s.startswith(tier2_prefix) else f"{tier2_prefix}{s}"
+                for s in (str(e) for e in llm_result.evidence)
             ]
             result.evidence = heuristic_evidence + llm_evidence
         except Exception as exc:  # noqa: BLE001 — never block init on LLM failure
diff --git a/tests/test_corpus_origin_integration.py b/tests/test_corpus_origin_integration.py
index 08c3027..8cffd08 100644
--- a/tests/test_corpus_origin_integration.py
+++ b/tests/test_corpus_origin_integration.py
@@ -1484,12 +1484,17 @@ def test_merge_tier_fields_heuristic_yes_llm_no_keeps_heuristic_bool():
         f"Got: {res}"
     )
     # The bool and the confidence are paired — both must come from the
-    # heuristic. The mocked LLM returned 0.90; if the merge accidentally
-    # took LLM's confidence, this would equal 0.90.
-    assert res["confidence"] != 0.90, (
-        f"Merged confidence equals the mocked LLM's 0.90 — looks like "
-        f"LLM's confidence leaked through the merge. Heuristic's confidence "
-        f"must be preserved alongside its bool. Got: {res}"
+    # heuristic. Compare to detect_origin_heuristic on the same samples
+    # so this stays correct regardless of what the heuristic computes
+    # for these samples (avoids brittleness vs. a hardcoded sentinel).
+    from mempalace.corpus_origin import detect_origin_heuristic
+
+    expected_confidence = detect_origin_heuristic(_ai_dialogue_samples()).confidence
+    assert res["confidence"] == expected_confidence, (
+        f"Merged confidence {res['confidence']} did not match the heuristic's "
+        f"{expected_confidence} for these samples. The mocked LLM returned "
+        f"0.90; if the merge accidentally took the LLM's confidence, the "
+        f"merged value would not equal the heuristic's. Got: {res}"
     )
     # Persona/user/platform from LLM should still be merged in.
     assert res["agent_persona_names"] == [