chore(corpus-origin): address Copilot review on #1223
- cli.py: stringify each evidence entry exactly once before the startswith check (previously called str(e) twice per element).
- tests: replace the brittle `confidence != 0.90` assertion with an equality check against detect_origin_heuristic on the same samples. The original would have spuriously fired if the heuristic ever legitimately produced 0.90 for these samples; the new form pins the contract directly.
This commit is contained in:
+4
-4
@@ -156,12 +156,12 @@ def _run_pass_zero(project_dir, palace_dir, llm_provider) -> dict:
|
|||||||
tier1_prefix = "Tier-1 heuristic: "
|
tier1_prefix = "Tier-1 heuristic: "
|
||||||
tier2_prefix = "Tier-2 LLM: "
|
tier2_prefix = "Tier-2 LLM: "
|
||||||
heuristic_evidence = [
|
heuristic_evidence = [
|
||||||
str(e) if str(e).startswith(tier1_prefix) else f"{tier1_prefix}{e}"
|
s if s.startswith(tier1_prefix) else f"{tier1_prefix}{s}"
|
||||||
for e in result.evidence
|
for s in (str(e) for e in result.evidence)
|
||||||
]
|
]
|
||||||
llm_evidence = [
|
llm_evidence = [
|
||||||
str(e) if str(e).startswith(tier2_prefix) else f"{tier2_prefix}{e}"
|
s if s.startswith(tier2_prefix) else f"{tier2_prefix}{s}"
|
||||||
for e in llm_result.evidence
|
for s in (str(e) for e in llm_result.evidence)
|
||||||
]
|
]
|
||||||
result.evidence = heuristic_evidence + llm_evidence
|
result.evidence = heuristic_evidence + llm_evidence
|
||||||
except Exception as exc: # noqa: BLE001 — never block init on LLM failure
|
except Exception as exc: # noqa: BLE001 — never block init on LLM failure
|
||||||
|
|||||||
@@ -1484,12 +1484,17 @@ def test_merge_tier_fields_heuristic_yes_llm_no_keeps_heuristic_bool():
|
|||||||
f"Got: {res}"
|
f"Got: {res}"
|
||||||
)
|
)
|
||||||
# The bool and the confidence are paired — both must come from the
|
# The bool and the confidence are paired — both must come from the
|
||||||
# heuristic. The mocked LLM returned 0.90; if the merge accidentally
|
# heuristic. Compare to detect_origin_heuristic on the same samples
|
||||||
# took LLM's confidence, this would equal 0.90.
|
# so this stays correct regardless of what the heuristic computes
|
||||||
assert res["confidence"] != 0.90, (
|
# for these samples (avoids brittleness vs. a hardcoded sentinel).
|
||||||
f"Merged confidence equals the mocked LLM's 0.90 — looks like "
|
from mempalace.corpus_origin import detect_origin_heuristic
|
||||||
f"LLM's confidence leaked through the merge. Heuristic's confidence "
|
|
||||||
f"must be preserved alongside its bool. Got: {res}"
|
expected_confidence = detect_origin_heuristic(_ai_dialogue_samples()).confidence
|
||||||
|
assert res["confidence"] == expected_confidence, (
|
||||||
|
f"Merged confidence {res['confidence']} did not match the heuristic's "
|
||||||
|
f"{expected_confidence} for these samples. The mocked LLM returned "
|
||||||
|
f"0.90; if the merge accidentally took the LLM's confidence, the "
|
||||||
|
f"merged value would not equal the heuristic's. Got: {res}"
|
||||||
)
|
)
|
||||||
# Persona/user/platform from LLM should still be merged in.
|
# Persona/user/platform from LLM should still be merged in.
|
||||||
assert res["agent_persona_names"] == [
|
assert res["agent_persona_names"] == [
|
||||||
|
|||||||
Reference in New Issue
Block a user