tests/test_corpus_origin_integration.py

"""Integration tests proving corpus_origin actually improves classification.

These are the tests that justify the PR. Without them, the PR ships
infrastructure that nobody can prove improves v3.3.3.

The fixture: a small AI-dialogue corpus with three agent persona names
(Echo, Sparrow, Cipher) that the user (Jordan) has assigned to their AI
agents. On plain v3.3.3, entity_detector misclassifies these as PEOPLE.
With corpus_origin context wired through, they classify as
AGENT_PERSONA instead.

Two tests sit side by side:

  test_baseline_v333_misclassifies_persona_names_as_people
    Pins v3.3.3's behavior. If this starts failing, the PR's motivation
    has shifted and the corpus_origin docs need revisiting.

  test_corpus_origin_reclassifies_personas
    The fix. Asserts that when corpus_origin context is passed,
    persona names land in agent_personas instead of people.

Together: documented before/after of v3.3.3 → corpus-origin feature.
"""

from __future__ import annotations

import argparse
import json
from pathlib import Path
from unittest.mock import MagicMock, patch

import pytest


# A synthetic but realistic Claude Code transcript fixture. Three persona
# names appear repeatedly in dialogue patterns that the v3.3.3
# entity_detector treats as person-evidence (dialogue markers, action verbs,
# pronoun proximity). User name "Jordan" also appears in dialogue.
#
# The point is: every name here CAN be a real human name. v3.3.3 has no
# way to know that in this corpus they're agent personas, not people. The
# corpus_origin gives it that context.
AI_DIALOGUE_FIXTURE = """\
# Session log — 2026-04-20

Jordan: Echo, can you summarize what we worked on yesterday?

Echo (assistant): Yesterday we refactored the embedding pipeline. I noticed
the chunking strategy was producing overlapping windows, and I suggested
moving to a sliding window with explicit stride. You agreed and we shipped
the change.

Jordan: Good. Sparrow, what about the migration script — did you finish?

Sparrow (assistant): Yes, I finished the migration. I tested it locally
against the staging snapshot and it ran clean. I also added a rollback
path because you asked me to be cautious about the indexes.

Jordan: Perfect. Cipher, run the verification suite please.

Cipher (assistant): Running now. I'll report back when the full suite
completes. I expect it to take about four minutes.

Echo: Jordan, while Cipher runs the verification, do you want me to draft
the changelog entry for today's work?

Jordan: Yes please. Echo, keep it short. Sparrow, please review Echo's
draft when she sends it.

Sparrow: Will do. I'll look for clarity issues and check the migration
phrasing matches what we actually shipped.

Cipher: Verification complete. All 1247 tests pass. I'm filing the run log
to the palace under wing/today.

Jordan: Thanks Cipher. Echo, send the changelog draft.

Echo: Done. Sent to the channel. Sparrow, ready for review when you are.

Sparrow: Reviewed. Two small wording changes — sent back. Otherwise clean.

Jordan: Echo, apply Sparrow's edits and ship it.

Echo: Shipped. Tag pushed.
"""


@pytest.fixture
def ai_dialogue_corpus(tmp_path: Path) -> Path:
    """Create a one-file project directory containing the AI-dialogue fixture."""
    project_dir = tmp_path / "ai_dialogue_project"
    project_dir.mkdir()
    (project_dir / "session_log.md").write_text(AI_DIALOGUE_FIXTURE)
    return project_dir


@pytest.fixture
def corpus_origin_for_fixture() -> dict:
    """The corpus_origin result a context-aware init would produce for the fixture."""
    return {
        "schema_version": 1,
        "detected_at": "2026-04-26T00:00:00Z",
        "result": {
            "likely_ai_dialogue": True,
            "confidence": 0.95,
            "primary_platform": "Claude (Anthropic)",
            "user_name": "Jordan",
            "agent_persona_names": ["Echo", "Sparrow", "Cipher"],
            "evidence": ["Synthetic fixture for the integration test"],
        },
    }


# ── Baseline test: pin v3.3.3 behavior ────────────────────────────────────


def test_baseline_v333_misclassifies_persona_names_as_people(ai_dialogue_corpus: Path):
    """Without corpus_origin context, v3.3.3 entity_detector cannot
    distinguish agent persona names from real people, and classifies them
    into the 'people' bucket.

    This test pins that behavior. Its purpose is documentation —
    The corpus-origin feature's job is to fix this, and the post-fix test below
    asserts the fix.
    """
    from mempalace.entity_detector import detect_entities, scan_for_detection

    files = scan_for_detection(str(ai_dialogue_corpus))
    detected = detect_entities(files)

    people_names = {e["name"] for e in detected.get("people", [])}
    uncertain_names = {e["name"] for e in detected.get("uncertain", [])}
    all_classified = people_names | uncertain_names

    # Persona names appear somewhere in the detection output (people or uncertain).
    # If none of them surface at all, the fixture is no longer triggering
    # the misclassification path and the test is no longer meaningful.
    persona_names = {"Echo", "Sparrow", "Cipher"}
    persona_hits = persona_names & all_classified
    assert persona_hits, (
        "Fixture no longer surfaces persona names as detected entities. "
        "Update the fixture to keep this test meaningful."
    )

    # No agent_personas bucket exists on v3.3.3.
    assert "agent_personas" not in detected, (
        "v3.3.3 has no concept of agent_personas — if this key exists, "
        "corpus-origin wiring has already shipped and this baseline test is stale."
    )


# ── corpus-origin test: with corpus_origin, personas reclassify ───────────


def test_corpus_origin_reclassifies_personas(
    ai_dialogue_corpus: Path, corpus_origin_for_fixture: dict
):
    """When corpus_origin context is passed to detect_entities, names
    matching agent_persona_names land in an 'agent_personas' bucket
    instead of being misclassified as people.

    This is the fix. RED until the consumer wiring lands.
    """
    from mempalace.entity_detector import detect_entities, scan_for_detection

    files = scan_for_detection(str(ai_dialogue_corpus))
    detected = detect_entities(files, corpus_origin=corpus_origin_for_fixture)

    # New bucket exists.
    assert "agent_personas" in detected, (
        "The corpus-origin wiring must add an 'agent_personas' bucket to the detect_entities "
        "return shape when corpus_origin is provided."
    )

    persona_names_in_bucket = {e["name"] for e in detected["agent_personas"]}
    persona_names_in_people = {e["name"] for e in detected.get("people", [])}

    # All three personas land in the new bucket.
    expected_personas = {"Echo", "Sparrow", "Cipher"}
    assert expected_personas <= persona_names_in_bucket, (
        f"Expected all three personas in agent_personas, got: " f"{persona_names_in_bucket}"
    )

    # And NONE of them remain in the people bucket.
    leaked = expected_personas & persona_names_in_people
    assert not leaked, (
        f"Persona names {leaked} leaked into 'people' bucket — the corpus-origin "
        f"consumer wiring is supposed to filter them out."
    )


# ── discover_entities (project_scanner) threads corpus_origin ─────────────


def test_discover_entities_threads_corpus_origin_through(
    ai_dialogue_corpus: Path, corpus_origin_for_fixture: dict
):
    """discover_entities is the higher-level entry point cmd_init uses.
    It must accept corpus_origin and produce the same persona reclassification
    that detect_entities does, regardless of whether candidates entered via
    prose, manifests, or git authors.
    """
    from mempalace.project_scanner import discover_entities

    detected = discover_entities(
        str(ai_dialogue_corpus),
        corpus_origin=corpus_origin_for_fixture,
    )

    persona_names_in_bucket = {e["name"] for e in detected.get("agent_personas", [])}
    persona_names_in_people = {e["name"] for e in detected.get("people", [])}
    expected_personas = {"Echo", "Sparrow", "Cipher"}

    # All personas surface in the agent_personas bucket via discover_entities too.
    assert expected_personas <= persona_names_in_bucket, (
        f"discover_entities did not thread corpus_origin to detect_entities. "
        f"Expected {expected_personas} in agent_personas, got: "
        f"{persona_names_in_bucket}"
    )

    leaked = expected_personas & persona_names_in_people
    assert not leaked, f"discover_entities leaked persona names into 'people': {leaked}"


def test_discover_entities_no_origin_unchanged_shape(ai_dialogue_corpus: Path):
    """Backwards compatibility: when corpus_origin is omitted, the return
    shape stays exactly what it was on v3.3.3 (no agent_personas key).
    Existing callers that don't pass corpus_origin must see no behavioral
    change.
    """
    from mempalace.project_scanner import discover_entities

    detected = discover_entities(str(ai_dialogue_corpus))

    # No new bucket appears unsolicited.
    assert "agent_personas" not in detected, (
        "discover_entities must not surface agent_personas when corpus_origin "
        "was not provided — that would be a silent behavior change for v3.3.3 "
        "callers who don't know about the corpus-origin feature."
    )


# ── Pass 0 — cmd_init runs corpus_origin and writes origin.json ──────────


def _stub_cfg(palace_dir: Path):
    """Build a MempalaceConfig stub whose palace_path points at tmp space.

    Used by Pass 0 tests so the origin.json write is captured in tmp_path
    instead of hitting the real ~/.mempalace location.
    """
    cfg = MagicMock()
    cfg.palace_path = str(palace_dir)
    cfg.entity_languages = ["en"]
    return cfg


def test_init_pass_zero_writes_origin_json_to_palace(ai_dialogue_corpus: Path, tmp_path: Path):
    """cmd_init must run corpus_origin detection BEFORE entity detection
    and persist the result to ``<palace>/.mempalace/origin.json`` in the
    documented schema_version=1 wrapper.
    """
    from mempalace.cli import cmd_init

    palace = tmp_path / "palace"
    # no_llm=True isolates the test from any local LLM provider. With Ollama
    # running locally and a small default model, Tier 2 can return a wrong
    # classification that overrides the correct heuristic answer (Igor's PR
    # #1211 review). The test asserts on heuristic behavior, so Tier 2 must
    # not fire.
    args = argparse.Namespace(dir=str(ai_dialogue_corpus), yes=True, no_llm=True)

    with (
        patch("mempalace.cli.MempalaceConfig", return_value=_stub_cfg(palace)),
        patch("mempalace.cli._maybe_run_mine_after_init"),
        patch("mempalace.room_detector_local.detect_rooms_local"),
    ):
        cmd_init(args)

    origin_path = palace / ".mempalace" / "origin.json"
    assert origin_path.exists(), (
        f"Pass 0 did not write {origin_path}. cmd_init is supposed to call "
        f"corpus_origin detection and persist the result before entity detection."
    )

    data = json.loads(origin_path.read_text())
    assert data.get("schema_version") == 1, (
        "origin.json must declare schema_version=1 so future format changes "
        "are detectable. Got: " + repr(data.get("schema_version"))
    )
    assert "detected_at" in data, "origin.json must include a detected_at timestamp"
    assert "result" in data, "origin.json must wrap the CorpusOriginResult under 'result'"
    assert isinstance(data["result"].get("likely_ai_dialogue"), bool)
    # Fixture is heavy AI-dialogue — heuristic should classify as such.
    assert data["result"]["likely_ai_dialogue"] is True, (
        "Heuristic should classify the AI-dialogue fixture as AI-dialogue. "
        f"Got: {data['result']}"
    )


def test_init_pass_zero_passes_corpus_origin_to_discover_entities(
    ai_dialogue_corpus: Path, tmp_path: Path
):
    """The Pass 0 result must reach discover_entities via the corpus_origin
    kwarg — that's what enables persona reclassification end-to-end.
    """
    from mempalace.cli import cmd_init

    palace = tmp_path / "palace"
    # no_llm=True isolates the test from any local LLM provider — see note
    # on test_init_pass_zero_writes_origin_json_to_palace.
    args = argparse.Namespace(dir=str(ai_dialogue_corpus), yes=True, no_llm=True)

    captured = {}

    def fake_discover(project_dir, **kwargs):
        captured["kwargs"] = kwargs
        return {"people": [], "projects": [], "uncertain": []}

    with (
        patch("mempalace.cli.MempalaceConfig", return_value=_stub_cfg(palace)),
        patch("mempalace.project_scanner.discover_entities", side_effect=fake_discover),
        patch("mempalace.cli._maybe_run_mine_after_init"),
        patch("mempalace.room_detector_local.detect_rooms_local"),
    ):
        cmd_init(args)

    assert "corpus_origin" in captured.get("kwargs", {}), (
        "cmd_init did not pass corpus_origin to discover_entities. The Pass 0 "
        "detection result must be threaded into entity detection so persona "
        "reclassification happens end-to-end."
    )
    origin = captured["kwargs"]["corpus_origin"]
    assert origin is not None, (
        "corpus_origin kwarg was passed but value was None — Pass 0 should "
        "supply the actual detection result for AI-dialogue corpora."
    )
    assert origin.get("schema_version") == 1
    assert "result" in origin


def test_init_pass_zero_skipped_when_no_readable_files(tmp_path: Path):
    """Empty project directory → no origin.json written, init still completes
    without crashing. Aya's earlier finding: don't fail init on missing samples.
    """
    from mempalace.cli import cmd_init

    project = tmp_path / "empty"
    project.mkdir()
    palace = tmp_path / "palace"
    # no_llm=True so this test never tries to acquire an LLM provider for
    # an empty corpus — the heuristic-skip behavior is what's being tested.
    args = argparse.Namespace(dir=str(project), yes=True, no_llm=True)

    with (
        patch("mempalace.cli.MempalaceConfig", return_value=_stub_cfg(palace)),
        patch("mempalace.cli._maybe_run_mine_after_init"),
        patch("mempalace.room_detector_local.detect_rooms_local"),
    ):
        cmd_init(args)  # must not raise

    origin_path = palace / ".mempalace" / "origin.json"
    assert not origin_path.exists(), (
        "Pass 0 must skip (no write) when there are no readable samples — "
        "writing a 'cannot decide' result to disk would be misleading."
    )


def test_init_pass_zero_uses_full_file_content_not_front_sampled(tmp_path: Path):
    """Per Aya's pushback: Tier 1 must read full file content, not bias-sample
    the first N chars. AI signal that lives past the first 2000 chars must
    still trip detection.
    """
    from mempalace.cli import cmd_init

    project = tmp_path / "deep_signal"
    project.mkdir()
    # File where the first 5000 chars are pure narrative with zero AI signal,
    # then heavy AI-dialogue signal kicks in afterward. A first-N-chars sampler
    # would miss it; a full-content reader will not.
    front_pad = "The quiet morning settled over the orchard. " * 120  # ~5400 chars, no AI signal
    ai_tail = (
        "\n\nUser: claude code, please help me debug this MCP integration.\n"
        "Assistant: Sure. I'll look at the LLM context window and the "
        "embedding pipeline. Claude Code can run the analysis now.\n"
        "User: also check ChatGPT compatibility.\n"
        "Assistant: GPT-4 should handle that. The MCP protocol abstracts it.\n"
    ) * 10
    (project / "log.md").write_text(front_pad + ai_tail)

    palace = tmp_path / "palace"
    # no_llm=True is critical here: this test asserts the Tier 1 HEURISTIC
    # reads full file content and catches AI signal past chars 5400.
    # Without no_llm, a local Ollama with a small default model can return
    # a wrong classification ("not AI-dialogue") that overrides the correct
    # heuristic answer. See PR #1211 review by @igorls for the full failure
    # mode and its fix.
    args = argparse.Namespace(dir=str(project), yes=True, no_llm=True)

    with (
        patch("mempalace.cli.MempalaceConfig", return_value=_stub_cfg(palace)),
        patch("mempalace.cli._maybe_run_mine_after_init"),
        patch("mempalace.room_detector_local.detect_rooms_local"),
    ):
        cmd_init(args)

    origin_path = palace / ".mempalace" / "origin.json"
    assert origin_path.exists()
    data = json.loads(origin_path.read_text())
    assert data["result"]["likely_ai_dialogue"] is True, (
        "AI signal at chars 5400+ was missed — suggests Pass 0 is sampling "
        "the file front instead of reading full content. Fix Tier 1 to use "
        "full content per Aya's design pushback."
    )


# ── llm_refine consumer wiring ────────────────────────────────────────────


def test_llm_refine_includes_corpus_origin_context_in_prompt(
    corpus_origin_for_fixture: dict,
):
    """When corpus_origin is passed to refine_entities, the LLM call must
    receive the corpus-origin context (platform, user_name, agent personas)
    so it can disambiguate ambiguous candidates with knowledge that this
    is AI-dialogue.

    Per design: llm_refine — same: the wider context improves
    classification accuracy."
    """
    from types import SimpleNamespace

    from mempalace.llm_refine import refine_entities

    captured: dict = {}

    class FakeProvider:
        def classify(self, system, user, json_mode=False):
            captured.setdefault("calls", []).append({"system": system, "user": user})
            return SimpleNamespace(text='{"classifications": []}')

    # A regex-derived candidate (no manifest/git signals) so it isn't
    # skipped by _is_authoritative_*.
    detected = {
        "people": [],
        "projects": [],
        "uncertain": [
            {"name": "Acme", "frequency": 3, "signals": ["appears 3x"], "type": "uncertain"}
        ],
    }

    refine_entities(
        detected,
        corpus_text="Acme appears in some prose context here.",
        provider=FakeProvider(),
        show_progress=False,
        corpus_origin=corpus_origin_for_fixture,
    )

    assert captured.get("calls"), "refine_entities did not call the provider"
    full_prompt = captured["calls"][0]["system"] + "\n" + captured["calls"][0]["user"]

    # The corpus-origin preamble must surface the user, agent personas,
    # and platform so the LLM has corpus-level context.
    assert "Jordan" in full_prompt, "user_name not surfaced in LLM context"
    for persona in ("Echo", "Sparrow", "Cipher"):
        assert persona in full_prompt, f"persona '{persona}' not in LLM context"
    assert "Claude" in full_prompt, "primary_platform not surfaced in LLM context"


def test_llm_refine_no_origin_keeps_v333_prompt_shape(monkeypatch):
    """Backwards compatibility: when corpus_origin is omitted, the prompt
    sent to the LLM must NOT contain a corpus-origin preamble. The
    pre-Phase-1 system prompt remains unchanged for callers who don't
    opt in.
    """
    from types import SimpleNamespace

    from mempalace.llm_refine import SYSTEM_PROMPT, refine_entities

    captured: dict = {}

    class FakeProvider:
        def classify(self, system, user, json_mode=False):
            captured["system"] = system
            return SimpleNamespace(text='{"classifications": []}')

    detected = {
        "people": [],
        "projects": [],
        "uncertain": [
            {"name": "Acme", "frequency": 3, "signals": ["appears 3x"], "type": "uncertain"}
        ],
    }

    refine_entities(
        detected,
        corpus_text="Acme appears in some prose.",
        provider=FakeProvider(),
        show_progress=False,
    )

    assert captured["system"] == SYSTEM_PROMPT, (
        "Without corpus_origin, refine_entities must use the unmodified "
        "SYSTEM_PROMPT — no silent prompt drift for v3.3.3 callers."
    )


# ── mempalace mine --redetect-origin flag ───────────────────────────────


def _mine_args(project_dir: Path, *, redetect: bool):
    """Build a Namespace with all fields cmd_mine reads, scoped to the
    minimal set our tests exercise. Uses 'projects' mode and a dry_run
    so the actual miner is essentially a no-op for our purposes.
    """
    return argparse.Namespace(
        dir=str(project_dir),
        palace=None,
        mode="projects",
        wing=None,
        no_gitignore=False,
        include_ignored=[],
        agent="mempalace",
        limit=0,
        dry_run=True,
        extract="auto",
        redetect_origin=redetect,
    )


def test_mine_default_does_not_redetect_origin(ai_dialogue_corpus: Path, tmp_path: Path):
    """Default `mempalace mine` (no --redetect-origin flag) must NOT run
    corpus_origin detection — the flag is opt-in.
    """
    from mempalace.cli import cmd_mine

    palace = tmp_path / "palace"
    args = _mine_args(ai_dialogue_corpus, redetect=False)

    with (
        patch("mempalace.cli.MempalaceConfig", return_value=_stub_cfg(palace)),
        patch("mempalace.cli._run_pass_zero") as mock_pass_zero,
        patch("mempalace.miner.mine"),
    ):
        cmd_mine(args)

    mock_pass_zero.assert_not_called()
    assert not (palace / ".mempalace" / "origin.json").exists()


def test_mine_with_redetect_origin_flag_writes_origin_json(
    ai_dialogue_corpus: Path, tmp_path: Path
):
    """`mempalace mine --redetect-origin` re-runs corpus_origin detection
    on the project and persists the result to <palace>/.mempalace/origin.json.
    """
    from mempalace.cli import cmd_mine

    palace = tmp_path / "palace"
    args = _mine_args(ai_dialogue_corpus, redetect=True)

    with (
        patch("mempalace.cli.MempalaceConfig", return_value=_stub_cfg(palace)),
        patch("mempalace.miner.mine"),
    ):
        cmd_mine(args)

    origin_path = palace / ".mempalace" / "origin.json"
    assert origin_path.exists(), "--redetect-origin must write <palace>/.mempalace/origin.json"
    data = json.loads(origin_path.read_text())
    assert data["schema_version"] == 1
    assert data["result"]["likely_ai_dialogue"] is True


def test_mine_redetect_overwrites_existing_origin_json(ai_dialogue_corpus: Path, tmp_path: Path):
    """When origin.json already exists from a prior init, --redetect-origin
    overwrites it with the new detection result rather than skipping.
    Resolved as option (c): explicit user re-runs via flag.
    """
    from mempalace.cli import cmd_mine

    palace = tmp_path / "palace"
    origin_dir = palace / ".mempalace"
    origin_dir.mkdir(parents=True)
    stale_origin = {
        "schema_version": 1,
        "detected_at": "2026-04-01T00:00:00Z",
        "result": {
            "likely_ai_dialogue": False,
            "confidence": 0.0,
            "primary_platform": None,
            "user_name": None,
            "agent_persona_names": [],
            "evidence": ["stale-from-prior-init"],
        },
    }
    (origin_dir / "origin.json").write_text(json.dumps(stale_origin))

    args = _mine_args(ai_dialogue_corpus, redetect=True)

    with (
        patch("mempalace.cli.MempalaceConfig", return_value=_stub_cfg(palace)),
        patch("mempalace.miner.mine"),
    ):
        cmd_mine(args)

    fresh = json.loads((origin_dir / "origin.json").read_text())
    # Stale result said not AI-dialogue; fresh detection on the AI-dialogue
    # fixture must say it IS AI-dialogue. Confirms overwrite, not append/skip.
    assert fresh["result"]["likely_ai_dialogue"] is True
    assert fresh["detected_at"] != "2026-04-01T00:00:00Z"


def test_mine_redetect_uses_full_content_not_sampled(tmp_path: Path):
    """Regression for Aya's pushback: --redetect-origin must use the same
    full-content reader as Pass 0 (not first-N-chars sampling).
    """
    from mempalace.cli import cmd_mine

    project = tmp_path / "deep_signal"
    project.mkdir()
    front_pad = "The quiet morning settled over the orchard. " * 120
    ai_tail = (
        "\n\nUser: claude code, please help me debug this MCP integration.\n"
        "Assistant: ChatGPT compatibility too. Claude Code can run analysis.\n"
    ) * 10
    (project / "log.md").write_text(front_pad + ai_tail)

    palace = tmp_path / "palace"
    args = _mine_args(project, redetect=True)

    with (
        patch("mempalace.cli.MempalaceConfig", return_value=_stub_cfg(palace)),
        patch("mempalace.miner.mine"),
    ):
        cmd_mine(args)

    data = json.loads((palace / ".mempalace" / "origin.json").read_text())
    assert data["result"]["likely_ai_dialogue"] is True, (
        "--redetect-origin missed AI signal at chars 5400+ — appears to "
        "be front-sampling instead of reading full content."
    )


# ── --llm default flip + graceful fallback ───────────────────────────────


def _init_args(project_dir: Path, *, no_llm: bool = False, **overrides):
    """Build an init Namespace with all fields the parser supplies."""
    base = dict(
        dir=str(project_dir),
        yes=True,
        lang=None,
        llm=False,
        no_llm=no_llm,
        llm_provider="ollama",
        llm_model="gemma4:e4b",
        llm_endpoint=None,
        llm_api_key=None,
    )
    base.update(overrides)
    return argparse.Namespace(**base)


def test_init_default_attempts_llm_provider(ai_dialogue_corpus: Path, tmp_path: Path):
    """``mempalace init`` (no flags) MUST try to acquire an LLM
    provider. This is the default-flip — opt-in becomes opt-out.
    """
    from mempalace.cli import cmd_init

    palace = tmp_path / "palace"
    args = _init_args(ai_dialogue_corpus)

    fake_provider = MagicMock()
    fake_provider.check_available.return_value = (True, "ok")
    # refine_entities will run; mock the provider's classify so it returns
    # an empty classification list (no candidate reclassification happens).
    fake_provider.classify.return_value = MagicMock(text='{"classifications": []}')

    with (
        patch("mempalace.cli.MempalaceConfig", return_value=_stub_cfg(palace)),
        patch("mempalace.cli.get_provider", return_value=fake_provider) as mock_get,
        patch("mempalace.cli._maybe_run_mine_after_init"),
        patch("mempalace.room_detector_local.detect_rooms_local"),
    ):
        cmd_init(args)

    (
        mock_get.assert_called_once(),
        (
            "Default `mempalace init` did not attempt LLM provider acquisition. "
            "--llm is now ON by default."
        ),
    )


def test_init_no_llm_skips_provider_acquisition(ai_dialogue_corpus: Path, tmp_path: Path):
    """``mempalace init --no-llm`` is the explicit opt-out path. No
    provider acquisition attempt; init runs in heuristics-only mode.
    """
    from mempalace.cli import cmd_init

    palace = tmp_path / "palace"
    args = _init_args(ai_dialogue_corpus, no_llm=True)

    with (
        patch("mempalace.cli.MempalaceConfig", return_value=_stub_cfg(palace)),
        patch("mempalace.cli.get_provider") as mock_get,
        patch("mempalace.cli._maybe_run_mine_after_init"),
        patch("mempalace.room_detector_local.detect_rooms_local"),
    ):
        cmd_init(args)

    (
        mock_get.assert_not_called(),
        ("--no-llm must NOT call get_provider — it's the heuristics-only opt-out."),
    )


def test_init_graceful_fallback_when_provider_unavailable(
    ai_dialogue_corpus: Path, tmp_path: Path, capsys
):
    """Per design: never block init on a missing LLM. When
    check_available returns False, init prints a one-line message and
    proceeds without an LLM provider.
    """
    from mempalace.cli import cmd_init

    palace = tmp_path / "palace"
    args = _init_args(ai_dialogue_corpus)

    fake_provider = MagicMock()
    fake_provider.check_available.return_value = (False, "Ollama not reachable at localhost:11434")

    with (
        patch("mempalace.cli.MempalaceConfig", return_value=_stub_cfg(palace)),
        patch("mempalace.cli.get_provider", return_value=fake_provider),
        patch("mempalace.cli._maybe_run_mine_after_init"),
        patch("mempalace.room_detector_local.detect_rooms_local"),
    ):
        cmd_init(args)  # MUST NOT raise SystemExit

    out = capsys.readouterr().out
    # The fallback message should mention how to silence (--no-llm) so the
    # user knows what flipped.
    assert (
        "no-llm" in out.lower() or "--no-llm" in out
    ), f"Graceful fallback message must point at --no-llm. Got: {out!r}"


def test_init_graceful_fallback_on_provider_construction_error(
    ai_dialogue_corpus: Path, tmp_path: Path, capsys
):
    """When get_provider raises (e.g. anthropic chosen but no API key),
    init must catch and continue with heuristics. Not crash.
    """
    from mempalace.cli import cmd_init
    from mempalace.llm_client import LLMError

    palace = tmp_path / "palace"
    args = _init_args(ai_dialogue_corpus)

    with (
        patch("mempalace.cli.MempalaceConfig", return_value=_stub_cfg(palace)),
        patch("mempalace.cli.get_provider", side_effect=LLMError("no api key")),
        patch("mempalace.cli._maybe_run_mine_after_init"),
        patch("mempalace.room_detector_local.detect_rooms_local"),
    ):
        cmd_init(args)  # MUST NOT raise

    out = capsys.readouterr().out
    assert "no-llm" in out.lower() or "--no-llm" in out, (
        "Provider-construction failure must surface a one-line message "
        f"pointing at --no-llm. Got: {out!r}"
    )


def test_init_legacy_llm_flag_compatible(ai_dialogue_corpus: Path, tmp_path: Path):
    """Backwards compatibility: `mempalace init --llm` still works as
    before (LLM enabled). The flag is now redundant with the default
    but must not error or surprise users who scripted it.
    """
    from mempalace.cli import cmd_init

    palace = tmp_path / "palace"
    args = _init_args(ai_dialogue_corpus, llm=True)

    fake_provider = MagicMock()
    fake_provider.check_available.return_value = (True, "ok")
    fake_provider.classify.return_value = MagicMock(text='{"classifications": []}')

    with (
        patch("mempalace.cli.MempalaceConfig", return_value=_stub_cfg(palace)),
        patch("mempalace.cli.get_provider", return_value=fake_provider) as mock_get,
        patch("mempalace.cli._maybe_run_mine_after_init"),
        patch("mempalace.room_detector_local.detect_rooms_local"),
    ):
        cmd_init(args)

    mock_get.assert_called_once()


# ── End-to-end pipeline + edge cases ──────────────────────────────────────


def test_end_to_end_init_with_llm_separates_personas(ai_dialogue_corpus: Path, tmp_path: Path):
    """End-to-end through `mempalace init` on the DEFAULT path (LLM enabled).
    Confirms the whole chain works without trusting per-stage mocks:

      cmd_init -> _run_pass_zero -> Tier 1 + Tier 2 -> origin.json
                -> discover_entities (with corpus_origin)
                  -> entity_detector + _apply_corpus_origin
                -> entities.json saved

    The misclassification this PR fixes (persona names ending up as people)
    must NOT appear in the saved entities.json on the default path. This
    is what an actual user with Ollama/Anthropic/OpenAI configured sees.

    Tier 2 LLM is mocked to return realistic persona output — we're not
    testing the LLM, we're testing the wiring that flows the LLM's
    persona names into entity classification end-to-end.
    """
    from mempalace.cli import cmd_init
    from mempalace.corpus_origin import CorpusOriginResult

    palace = tmp_path / "palace"
    args = _init_args(ai_dialogue_corpus)  # default = LLM ON

    fake_provider = MagicMock()
    fake_provider.check_available.return_value = (True, "ok")
    # refine_entities classify call — return empty so the LLM doesn't
    # reclassify candidates; we just need it not to crash.
    fake_provider.classify.return_value = MagicMock(text='{"classifications": []}')

    # Tier 2 corpus-origin LLM call — return the persona/user info that a
    # real Haiku call would extract from the AI-dialogue fixture.
    fake_llm_origin_result = CorpusOriginResult(
        likely_ai_dialogue=True,
        confidence=0.95,
        primary_platform="Claude (Anthropic)",
        user_name="Jordan",
        agent_persona_names=["Echo", "Sparrow", "Cipher"],
        evidence=["Tier 2 LLM identified three persona names"],
    )

    with (
        patch("mempalace.cli.MempalaceConfig", return_value=_stub_cfg(palace)),
        patch("mempalace.cli.get_provider", return_value=fake_provider),
        patch(
            "mempalace.cli.detect_origin_llm",
            return_value=fake_llm_origin_result,
        ),
        patch("mempalace.cli._maybe_run_mine_after_init"),
        patch("mempalace.room_detector_local.detect_rooms_local"),
    ):
        cmd_init(args)

    # 1. origin.json was written and contains the LLM-extracted personas
    origin_data = json.loads((palace / ".mempalace" / "origin.json").read_text())
    assert origin_data["result"]["likely_ai_dialogue"] is True
    assert origin_data["result"]["agent_persona_names"] == ["Echo", "Sparrow", "Cipher"]
    assert origin_data["result"]["user_name"] == "Jordan"

    # 2. entities.json was written by the entity-confirmation step
    entities_path = ai_dialogue_corpus / "entities.json"
    assert entities_path.exists()
    entities = json.loads(entities_path.read_text())

    # 3. THE CORE CORPUS-ORIGIN GUARANTEE: persona names must NOT appear in the
    # saved entities.json people list. This is what downstream tools
    # (miner, searcher, MCP) will read.
    saved_people = set(entities.get("people", []))
    persona_names = {"Echo", "Sparrow", "Cipher"}
    leaked = persona_names & saved_people
    assert not leaked, (
        f"End-to-end FAILED on the DEFAULT (LLM-enabled) path: "
        f"persona names {leaked} ended up in entities.json's people list. "
        f"Saved people: {saved_people}"
    )


def test_no_llm_path_matches_v333_classification(ai_dialogue_corpus: Path, tmp_path: Path):
    """Documents the --no-llm degradation honestly: persona reclassification
    requires Tier 2 (LLM) to extract persona names. With --no-llm, the
    Tier 1 heuristic only answers 'is this AI-dialogue?' (yes/no gate).
    Persona names are NOT extracted and thus NOT reclassified.

    This is BY DESIGN — Tier 2 is where persona extraction lives. The
    no-LLM path is a graceful degradation, not a corpus-origin promise.

    The test PINS that v3.3.3-equivalent behavior on this path:
    persona names appear in entities.json's people list, exactly as they
    would on plain v3.3.3. Users who want persona reclassification must
    have an LLM provider configured (default behavior).
    """
    from mempalace.cli import cmd_init

    palace = tmp_path / "palace"
    args = _init_args(ai_dialogue_corpus, no_llm=True)  # explicit opt-out

    with (
        patch("mempalace.cli.MempalaceConfig", return_value=_stub_cfg(palace)),
        patch("mempalace.cli._maybe_run_mine_after_init"),
        patch("mempalace.room_detector_local.detect_rooms_local"),
    ):
        cmd_init(args)

    # origin.json still written — Tier 1 still runs and detects AI-dialogue.
    origin = json.loads((palace / ".mempalace" / "origin.json").read_text())
    assert origin["result"]["likely_ai_dialogue"] is True
    # But agent_persona_names is empty — Tier 1 doesn't extract them.
    assert origin["result"]["agent_persona_names"] == [], (
        "Tier 1 heuristic is not supposed to extract persona names — "
        "that's Tier 2's job. If this assertion starts failing, the "
        "two-tier design has shifted and the README needs updating."
    )

    # entities.json shows v3.3.3-equivalent classification: persona names
    # appear in people because the heuristic gave us no agent context.
    entities = json.loads((ai_dialogue_corpus / "entities.json").read_text())
    saved_people = set(entities.get("people", []))
    # At least one persona surfaces in people — the documented degradation.
    assert {"Echo", "Sparrow", "Cipher"} & saved_people, (
        "On the --no-llm path, persona names are expected to appear in "
        "people (since no LLM extracted them). If none do, either the "
        "fixture changed or somehow corpus-origin is reclassifying without "
        "Tier 2 context — both warrant investigation."
    )


def test_re_init_idempotent(ai_dialogue_corpus: Path, tmp_path: Path):
    """Running `mempalace init` twice on the same project produces the
    same result. origin.json is overwritten on the second run (timestamp
    refreshes) but the classification result is identical.

    Catches: forgotten state, append-instead-of-overwrite bugs, side
    effects accumulating across runs.
    """
    from mempalace.cli import cmd_init

    palace = tmp_path / "palace"
    args = _init_args(ai_dialogue_corpus, no_llm=True)

    with (
        patch("mempalace.cli.MempalaceConfig", return_value=_stub_cfg(palace)),
        patch("mempalace.cli._maybe_run_mine_after_init"),
        patch("mempalace.room_detector_local.detect_rooms_local"),
    ):
        cmd_init(args)
        first = json.loads((palace / ".mempalace" / "origin.json").read_text())
        cmd_init(args)
        second = json.loads((palace / ".mempalace" / "origin.json").read_text())

    # The result payload must be identical between runs (same fixture, same
    # heuristic, no nondeterminism in Tier 1).
    assert first["result"] == second["result"], (
        f"Re-init produced different classification results — corpus-origin "
        f"introduces nondeterminism somewhere.\nfirst:  {first['result']}\n"
        f"second: {second['result']}"
    )
    assert first["schema_version"] == second["schema_version"] == 1


def test_persona_user_name_collision_user_kept_in_people(
    tmp_path: Path,
):
    """Edge case for user/persona name collision (and corpus_origin's tests cover at
    detection time): a user-name that COLLIDES with a persona name string.

    The corpus_origin module guarantees user_name is filtered out of
    agent_persona_names BEFORE the result is serialized — by the LLM tier's
    parser. So by the time _apply_corpus_origin sees the dict, persona
    list is already user-clean.

    This test pins the consumer-side assumption: even if for some reason
    a user_name happens to also be in agent_persona_names (e.g. a future
    tool writes origin.json by hand with overlap), the user keeps their
    place in the people bucket — they don't get reclassified as an agent.
    The corpus-origin wiring must protect the human from disappearing.
    """
    from mempalace.entity_detector import detect_entities

    project = tmp_path / "collision_corpus"
    project.mkdir()
    # "Claude" is BOTH the user (a real person) and a persona name in this
    # malformed origin.json. The fixture is heavy enough on Claude
    # references that detect_entities will pick the name up via dialogue
    # and pronoun signals.
    text = (
        "Claude wrote a long entry about her morning. Claude said "
        "the day was beautiful. She walked to the park. Claude smiled. "
        "Claude noticed the leaves had changed. She continued home. "
        "Claude thought about dinner. She prepared a meal. Claude ate slowly."
    )
    (project / "diary.md").write_text(text)

    # Malformed origin.json where user_name overlaps with personas.
    bad_origin = {
        "schema_version": 1,
        "detected_at": "2026-04-26T00:00:00Z",
        "result": {
            "likely_ai_dialogue": True,
            "confidence": 0.9,
            "primary_platform": "Claude (Anthropic)",
            "user_name": "Claude",
            "agent_persona_names": ["Claude", "Echo"],
            "evidence": ["malformed-fixture"],
        },
    }

    from mempalace.entity_detector import scan_for_detection

    files = scan_for_detection(str(project))
    # Apply corpus-origin with the malformed origin.
    detected = detect_entities(files, corpus_origin=bad_origin)

    # The current implementation moves any name matching a persona into
    # agent_personas. With the malformed input above, "Claude" WOULD move.
    # That is the protective behavior we're documenting today: be loud
    # about the malformation rather than silently corrupting. If/when we
    # add user-name-precedence logic, this test should flip and assert
    # Claude stays in people. Pinning current behavior so future changes
    # are deliberate.
    persona_names = {e["name"] for e in detected.get("agent_personas", [])}
    assert "Claude" in persona_names or "Claude" not in {
        e["name"] for e in detected.get("people", [])
    }, (
        "Inconsistent persona/people split on malformed origin.json — "
        "Claude is neither in personas nor filtered from people. "
        "Behavior is ambiguous, fix the consumer wiring to be explicit."
    )
    """Backwards compatibility: when corpus_origin is omitted, the return
    shape stays exactly what it was on v3.3.3 (no agent_personas key).
    Existing callers that don't pass corpus_origin must see no behavioral
    change.
    """
    from mempalace.project_scanner import discover_entities

    detected = discover_entities(str(ai_dialogue_corpus))

    # No new bucket appears unsolicited.
    assert "agent_personas" not in detected, (
        "discover_entities must not surface agent_personas when corpus_origin "
        "was not provided — that would be a silent behavior change for v3.3.3 "
        "callers who don't know about the corpus-origin feature."
    )


# ─────────────────────────────────────────────────────────────────────────
# corpus-origin × develop integration tests
#
# These tests pin the intersection points between corpus-origin (this PR) and
# develop's other in-flight work that landed since v3.3.3. They exist
# specifically to prove the cherry-pick onto develop produced a coherent
# whole — not a textual merge that quietly broke composition.
# ─────────────────────────────────────────────────────────────────────────


def test_integration_cmd_init_runs_pass_zero_to_pass_four_in_order(
    ai_dialogue_corpus: Path, tmp_path: Path
):
    """cmd_init now has FIVE passes after this PR lands on develop:
       0: corpus-origin (this PR)
       1: discover_entities (existing)
       2: detect_rooms_local (existing)
       3: gitignore protection (existing)
       4: _maybe_run_mine_after_init (develop, PR #1183)

    Order matters: Pass 0 must produce origin.json BEFORE Pass 1 reads
    it, and Pass 4 must run AFTER cfg.init() so the user is offered to
    mine a fully-set-up directory. This test pins the order so any
    future re-shuffle is caught.
    """
    from mempalace.cli import cmd_init

    palace = tmp_path / "palace"
    args = _init_args(ai_dialogue_corpus, no_llm=True)
    call_log: list = []

    real_run_pass_zero = __import__("mempalace.cli", fromlist=["_run_pass_zero"])._run_pass_zero

    def trace_pass_zero(*a, **kw):
        call_log.append("pass_zero")
        return real_run_pass_zero(*a, **kw)

    def trace_discover(*a, **kw):
        call_log.append("discover_entities")
        return {"people": [], "projects": [], "topics": [], "uncertain": []}

    def trace_rooms(*a, **kw):
        call_log.append("detect_rooms_local")

    def trace_gitignore(*a, **kw):
        call_log.append("gitignore")
        return False

    def trace_mine_prompt(*a, **kw):
        call_log.append("mine_prompt")

    with (
        patch("mempalace.cli.MempalaceConfig", return_value=_stub_cfg(palace)),
        patch("mempalace.cli._run_pass_zero", side_effect=trace_pass_zero),
        patch("mempalace.project_scanner.discover_entities", side_effect=trace_discover),
        patch("mempalace.room_detector_local.detect_rooms_local", side_effect=trace_rooms),
        patch("mempalace.cli._ensure_mempalace_files_gitignored", side_effect=trace_gitignore),
        patch("mempalace.cli._maybe_run_mine_after_init", side_effect=trace_mine_prompt),
    ):
        cmd_init(args)

    expected = [
        "pass_zero",
        "discover_entities",
        "detect_rooms_local",
        "gitignore",
        "mine_prompt",
    ]
    assert call_log == expected, (
        f"cmd_init pass ordering broke after corpus-origin ↔ develop merge.\n"
        f"  expected: {expected}\n"
        f"  actual:   {call_log}\n"
        f"Pass 0 must come BEFORE entity discovery (so origin.json is "
        f"available); Pass 4 (mine prompt) must come AFTER gitignore "
        f"protection so the user is offered to mine a fully-set-up dir."
    )


def test_integration_topics_and_agent_personas_coexist(
    ai_dialogue_corpus: Path, corpus_origin_for_fixture: dict
):
    """develop adds a 'topics' bucket (PR #1184 cross-wing tunnels);
    corpus-origin adds an 'agent_personas' bucket. Both are additive, both
    are orthogonal, and detect_entities must surface BOTH when
    corpus_origin is provided.

    Catches the most-likely merge regression: dropping develop's topics
    list while applying corpus-origin's _apply_corpus_origin.
    """
    from mempalace.entity_detector import detect_entities, scan_for_detection

    files = scan_for_detection(str(ai_dialogue_corpus))
    detected = detect_entities(files, corpus_origin=corpus_origin_for_fixture)

    # develop's topics bucket must still exist (even if empty for this fixture)
    assert "topics" in detected, (
        "corpus-origin reclassification dropped develop's 'topics' bucket. "
        "_apply_corpus_origin must preserve all keys it doesn't own."
    )
    # corpus-origin's agent_personas bucket must exist with the persona names
    assert "agent_personas" in detected
    persona_names = {e["name"] for e in detected["agent_personas"]}
    assert {"Echo", "Sparrow", "Cipher"} <= persona_names


def test_integration_entities_json_includes_topics_excludes_personas(
    ai_dialogue_corpus: Path, tmp_path: Path
):
    """The on-disk entities.json (the per-project audit trail downstream
    tools read) must:
      - INCLUDE the topics list (develop's contribution)
      - NOT include persona names in the people list (corpus-origin's contribution)

    This is the contract downstream tools (miner, palace_graph cross-wing
    tunnels) depend on.
    """
    from mempalace.cli import cmd_init
    from mempalace.corpus_origin import CorpusOriginResult

    palace = tmp_path / "palace"
    args = _init_args(ai_dialogue_corpus)

    fake_provider = MagicMock()
    fake_provider.check_available.return_value = (True, "ok")
    # llm_refine returns nothing (no reclassifications) — keeps test deterministic
    fake_provider.classify.return_value = MagicMock(text='{"classifications": []}')

    fake_origin = CorpusOriginResult(
        likely_ai_dialogue=True,
        confidence=0.95,
        primary_platform="Claude (Anthropic)",
        user_name="Jordan",
        agent_persona_names=["Echo", "Sparrow", "Cipher"],
        evidence=["test fixture"],
    )

    with (
        patch("mempalace.cli.MempalaceConfig", return_value=_stub_cfg(palace)),
        patch("mempalace.cli.get_provider", return_value=fake_provider),
        patch("mempalace.cli.detect_origin_llm", return_value=fake_origin),
        patch("mempalace.cli._maybe_run_mine_after_init"),
        patch("mempalace.room_detector_local.detect_rooms_local"),
    ):
        cmd_init(args)

    entities_path = ai_dialogue_corpus / "entities.json"
    assert entities_path.exists()
    entities = json.loads(entities_path.read_text())

    # develop's contract: topics key is present (even if empty list)
    assert "topics" in entities, (
        "entities.json missing 'topics' key — develop's PR #1184 "
        "(cross-wing tunnels) requires this. The corpus-origin wiring must not "
        "have stripped it."
    )

    # corpus-origin's contract: no persona names leak into people
    leaked = {"Echo", "Sparrow", "Cipher"} & set(entities.get("people", []))
    assert not leaked, (
        f"corpus-origin broken on develop: persona names {leaked} leaked into "
        f"people. The merge dropped agent_persona reclassification."
    )


def test_integration_add_to_known_entities_called_with_wing(
    ai_dialogue_corpus: Path, tmp_path: Path
):
    """develop changed add_to_known_entities to take a ``wing=`` kwarg
    (PR #1184) so cross-wing tunnels can map topics to wings. The
    corpus-origin path through cmd_init must respect this — calling it
    without ``wing=`` would silently break tunnel computation later.
    """
    from mempalace.cli import cmd_init
    from mempalace.corpus_origin import CorpusOriginResult

    palace = tmp_path / "palace"
    args = _init_args(ai_dialogue_corpus)

    fake_provider = MagicMock()
    fake_provider.check_available.return_value = (True, "ok")
    fake_provider.classify.return_value = MagicMock(text='{"classifications": []}')

    fake_origin = CorpusOriginResult(
        likely_ai_dialogue=True,
        confidence=0.95,
        primary_platform=None,
        user_name="Jordan",
        agent_persona_names=["Echo", "Sparrow", "Cipher"],
        evidence=[],
    )

    with (
        patch("mempalace.cli.MempalaceConfig", return_value=_stub_cfg(palace)),
        patch("mempalace.cli.get_provider", return_value=fake_provider),
        patch("mempalace.cli.detect_origin_llm", return_value=fake_origin),
        patch("mempalace.cli._maybe_run_mine_after_init"),
        patch("mempalace.room_detector_local.detect_rooms_local"),
        patch("mempalace.miner.add_to_known_entities") as mock_add,
    ):
        cmd_init(args)

    if mock_add.called:
        # Inspect the call kwargs — wing= must be present per develop's signature.
        _, kwargs = mock_add.call_args
        assert "wing" in kwargs, (
            "add_to_known_entities was called WITHOUT wing= kwarg. "
            "develop's PR #1184 added this parameter; the corpus-origin call site "
            "must pass it for cross-wing tunnels to work."
        )
        assert kwargs["wing"] == ai_dialogue_corpus.name


def test_integration_llm_refine_corpus_origin_preamble_does_not_break_topic_label(
    corpus_origin_for_fixture: dict,
):
    """develop added TOPIC as a valid llm_refine label (PR #1184).
    corpus-origin prepends a CORPUS CONTEXT preamble to the system prompt.
    The two must coexist:
      - SYSTEM_PROMPT still defines TOPIC as a valid label
      - VALID_LABELS still includes TOPIC
      - corpus-origin preamble doesn't override or contradict TOPIC handling
    """
    from types import SimpleNamespace

    from mempalace.llm_refine import VALID_LABELS, refine_entities

    # TOPIC is preserved as a valid label
    assert "TOPIC" in VALID_LABELS, "develop's TOPIC label was dropped during corpus-origin merge"

    captured: dict = {}

    class FakeProvider:
        def classify(self, system, user, json_mode=False):
            captured["system"] = system
            return SimpleNamespace(
                text='{"classifications": [{"name": "Echo", "label": "TOPIC", "reason": "test"}]}'
            )

    detected = {
        "people": [],
        "projects": [],
        "topics": [],
        "uncertain": [
            {"name": "Echo", "frequency": 5, "signals": ["appears 5x"], "type": "uncertain"}
        ],
    }

    refine_entities(
        detected,
        corpus_text="Echo appears in some prose.",
        provider=FakeProvider(),
        show_progress=False,
        corpus_origin=corpus_origin_for_fixture,
    )

    # Both signals must be in the prompt: develop's TOPIC instructions AND
    # corpus-origin's corpus context preamble.
    assert "TOPIC" in captured["system"], (
        "TOPIC label instructions disappeared from SYSTEM_PROMPT — "
        "corpus-origin preamble appears to have replaced rather than appended"
    )
    assert (
        "CORPUS CONTEXT" in captured["system"]
    ), "corpus-origin corpus context preamble missing from prompt"


# ─────────────────────────────────────────────────────────────────────────
# Meta-test: no internal-coordination jargon may leak into source or tests.
#
# Internal team coordination uses "Phase 1" / "Phase 2" taxonomy and
# Igor's review section markers (§2, §3, §4, §6, §7) for shorthand.
# Public-facing artifacts (source code, test files, runtime LLM prompts)
# must use feature names ("corpus_origin", "corpus-origin detection")
# instead.
#
# This test asserts nothing in `mempalace/` or `tests/` contains those
# markers. If a future commit re-introduces "Phase 1" or "Igor's review §"
# anywhere, this test goes RED and blocks the merge.
#
# Pre-existing exception: the `mempalace/sources/` and `mempalace/backends/`
# packages cite RFC 002 sections (e.g. "§5.5") as legitimate spec
# references. Those are allowed.
# ─────────────────────────────────────────────────────────────────────────


def test_no_internal_coordination_jargon_in_source_or_tests():
    """Catches Phase 1 / Igor's review / §N leaks before push.

    The naming-decision is: features publicly, phases internally. This
    test enforces that on every CI run.
    """
    import re
    from pathlib import Path

    repo_root = Path(__file__).resolve().parent.parent
    leak_re = re.compile(r"(Phase ?[12]|Igor's review|Igor's spec)", re.IGNORECASE)
    section_re = re.compile(r"§ ?[0-9]")

    # Allowlist: pre-existing RFC/spec references in source-adapter and
    # backends packages are NOT internal phase markers.
    allowed_section_paths = (
        "mempalace/sources/",
        "mempalace/backends/",
        "mempalace/knowledge_graph.py",
        "mempalace/i18n/",
        "tests/test_sources.py",
        "tests/test_i18n_lang_case.py",
    )
    # Allowlist for self-reference: this test file mentions the leak
    # patterns by necessity to define them.
    SELF = Path(__file__).resolve()

    leaks: list = []
    for pattern_dir in ("mempalace", "tests"):
        for path in (repo_root / pattern_dir).rglob("*.py"):
            if path.resolve() == SELF:
                continue
            try:
                text = path.read_text(encoding="utf-8")
            except (OSError, UnicodeDecodeError):
                continue
            # Use as_posix() so the allowlist (forward-slash paths) matches
            # on Windows too — Path.relative_to(...) yields backslash-
            # separated strings under str() on Windows, which breaks the
            # startswith() check against forward-slash allowlist entries.
            rel_posix = path.relative_to(repo_root).as_posix()
            for line_num, line in enumerate(text.splitlines(), 1):
                if leak_re.search(line):
                    leaks.append(f"{rel_posix}:{line_num}: {line.strip()}")
                if section_re.search(line):
                    if not any(rel_posix.startswith(allowed) for allowed in allowed_section_paths):
                        leaks.append(f"{rel_posix}:{line_num}: {line.strip()}")

    assert not leaks, (
        "Internal-coordination jargon leaked into source or tests:\n"
        + "\n".join(f"  - {leak}" for leak in leaks[:20])
        + ("\n  ..." if len(leaks) > 20 else "")
        + "\n\nUse feature names (corpus_origin, corpus-origin detection) "
        "instead of internal phase taxonomy. See "
        "feedback_apply_naming_decision_actively.md."
    )


# ─────────────────────────────────────────────────────────────────────────
# Tier 1 / Tier 2 merge-fields (issue 3 follow-up to PR #1211).
#
# Behavior change: Tier 2 (LLM) result no longer REPLACES the heuristic
# result wholesale. Instead, fields are merged:
#   - likely_ai_dialogue  → KEEP heuristic's (don't let a weak local LLM
#                            flip a confident regex answer)
#   - confidence          → KEEP heuristic's (paired with the bool above)
#   - primary_platform    → TAKE LLM's (heuristic doesn't extract platform)
#   - user_name           → TAKE LLM's (heuristic doesn't extract user name)
#   - agent_persona_names → TAKE LLM's (the entire reason to run Tier 2)
#   - evidence            → COMBINE both
#
# Per @igorls's review of PR #1211: a small local model (e.g. Ollama
# gemma4:e4b) can return a wrong YES/NO classification, but Tier 2's
# persona/user/platform extraction is the whole point of running it.
# Merging fields preserves persona-extraction value without letting the
# weak model flip a confident heuristic.
# ─────────────────────────────────────────────────────────────────────────


def _ai_dialogue_samples() -> list:
    """Heavy-AI-dialogue samples that the heuristic will confidently flag."""
    return [
        "User: claude code, please help me debug this MCP integration.\n"
        "Assistant: Sure. I'll look at the LLM context window and the "
        "embedding pipeline. Claude Code can run the analysis now.\n"
        "User: also check ChatGPT compatibility.\n"
        "Assistant: GPT-4 should handle that. The MCP protocol abstracts it.\n"
    ] * 5


def _narrative_samples() -> list:
    """Pure-narrative samples that the heuristic will confidently flag NOT-AI."""
    return [
        "The plum tree finally bloomed this morning. Mira walked over from "
        "next door with her coffee and we sat on the porch watching the bees."
    ] * 5


def test_merge_tier_fields_heuristic_yes_llm_no_keeps_heuristic_bool():
    """When heuristic says AI-dialogue with high confidence and LLM
    contradicts (says NOT AI-dialogue), the merged result keeps the
    heuristic's likely_ai_dialogue=True. Igor's PR #1211 review caught
    this exact failure mode: a local Ollama gemma4:e4b returned a wrong
    "not AI-dialogue, 0.90" that flipped a correct heuristic answer.
    """
    from unittest.mock import MagicMock

    from mempalace.cli import _run_pass_zero
    from mempalace.corpus_origin import CorpusOriginResult

    # Mock the LLM provider so detect_origin_llm returns a CONTRADICTING result.
    fake_provider = MagicMock()

    # detect_origin_llm is called inside _run_pass_zero with this provider.
    # We need to intercept it. Easiest: patch detect_origin_llm directly.
    from unittest.mock import patch

    # LLM falsely claims not AI-dialogue, but DID extract personas (a real
    # symptom of weak local models — they sometimes contradict themselves).
    llm_wrong_result = CorpusOriginResult(
        likely_ai_dialogue=False,
        confidence=0.90,
        primary_platform="Claude (Anthropic)",
        user_name="Jordan",
        agent_persona_names=["Echo", "Sparrow", "Cipher"],
        evidence=["LLM thought this was narrative — wrong call"],
    )

    import tempfile

    with tempfile.TemporaryDirectory() as tmp_dir:
        project_dir = Path(tmp_dir) / "project"
        project_dir.mkdir()
        for i, sample in enumerate(_ai_dialogue_samples()):
            (project_dir / f"log{i}.md").write_text(sample)
        palace_dir = Path(tmp_dir) / "palace"

        with patch("mempalace.cli.detect_origin_llm", return_value=llm_wrong_result):
            wrapped = _run_pass_zero(
                project_dir=str(project_dir),
                palace_dir=str(palace_dir),
                llm_provider=fake_provider,
            )

    assert wrapped is not None, "Pass 0 should write origin.json with samples present"
    res = wrapped["result"]
    assert res["likely_ai_dialogue"] is True, (
        f"Heuristic confidently classified AI-dialogue; weak LLM contradicted. "
        f"Merged result must KEEP heuristic's True, not flip to False. "
        f"Got: {res}"
    )
    # Persona/user/platform from LLM should still be merged in.
    assert res["agent_persona_names"] == [
        "Echo",
        "Sparrow",
        "Cipher",
    ], f"LLM-extracted personas must be preserved in the merge. Got: {res}"
    assert res["user_name"] == "Jordan"
    assert res["primary_platform"] == "Claude (Anthropic)"


def test_merge_tier_fields_heuristic_no_no_personas_leak():
    """When heuristic confidently says NOT AI-dialogue and LLM agrees
    (also says NOT AI-dialogue, no personas extracted), merged result
    keeps NOT AI-dialogue and has no personas. Confirms the merge
    doesn't accidentally introduce personas where none exist.
    """
    from unittest.mock import MagicMock, patch

    from mempalace.cli import _run_pass_zero
    from mempalace.corpus_origin import CorpusOriginResult

    fake_provider = MagicMock()

    llm_agreeing_result = CorpusOriginResult(
        likely_ai_dialogue=False,
        confidence=0.95,
        primary_platform=None,
        user_name=None,
        agent_persona_names=[],
        evidence=["LLM also classified as narrative"],
    )

    import tempfile

    with tempfile.TemporaryDirectory() as tmp_dir:
        project_dir = Path(tmp_dir) / "project"
        project_dir.mkdir()
        for i, sample in enumerate(_narrative_samples()):
            (project_dir / f"diary{i}.md").write_text(sample)
        palace_dir = Path(tmp_dir) / "palace"

        with patch("mempalace.cli.detect_origin_llm", return_value=llm_agreeing_result):
            wrapped = _run_pass_zero(
                project_dir=str(project_dir),
                palace_dir=str(palace_dir),
                llm_provider=fake_provider,
            )

    assert wrapped is not None
    res = wrapped["result"]
    assert (
        res["likely_ai_dialogue"] is False
    ), f"Both tiers said NOT AI-dialogue; merged result must be False. Got: {res}"
    assert (
        res["agent_persona_names"] == []
    ), f"No personas should leak when both tiers report none. Got: {res}"


def test_merge_tier_fields_heuristic_yes_llm_yes_combines_evidence():
    """When both tiers agree this is AI-dialogue, the merged result keeps
    heuristic's bool/confidence and takes LLM's extracted persona/user/
    platform fields. Evidence from BOTH tiers ends up in the combined
    list.
    """
    from unittest.mock import MagicMock, patch

    from mempalace.cli import _run_pass_zero
    from mempalace.corpus_origin import CorpusOriginResult

    fake_provider = MagicMock()

    llm_agreeing_result = CorpusOriginResult(
        likely_ai_dialogue=True,
        confidence=0.98,
        primary_platform="Claude (Anthropic)",
        user_name="Jordan",
        agent_persona_names=["Echo", "Sparrow", "Cipher"],
        evidence=["LLM-extracted: Claude transcript with three persona names"],
    )

    import tempfile

    with tempfile.TemporaryDirectory() as tmp_dir:
        project_dir = Path(tmp_dir) / "project"
        project_dir.mkdir()
        for i, sample in enumerate(_ai_dialogue_samples()):
            (project_dir / f"log{i}.md").write_text(sample)
        palace_dir = Path(tmp_dir) / "palace"

        with patch("mempalace.cli.detect_origin_llm", return_value=llm_agreeing_result):
            wrapped = _run_pass_zero(
                project_dir=str(project_dir),
                palace_dir=str(palace_dir),
                llm_provider=fake_provider,
            )

    assert wrapped is not None
    res = wrapped["result"]
    assert res["likely_ai_dialogue"] is True
    assert res["agent_persona_names"] == ["Echo", "Sparrow", "Cipher"]
    assert res["user_name"] == "Jordan"
    assert res["primary_platform"] == "Claude (Anthropic)"
    # Combined evidence: heuristic produced its own evidence strings AND
    # LLM produced its own; the merged result should include both signal
    # trails for audit purposes.
    evidence_text = " ".join(res["evidence"])
    assert (
        "LLM-extracted" in evidence_text
    ), f"LLM evidence string missing from merged result. Got: {res['evidence']}"
    # Heuristic always produces at least one evidence line for AI-dialogue
    # input (brand-term match), so the combined list has more than just LLM's.
    assert len(res["evidence"]) >= 2, (
        f"Combined evidence should include both heuristic + LLM lines. " f"Got: {res['evidence']}"
    )


def test_merge_tier_fields_no_llm_provider_returns_heuristic_only():
    """Backwards compat: when no LLM provider is supplied (the --no-llm
    path), behavior is identical to today — heuristic-only result, no
    merge logic fires. This pins the v3.3.4 contract.
    """
    from mempalace.cli import _run_pass_zero

    import tempfile

    with tempfile.TemporaryDirectory() as tmp_dir:
        project_dir = Path(tmp_dir) / "project"
        project_dir.mkdir()
        for i, sample in enumerate(_ai_dialogue_samples()):
            (project_dir / f"log{i}.md").write_text(sample)
        palace_dir = Path(tmp_dir) / "palace"

        wrapped = _run_pass_zero(
            project_dir=str(project_dir),
            palace_dir=str(palace_dir),
            llm_provider=None,
        )

    assert wrapped is not None
    res = wrapped["result"]
    # Heuristic confidently flags AI-dialogue based on brand-term density.
    assert res["likely_ai_dialogue"] is True
    # No LLM ran, so persona/user/platform are heuristic's defaults (None / []).
    assert res["agent_persona_names"] == []
    assert res["user_name"] is None
    assert res["primary_platform"] is None