mempalace/searcher.py

#!/usr/bin/env python3
"""
searcher.py — Find anything. Exact words.

Hybrid search: BM25 keyword matching + vector semantic similarity. The
drawer query is the floor — always runs — and closet hits add a rank-based
boost when they agree. Closets are a ranking *signal*, never a gate, so
weak closets (regex extraction on narrative content) can only help, never
hide drawers the direct path would have found.
"""

import logging
import math
import re
from pathlib import Path

from .palace import get_closets_collection, get_collection

# Closet pointer line format: "topic|entities|→drawer_id_a,drawer_id_b"
# Multiple lines may join with newlines inside one closet document.
# The single capture group is the run of word characters and commas that
# follows the arrow, i.e. the comma-separated drawer id list.
_CLOSET_DRAWER_REF_RE = re.compile(r"→([\w,]+)")

# Module logger, registered under the fixed name "mempalace_mcp" rather than
# this module's own ``__name__``.
logger = logging.getLogger("mempalace_mcp")


class SearchError(Exception):
    """Signal that a search could not be carried out.

    ``search`` raises this when the palace collection cannot be opened or
    when the underlying query fails.
    """


# Token pattern used by ``_tokenize``: runs of two or more word characters,
# so single-character fragments never become tokens.
_TOKEN_RE = re.compile(r"\w{2,}", re.UNICODE)


def _first_or_empty(results: dict, key: str) -> list:
    """Safely unwrap the first inner list stored under ``key``.

    A successful ChromaDB query nests its rows one level deep, e.g.
    ``{"documents": [["a", "b"]], ...}``. When the collection is empty or
    the filter matches nothing the outer list can itself be empty
    (``{"documents": [], ...}``), and a blind ``[0]`` would raise
    IndexError (issue #195). A missing key, an empty outer list and an
    empty/None inner list all collapse to ``[]``.
    """
    rows = results.get(key)
    first_row = rows[0] if rows else None
    return first_row if first_row else []


def _tokenize(text: str) -> list:
    """Split ``text`` into lowercase word tokens of at least two characters."""
    lowered = text.lower()
    return _TOKEN_RE.findall(lowered)


def _bm25_scores(
    query: str,
    documents: list,
    k1: float = 1.5,
    b: float = 0.75,
) -> list:
    """Score every document against ``query`` with Okapi-BM25.

    The corpus for IDF purposes is exactly ``documents``: each query term's
    weight is ``log((N - df + 0.5) / (df + 0.5) + 1)``, a smoothed form that
    never goes negative. That makes the function suitable for re-ranking a
    small candidate set — a term's weight reflects how well it separates
    the candidates from one another.

    Args:
        query: Free-text query; tokenized and de-duplicated into terms.
        documents: Candidate texts to score.
        k1: Term-frequency saturation (1.2-2.0 typical).
        b: Length normalization strength (0.0 = none, 1.0 = full).

    Returns:
        One float per document, in input order. All zeros when the query has
        no usable terms or no document contains any token.
    """
    total = len(documents)
    terms = set(_tokenize(query))
    if total == 0 or not terms:
        return [0.0] * total

    token_lists = [_tokenize(text) for text in documents]
    lengths = [len(tokens) for tokens in token_lists]
    if not any(lengths):
        return [0.0] * total
    mean_len = sum(lengths) / total or 1.0

    # Count, for each query term, the number of documents that mention it.
    doc_freq = dict.fromkeys(terms, 0)
    for tokens in token_lists:
        for term in terms.intersection(tokens):
            doc_freq[term] += 1

    idf = {}
    for term, containing in doc_freq.items():
        idf[term] = math.log((total - containing + 0.5) / (containing + 0.5) + 1)

    def _score_one(tokens: list, length: int) -> float:
        # An empty document cannot match anything.
        if length == 0:
            return 0.0
        counts: dict = {}
        for token in tokens:
            if token in terms:
                counts[token] = counts.get(token, 0) + 1
        length_norm = k1 * (1 - b + b * length / mean_len)
        subtotal = 0.0
        for term, freq in counts.items():
            subtotal += idf[term] * (freq * (k1 + 1)) / (freq + length_norm)
        return subtotal

    return [_score_one(tokens, length) for tokens, length in zip(token_lists, lengths)]


def _hybrid_rank(
    results: list,
    query: str,
    vector_weight: float = 0.6,
    bm25_weight: float = 0.4,
) -> list:
    """Reorder ``results`` by a weighted blend of vector similarity and BM25.

    * The vector component is the absolute similarity
      ``max(0, 1 - distance)`` read from each result's ``distance`` field
      (a missing distance counts as 1.0, i.e. similarity 0). Because it is
      absolute, adding or removing a candidate does not change the vector
      component of the others.
    * The keyword component is the BM25 score of each result's ``text``
      computed over the candidate set, divided by the largest score in
      that set so it lands in ``[0, 1]``. If no candidate scores above
      zero the keyword component is 0 for all of them.

    Each result dict gains a ``bm25_score`` key (raw score rounded to three
    places). The list is reordered in place, best first, and also returned.
    """
    if not results:
        return results

    raw_scores = _bm25_scores(query, [hit.get("text", "") for hit in results])
    top = max(raw_scores) if raw_scores else 0.0
    if top > 0:
        scaled = [score / top for score in raw_scores]
    else:
        scaled = [0.0] * len(raw_scores)

    ranked = []
    for hit, raw, relative in zip(results, raw_scores, scaled):
        cosine_sim = max(0.0, 1.0 - hit.get("distance", 1.0))
        hit["bm25_score"] = round(raw, 3)
        blended = vector_weight * cosine_sim + bm25_weight * relative
        ranked.append((blended, hit))

    # Stable descending sort: ties keep their incoming relative order.
    ranked = sorted(ranked, key=lambda item: item[0], reverse=True)
    results[:] = [hit for _, hit in ranked]
    return results


def build_where_filter(wing: str = None, room: str = None) -> dict:
    """Translate optional wing/room filters into a ChromaDB ``where`` dict.

    Returns ``{}`` when neither is given, a single equality clause when only
    one is given, and an ``$and`` of both clauses when both are given.
    Empty strings are treated the same as ``None``.
    """
    clauses = [
        {field: value}
        for field, value in (("wing", wing), ("room", room))
        if value
    ]
    if len(clauses) == 2:
        return {"$and": clauses}
    return clauses[0] if clauses else {}


def _extract_drawer_ids_from_closet(closet_doc: str) -> list:
    """Collect every drawer id referenced by `→id_a,id_b` pointers in a closet.

    Ids are returned in order of first appearance with duplicates removed;
    empty fragments (e.g. from a doubled comma) are skipped.
    """
    candidates = (
        piece.strip()
        for group in _CLOSET_DRAWER_REF_RE.findall(closet_doc)
        for piece in group.split(",")
    )
    # dict.fromkeys keeps first-seen order while discarding repeats.
    return list(dict.fromkeys(did for did in candidates if did))


def _expand_with_neighbors(drawers_col, matched_doc: str, matched_meta: dict, radius: int = 1):
    """Expand a matched drawer with its ±radius sibling chunks in the same source file.

    Motivation — "drawer-grep context" feature: a closet hit returns one
    drawer, but the chunk boundary may clip mid-thought (e.g., the matched
    chunk says "here's a breakdown:" and the actual breakdown lives in the
    next chunk). Fetching the small neighborhood around the match gives
    callers enough context without forcing a follow-up ``get_drawer`` call.

    Args:
        drawers_col: Collection object exposing ``get(where=..., include=...)``.
        matched_doc: Text of the drawer that matched.
        matched_meta: Metadata of the matched drawer; ``source_file`` and an
            integer ``chunk_index`` are required for expansion. ``None`` or
            a non-dict value is treated as "no metadata".
        radius: How many chunks on each side of the match to include.

    Returns a dict with:
        ``text``            combined chunks in chunk_index order
        ``drawer_index``    the matched chunk's index in the source file
        ``total_drawers``   total drawer count for the source file (or None)

    On any failure of the neighbor lookup, or on missing metadata, falls
    back to returning the matched drawer alone so search never breaks
    because neighbor expansion failed.
    """
    # Honor the "missing metadata" fallback even when the caller hands us
    # None instead of a dict; otherwise ``.get`` would raise before any
    # fallback path is reached.
    own_meta = matched_meta if isinstance(matched_meta, dict) else {}
    src = own_meta.get("source_file")
    chunk_idx = own_meta.get("chunk_index")
    if not src or not isinstance(chunk_idx, int):
        return {"text": matched_doc, "drawer_index": chunk_idx, "total_drawers": None}

    target_indexes = [chunk_idx + offset for offset in range(-radius, radius + 1)]
    try:
        neighbors = drawers_col.get(
            where={
                "$and": [
                    {"source_file": src},
                    {"chunk_index": {"$in": target_indexes}},
                ]
            },
            include=["documents", "metadatas"],
        )
    except Exception:
        return {"text": matched_doc, "drawer_index": chunk_idx, "total_drawers": None}

    indexed_docs = []
    for doc, meta in zip(neighbors.get("documents") or [], neighbors.get("metadatas") or []):
        # Skip rows whose metadata is absent or malformed instead of
        # crashing; they cannot be positioned without a chunk_index anyway.
        if not isinstance(meta, dict):
            continue
        ci = meta.get("chunk_index")
        if isinstance(ci, int):
            indexed_docs.append((ci, doc))
    indexed_docs.sort(key=lambda pair: pair[0])

    if not indexed_docs:
        combined_text = matched_doc
    else:
        combined_text = "\n\n".join(doc for _, doc in indexed_docs)

    # total_drawers lookup: count the ids of every drawer in the source file.
    # Deliberately best-effort — the count is optional, so a failure here
    # leaves it as None rather than discarding the expanded text.
    total_drawers = None
    try:
        all_meta = drawers_col.get(where={"source_file": src}, include=["metadatas"])
        ids = all_meta.get("ids") or []
        total_drawers = len(ids) if ids else None
    except Exception:
        pass

    return {
        "text": combined_text,
        "drawer_index": chunk_idx,
        "total_drawers": total_drawers,
    }


def search(query: str, palace_path: str, wing: str = None, room: str = None, n_results: int = 5):
    """
    Search the palace and print the hits. Shows verbatim drawer content.
    Optionally filter by wing (project) or room (aspect).

    Output goes to stdout; the function returns ``None`` whether or not
    anything matched.

    Args:
        query: Text to search for.
        palace_path: Location of the palace to open (never created here).
        wing: Optional wing filter.
        room: Optional room filter.
        n_results: Maximum number of hits to request.

    Raises:
        SearchError: if the collection cannot be opened at ``palace_path``
            or the query itself fails. In both cases the underlying
            exception is chained as ``__cause__``.
    """
    try:
        col = get_collection(palace_path, create=False)
    except Exception as e:
        print(f"\n  No palace found at {palace_path}")
        print("  Run: mempalace init <dir> then mempalace mine <dir>")
        # Chain explicitly, matching the query-failure handler below, so the
        # traceback reads "caused by" rather than "during handling of".
        raise SearchError(f"No palace found at {palace_path}") from e

    where = build_where_filter(wing, room)

    try:
        kwargs = {
            "query_texts": [query],
            "n_results": n_results,
            "include": ["documents", "metadatas", "distances"],
        }
        if where:
            kwargs["where"] = where

        results = col.query(**kwargs)

    except Exception as e:
        print(f"\n  Search error: {e}")
        raise SearchError(f"Search error: {e}") from e

    docs = _first_or_empty(results, "documents")
    metas = _first_or_empty(results, "metadatas")
    dists = _first_or_empty(results, "distances")

    if not docs:
        print(f'\n  No results found for: "{query}"')
        return

    print(f"\n{'=' * 60}")
    print(f'  Results for: "{query}"')
    if wing:
        print(f"  Wing: {wing}")
    if room:
        print(f"  Room: {room}")
    print(f"{'=' * 60}\n")

    for i, (doc, meta, dist) in enumerate(zip(docs, metas, dists), 1):
        # A drawer without metadata should still print, with "?" placeholders.
        meta = meta or {}
        similarity = round(max(0.0, 1 - dist), 3)
        source = Path(meta.get("source_file", "?")).name
        wing_name = meta.get("wing", "?")
        room_name = meta.get("room", "?")

        print(f"  [{i}] {wing_name} / {room_name}")
        print(f"      Source: {source}")
        print(f"      Match:  {similarity}")
        print()
        # Print the verbatim text, indented
        for line in doc.strip().split("\n"):
            print(f"      {line}")
        print()
        print(f"  {'─' * 56}")

    print()


def search_memories(
    query: str,
    palace_path: str,
    wing: str = None,
    room: str = None,
    n_results: int = 5,
    max_distance: float = 0.0,
) -> dict:
    """Programmatic search — returns a dict instead of printing.

    Used by the MCP server and other callers that need data.

    Args:
        query: Natural language search query.
        palace_path: Path to the ChromaDB palace directory.
        wing: Optional wing filter.
        room: Optional room filter.
        n_results: Max results to return.
        max_distance: Max cosine distance threshold. The palace collection uses
            cosine distance (hnsw:space=cosine) — 0 = identical, 2 = opposite.
            Results with distance > this value are filtered out. A value of
            0.0 disables filtering. Typical useful range: 0.3–1.0.

    Returns:
        On success, a dict with ``query``, ``filters``, ``results`` and
        ``total_before_filter`` (how many drawers the vector query returned
        before distance filtering and truncation). On failure, a dict with
        an ``error`` key (plus ``hint`` when the palace cannot be opened);
        this function does not raise for those cases.
    """
    try:
        drawers_col = get_collection(palace_path, create=False)
    except Exception as e:
        logger.error("No palace found at %s: %s", palace_path, e)
        return {
            "error": "No palace found",
            "hint": "Run: mempalace init <dir> && mempalace mine <dir>",
        }

    where = build_where_filter(wing, room)

    # Hybrid retrieval: always query drawers directly (the floor), then use
    # closet hits to boost rankings. Closets are a ranking SIGNAL, never a
    # GATE — direct drawer search is always the baseline.
    #
    # This avoids the "weak-closets regression" where narrative content
    # produces low-signal closets (regex extraction matches few topics)
    # and closet-first routing hides drawers that direct search would find.
    try:
        dkwargs = {
            "query_texts": [query],
            "n_results": n_results * 3,  # over-fetch for re-ranking
            "include": ["documents", "metadatas", "distances"],
        }
        if where:
            dkwargs["where"] = where
        drawer_results = drawers_col.query(**dkwargs)
    except Exception as e:
        return {"error": f"Search error: {e}"}

    # Gather closet hits (best-per-source) to build a boost lookup.
    closet_boost_by_source: dict = {}  # source_file -> (rank, closet_dist, preview)
    try:
        closets_col = get_closets_collection(palace_path, create=False)
        ckwargs = {
            "query_texts": [query],
            "n_results": n_results * 2,
            "include": ["documents", "metadatas", "distances"],
        }
        if where:
            ckwargs["where"] = where
        closet_results = closets_col.query(**ckwargs)
        for rank, (cdoc, cmeta, cdist) in enumerate(
            zip(
                _first_or_empty(closet_results, "documents"),
                _first_or_empty(closet_results, "metadatas"),
                _first_or_empty(closet_results, "distances"),
            )
        ):
            # One closet with absent metadata/text must not discard the
            # remaining closets, so read both defensively.
            source = (cmeta or {}).get("source_file", "")
            if source and source not in closet_boost_by_source:
                closet_boost_by_source[source] = (rank, cdist, (cdoc or "")[:200])
    except Exception as e:
        # no closets yet — hybrid degrades to pure drawer search. Still
        # best-effort, but leave a trace so a broken closet layer is
        # diagnosable instead of invisible.
        logger.debug("Closet lookup skipped, using drawer-only ranking: %s", e)

    # Rank-based boost. The ordinal signal ("which closet matched best") is
    # more reliable than absolute distance on narrative content, where
    # closet distances cluster in 1.2-1.5 range regardless of match quality.
    CLOSET_RANK_BOOSTS = [0.40, 0.25, 0.15, 0.08, 0.04]
    CLOSET_DISTANCE_CAP = 1.5  # cosine dist > 1.5 = too weak to use as signal

    scored: list = []
    for doc, meta, dist in zip(
        _first_or_empty(drawer_results, "documents"),
        _first_or_empty(drawer_results, "metadatas"),
        _first_or_empty(drawer_results, "distances"),
    ):
        # Filter on raw distance before rounding to avoid precision loss.
        if max_distance > 0.0 and dist > max_distance:
            continue

        # A drawer without metadata still counts as a hit; it just gets the
        # "unknown"/"?" placeholders below.
        meta = meta or {}
        source = meta.get("source_file", "") or ""
        boost = 0.0
        matched_via = "drawer"
        closet_preview = None
        if source in closet_boost_by_source:
            c_rank, c_dist, c_preview = closet_boost_by_source[source]
            if c_dist <= CLOSET_DISTANCE_CAP and c_rank < len(CLOSET_RANK_BOOSTS):
                boost = CLOSET_RANK_BOOSTS[c_rank]
                matched_via = "drawer+closet"
                closet_preview = c_preview

        effective_dist = dist - boost
        entry = {
            "text": doc,
            "wing": meta.get("wing", "unknown"),
            "room": meta.get("room", "unknown"),
            "source_file": Path(source).name if source else "?",
            "created_at": meta.get("filed_at", "unknown"),
            "similarity": round(max(0.0, 1 - effective_dist), 3),
            "distance": round(dist, 4),
            "effective_distance": round(effective_dist, 4),
            "closet_boost": round(boost, 3),
            "matched_via": matched_via,
            # Internal (stripped before returning): sort key, plus the full
            # source_file path + chunk_index so the enrichment step below
            # doesn't have to reverse-lookup via basename-suffix matching
            # (which silently collides when two files share a basename
            # across different directories).
            "_sort_key": effective_dist,
            "_source_file_full": source,
            "_chunk_index": meta.get("chunk_index"),
        }
        if closet_preview:
            entry["closet_preview"] = closet_preview
        scored.append(entry)

    scored.sort(key=lambda h: h["_sort_key"])
    hits = scored[:n_results]

    # Drawer-grep enrichment: for closet-boosted hits whose source has
    # multiple drawers, return the keyword-best chunk + its immediate
    # neighbors instead of just the drawer vector search landed on. The
    # closet said "this source is relevant"; vector may have picked the
    # wrong chunk within it; grep picks the right one.
    MAX_HYDRATION_CHARS = 10000
    query_terms = set(_tokenize(query))  # loop-invariant, computed once
    for h in hits:
        if h["matched_via"] == "drawer":
            continue
        full_source = h.get("_source_file_full") or ""
        if not full_source:
            continue
        try:
            source_drawers = drawers_col.get(
                where={"source_file": full_source},
                include=["documents", "metadatas"],
            )
        except Exception:
            continue
        docs = source_drawers.get("documents") or []
        metas_ = source_drawers.get("metadatas") or []
        if len(docs) <= 1:
            continue

        # Sort by chunk_index so best_idx + neighbors are positional.
        indexed = []
        for idx, (d, m) in enumerate(zip(docs, metas_)):
            ci = m.get("chunk_index", idx) if isinstance(m, dict) else idx
            if not isinstance(ci, int):
                ci = idx
            indexed.append((ci, d or ""))
        indexed.sort(key=lambda p: p[0])
        ordered_docs = [d for _, d in indexed]

        # Pick the first chunk containing the most distinct query terms
        # (substring match on the lowercased chunk).
        best_idx, best_score = 0, 0
        for idx, d in enumerate(ordered_docs):
            d_lower = d.lower()
            s = sum(1 for t in query_terms if t in d_lower)
            if s > best_score:
                best_score, best_idx = s, idx

        if best_score == 0:
            # Grep found no query term in any chunk, so it has no opinion.
            # Anchor on the chunk vector search actually matched instead of
            # silently swapping the hit's text for the start of the file.
            vector_ci = h.get("_chunk_index")
            anchor = None
            if isinstance(vector_ci, int):
                anchor = next(
                    (pos for pos, (ci, _) in enumerate(indexed) if ci == vector_ci),
                    None,
                )
            if anchor is None:
                # Cannot place the matched chunk; keep its text untouched.
                continue
            best_idx = anchor

        start = max(0, best_idx - 1)
        end = min(len(ordered_docs), best_idx + 2)
        expanded = "\n\n".join(ordered_docs[start:end])
        if len(expanded) > MAX_HYDRATION_CHARS:
            expanded = (
                expanded[:MAX_HYDRATION_CHARS]
                + f"\n\n[...truncated. {len(ordered_docs)} total drawers. "
                "Use mempalace_get_drawer for full content.]"
            )
        h["text"] = expanded
        h["drawer_index"] = best_idx
        h["total_drawers"] = len(ordered_docs)

    # BM25 hybrid re-rank within the final candidate set.
    hits = _hybrid_rank(hits, query)
    for h in hits:
        h.pop("_sort_key", None)
        h.pop("_source_file_full", None)
        h.pop("_chunk_index", None)

    return {
        "query": query,
        "filters": {"wing": wing, "room": room},
        "total_before_filter": len(_first_or_empty(drawer_results, "documents")),
        "results": hits,
    }
-											MemPalace: palace architecture, AAAK compression, knowledge graph
										
										
											2026-04-04 18:16:04 -07:00
+								#!/usr/bin/env python3
 								"""
 								searcher.py — Find anything. Exact words.
-											merge: pr/closet-llm-generic + harden LLM regen path for production
										
										
											2026-04-13 18:40:36 -03:00
+								Hybrid search: BM25 keyword matching + vector semantic similarity. The
 								drawer query is the floor — always runs — and closet hits add a rank-based
 								boost when they agree. Closets are a ranking *signal*, never a gate, so
 								weak closets (regex extraction on narrative content) can only help, never
 								hide drawers the direct path would have found.
-											MemPalace: palace architecture, AAAK compression, knowledge graph
										
										
											2026-04-04 18:16:04 -07:00
+								"""
-											fix: preserve CLI exit codes, log tracebacks, sanitize search errors, validate fixture
										
										
											2026-04-07 17:38:53 -03:00
+								import logging
-											feat: entity metadata + diary ingest + BM25 hybrid search
										
										
											2026-04-13 01:47:19 -07:00
+								import math
 								import re
-											MemPalace: palace architecture, AAAK compression, knowledge graph
										
										
											2026-04-04 18:16:04 -07:00
+								from pathlib import Path
-											merge: develop + harden closet layer for production
										
										
											2026-04-13 17:00:55 -03:00
+								from .palace import get_closets_collection, get_collection
 								# Closet pointer line format: "topic|entities|→drawer_id_a,drawer_id_b"
 								# Multiple lines may join with newlines inside one closet document.
 								_CLOSET_DRAWER_REF_RE = re.compile(r"→([\w,]+)")
-											MemPalace: palace architecture, AAAK compression, knowledge graph
										
										
											2026-04-04 18:16:04 -07:00
-											fix: preserve CLI exit codes, log tracebacks, sanitize search errors, validate fixture
										
										
											2026-04-07 17:38:53 -03:00
+								logger = logging.getLogger("mempalace_mcp")
 								class SearchError(Exception):
 								    """Raised when search cannot proceed (e.g. no palace found)."""
-											MemPalace: palace architecture, AAAK compression, knowledge graph
										
										
											2026-04-04 18:16:04 -07:00
-											merge: develop + harden entity metadata, BM25, and diary ingest for production
										
										
											2026-04-13 17:37:45 -03:00
+								_TOKEN_RE = re.compile(r"\w{2,}", re.UNICODE)
-											fix(searcher): guard against empty ChromaDB query results (#195) (#865)
										
										
											2026-04-15 09:26:38 +02:00
+								def _first_or_empty(results: dict, key: str) -> list:
 								    """Return the first inner list of a ChromaDB query result, or [].
 								    ChromaDB returns shapes like ``{"documents": [["a", "b"]], ...}`` for a
 								    successful query, but ``{"documents": [], ...}`` (empty outer list) when
 								    the collection is empty or the filter excludes everything. Indexing
 								    ``[0]`` blindly raises IndexError in that case (issue #195).
 								    """
 								    outer = results.get(key)
 								    if not outer:
 								        return []
 								    return outer[0] or []
-											merge: develop + harden entity metadata, BM25, and diary ingest for production
										
										
											2026-04-13 17:37:45 -03:00
+								def _tokenize(text: str) -> list:
 								    """Lowercase + strip to alphanumeric tokens of length ≥ 2."""
 								    return _TOKEN_RE.findall(text.lower())
-											feat: entity metadata + diary ingest + BM25 hybrid search
										
										
											2026-04-13 01:47:19 -07:00
-											merge: develop + harden entity metadata, BM25, and diary ingest for production
										
										
											2026-04-13 17:37:45 -03:00
+								def _bm25_scores(
 								    query: str,
 								    documents: list,
 								    k1: float = 1.5,
 								    b: float = 0.75,
 								) -> list:
 								    """Compute Okapi-BM25 scores for ``query`` against each document.
 								    IDF is computed over the *provided corpus* using the Lucene/BM25+
 								    smoothed formula ``log((N - df + 0.5) / (df + 0.5) + 1)``, which is
 								    always non-negative. This is well-defined for re-ranking a small
 								    candidate set returned by vector retrieval — IDF then reflects how
 								    discriminative each query term is *within the candidates*, exactly
 								    what's needed to reorder them.
 								    Parameters mirror Okapi-BM25 conventions:
 								        k1 — term-frequency saturation (1.2-2.0 typical, 1.5 default)
 								        b  — length normalization (0.0 = none, 1.0 = full, 0.75 default)
 								    Returns a list of scores in the same order as ``documents``.
-											feat: entity metadata + diary ingest + BM25 hybrid search
										
										
											2026-04-13 01:47:19 -07:00
+								    """
-											merge: develop + harden entity metadata, BM25, and diary ingest for production
										
										
											2026-04-13 17:37:45 -03:00
+								    n_docs = len(documents)
 								    query_terms = set(_tokenize(query))
 								    if not query_terms or n_docs == 0:
 								        return [0.0] * n_docs
 								    tokenized = [_tokenize(d) for d in documents]
 								    doc_lens = [len(toks) for toks in tokenized]
 								    if not any(doc_lens):
 								        return [0.0] * n_docs
 								    avgdl = sum(doc_lens) / n_docs or 1.0
 								    # Document frequency: how many docs contain each query term?
 								    df = {term: 0 for term in query_terms}
 								    for toks in tokenized:
 								        seen = set(toks) & query_terms
 								        for term in seen:
 								            df[term] += 1
 								    idf = {term: math.log((n_docs - df[term] + 0.5) / (df[term] + 0.5) + 1) for term in query_terms}
 								    scores = []
 								    for toks, dl in zip(tokenized, doc_lens):
 								        if dl == 0:
 								            scores.append(0.0)
 								            continue
 								        tf: dict = {}
 								        for t in toks:
 								            if t in query_terms:
 								                tf[t] = tf.get(t, 0) + 1
 								        score = 0.0
 								        for term, freq in tf.items():
 								            num = freq * (k1 + 1)
 								            den = freq + k1 * (1 - b + b * dl / avgdl)
 								            score += idf[term] * num / den
 								        scores.append(score)
 								    return scores
 								def _hybrid_rank(
 								    results: list,
 								    query: str,
 								    vector_weight: float = 0.6,
 								    bm25_weight: float = 0.4,
 								) -> list:
 								    """Re-rank ``results`` by a convex combination of vector similarity and BM25.
 								    * Vector similarity uses absolute cosine sim ``max(0, 1 - distance)`` —
 								      ChromaDB's hnsw cosine distance lives in ``[0, 2]`` (0 = identical).
 								      Absolute (not relative-to-max) means adding/removing a candidate
 								      can't reshuffle the others.
 								    * BM25 is real Okapi-BM25 with corpus-relative IDF over the candidates
 								      themselves. Since the absolute scale is unbounded, BM25 is min-max
 								      normalized within the candidate set so weights are commensurable.
 								    Mutates each result dict to add ``bm25_score`` and reorders the list
 								    in place. Returns the same list for convenience.
-											feat: entity metadata + diary ingest + BM25 hybrid search
										
										
											2026-04-13 01:47:19 -07:00
+								    """
-											merge: develop + harden entity metadata, BM25, and diary ingest for production
										
										
											2026-04-13 17:37:45 -03:00
+								    if not results:
 								        return results
 								    docs = [r.get("text", "") for r in results]
 								    bm25_raw = _bm25_scores(query, docs)
 								    max_bm25 = max(bm25_raw) if bm25_raw else 0.0
 								    bm25_norm = [s / max_bm25 for s in bm25_raw] if max_bm25 > 0 else [0.0] * len(bm25_raw)
 								    scored = []
 								    for r, raw, norm in zip(results, bm25_raw, bm25_norm):
 								        vec_sim = max(0.0, 1.0 - r.get("distance", 1.0))
 								        r["bm25_score"] = round(raw, 3)
 								        scored.append((vector_weight * vec_sim + bm25_weight * norm, r))
 								    scored.sort(key=lambda pair: pair[0], reverse=True)
 								    results[:] = [r for _, r in scored]
 								    return results
-											feat: entity metadata + diary ingest + BM25 hybrid search
										
										
											2026-04-13 01:47:19 -07:00
-											feat: new MCP tools — get/list/update drawer, hook settings, export (resolves #635) (#667)
										
										
											2026-04-11 21:25:04 -07:00
+								def build_where_filter(wing: str = None, room: str = None) -> dict:
 								    """Build ChromaDB where filter for wing/room filtering."""
 								    if wing and room:
 								        return {"$and": [{"wing": wing}, {"room": room}]}
 								    elif wing:
 								        return {"wing": wing}
 								    elif room:
 								        return {"room": room}
 								    return {}
-											merge: develop + harden closet layer for production
										
										
											2026-04-13 17:00:55 -03:00
+								def _extract_drawer_ids_from_closet(closet_doc: str) -> list:
 								    """Parse all `→drawer_id_a,drawer_id_b` pointers out of a closet document.
 								    Preserves order and dedupes.
 								    """
 								    seen: dict = {}
 								    for match in _CLOSET_DRAWER_REF_RE.findall(closet_doc):
 								        for did in match.split(","):
 								            did = did.strip()
 								            if did and did not in seen:
 								                seen[did] = None
 								    return list(seen.keys())
-											merge: pr/cross-wing-tunnels + rebuild drawer-grep on hardened closet path
										
										
											2026-04-13 18:08:01 -03:00
+								def _expand_with_neighbors(drawers_col, matched_doc: str, matched_meta: dict, radius: int = 1):
 								    """Expand a matched drawer with its ±radius sibling chunks in the same source file.
 								    Motivation — "drawer-grep context" feature: a closet hit returns one
 								    drawer, but the chunk boundary may clip mid-thought (e.g., the matched
 								    chunk says "here's a breakdown:" and the actual breakdown lives in the
 								    next chunk). Fetching the small neighborhood around the match gives
 								    callers enough context without forcing a follow-up ``get_drawer`` call.
 								    Returns a dict with:
 								        ``text``            combined chunks in chunk_index order
 								        ``drawer_index``    the matched chunk's index in the source file
 								        ``total_drawers``   total drawer count for the source file (or None)
 								    On any ChromaDB failure or missing metadata, falls back to returning the
 								    matched drawer alone so search never breaks because neighbor expansion
 								    failed.
 								    """
 								    src = matched_meta.get("source_file")
 								    chunk_idx = matched_meta.get("chunk_index")
 								    if not src or not isinstance(chunk_idx, int):
 								        return {"text": matched_doc, "drawer_index": chunk_idx, "total_drawers": None}
 								    target_indexes = [chunk_idx + offset for offset in range(-radius, radius + 1)]
 								    try:
 								        neighbors = drawers_col.get(
 								            where={
 								                "$and": [
 								                    {"source_file": src},
 								                    {"chunk_index": {"$in": target_indexes}},
 								                ]
 								            },
 								            include=["documents", "metadatas"],
 								        )
 								    except Exception:
 								        return {"text": matched_doc, "drawer_index": chunk_idx, "total_drawers": None}
 								    indexed_docs = []
 								    for doc, meta in zip(neighbors.get("documents") or [], neighbors.get("metadatas") or []):
 								        ci = meta.get("chunk_index")
 								        if isinstance(ci, int):
 								            indexed_docs.append((ci, doc))
 								    indexed_docs.sort(key=lambda pair: pair[0])
 								    if not indexed_docs:
 								        combined_text = matched_doc
 								    else:
 								        combined_text = "\n\n".join(doc for _, doc in indexed_docs)
 								    # Cheap total_drawers lookup: metadata-only scan of the source file.
 								    total_drawers = None
 								    try:
 								        all_meta = drawers_col.get(where={"source_file": src}, include=["metadatas"])
 								        ids = all_meta.get("ids") or []
 								        total_drawers = len(ids) if ids else None
 								    except Exception:
 								        pass
 								    return {
 								        "text": combined_text,
 								        "drawer_index": chunk_idx,
 								        "total_drawers": total_drawers,
 								    }
-											MemPalace: palace architecture, AAAK compression, knowledge graph
										
										
											2026-04-04 18:16:04 -07:00
def search(query: str, palace_path: str, wing: str = None, room: str = None, n_results: int = 5):
    """Search the palace and print verbatim drawer content to stdout.

    Optionally filter by wing (project) or room (aspect).

    Args:
        query: Natural language search query.
        palace_path: Path to the ChromaDB palace directory.
        wing: Optional wing filter.
        room: Optional room filter.
        n_results: Max results to print.

    Returns:
        None. Results (or a "no results" notice) are printed.

    Raises:
        SearchError: If the palace collection cannot be opened or the
            query itself fails.
    """
    try:
        col = get_collection(palace_path, create=False)
    except Exception as e:
        print(f"\n  No palace found at {palace_path}")
        print("  Run: mempalace init <dir> then mempalace mine <dir>")
        raise SearchError(f"No palace found at {palace_path}") from e

    where = build_where_filter(wing, room)

    try:
        kwargs = {
            "query_texts": [query],
            "n_results": n_results,
            "include": ["documents", "metadatas", "distances"],
        }
        if where:
            kwargs["where"] = where
        results = col.query(**kwargs)
    except Exception as e:
        print(f"\n  Search error: {e}")
        raise SearchError(f"Search error: {e}") from e

    docs = _first_or_empty(results, "documents")
    metas = _first_or_empty(results, "metadatas")
    dists = _first_or_empty(results, "distances")

    if not docs:
        print(f'\n  No results found for: "{query}"')
        return

    print(f"\n{'=' * 60}")
    print(f'  Results for: "{query}"')
    if wing:
        print(f"  Wing: {wing}")
    if room:
        print(f"  Room: {room}")
    print(f"{'=' * 60}\n")

    for i, (doc, meta, dist) in enumerate(zip(docs, metas, dists), 1):
        # A record stored without metadata comes back as None; fall back to
        # an empty dict so the "?" placeholders below apply instead of
        # crashing halfway through the output.
        meta = meta or {}
        similarity = round(max(0.0, 1 - dist), 3)
        source = Path(meta.get("source_file", "?")).name
        wing_name = meta.get("wing", "?")
        room_name = meta.get("room", "?")

        print(f"  [{i}] {wing_name} / {room_name}")
        print(f"      Source: {source}")
        print(f"      Match:  {similarity}")
        print()
        # Print the verbatim text, indented
        for line in doc.strip().split("\n"):
            print(f"      {line}")
        print()
        print(f"  {'─' * 56}")

    print()
 								def search_memories(
-											feat: new MCP tools — get/list/update drawer, hook settings, export (resolves #635) (#667)
										
										
											2026-04-11 21:25:04 -07:00
+								    query: str,
 								    palace_path: str,
 								    wing: str = None,
 								    room: str = None,
 								    n_results: int = 5,
 								    max_distance: float = 0.0,
-											MemPalace: palace architecture, AAAK compression, knowledge graph
										
										
											2026-04-04 18:16:04 -07:00
+								) -> dict:
-											feat: new MCP tools — get/list/update drawer, hook settings, export (resolves #635) (#667)
										
										
											2026-04-11 21:25:04 -07:00
+								    """Programmatic search — returns a dict instead of printing.
-											MemPalace: palace architecture, AAAK compression, knowledge graph
										
										
											2026-04-04 18:16:04 -07:00
+								    Used by the MCP server and other callers that need data.
-											feat: new MCP tools — get/list/update drawer, hook settings, export (resolves #635) (#667)
										
										
											2026-04-11 21:25:04 -07:00
 								    Args:
 								        query: Natural language search query.
 								        palace_path: Path to the ChromaDB palace directory.
 								        wing: Optional wing filter.
 								        room: Optional room filter.
 								        n_results: Max results to return.
 								        max_distance: Max cosine distance threshold. The palace collection uses
 								            cosine distance (hnsw:space=cosine) — 0 = identical, 2 = opposite.
 								            Results with distance > this value are filtered out. A value of
 .0 disables filtering. Typical useful range: 0.3–1.0.
-											MemPalace: palace architecture, AAAK compression, knowledge graph
										
										
											2026-04-04 18:16:04 -07:00
+								    """
 								    try:
-											feat: add closet layer — searchable index pointing to drawers
										
										
											2026-04-13 01:33:48 -07:00
+								        drawers_col = get_collection(palace_path, create=False)
-											MemPalace: palace architecture, AAAK compression, knowledge graph
										
										
											2026-04-04 18:16:04 -07:00
+								    except Exception as e:
-											fix: preserve CLI exit codes, log tracebacks, sanitize search errors, validate fixture
										
										
											2026-04-07 17:38:53 -03:00
+								        logger.error("No palace found at %s: %s", palace_path, e)
 								        return {
 								            "error": "No palace found",
 								            "hint": "Run: mempalace init <dir> && mempalace mine <dir>",
 								        }
-											MemPalace: palace architecture, AAAK compression, knowledge graph
										
										
											2026-04-04 18:16:04 -07:00
-											feat: new MCP tools — get/list/update drawer, hook settings, export (resolves #635) (#667)
										
										
											2026-04-11 21:25:04 -07:00
+								    where = build_where_filter(wing, room)
-											MemPalace: palace architecture, AAAK compression, knowledge graph
										
										
											2026-04-04 18:16:04 -07:00
-											fix(search): hybrid closet+drawer retrieval — closets boost, never gate (#795)
										
										
											2026-04-13 08:43:54 -03:00
+								    # Hybrid retrieval: always query drawers directly (the floor), then use
 								    # closet hits to boost rankings. Closets are a ranking SIGNAL, never a
 								    # GATE — direct drawer search is always the baseline.
 								    #
 								    # This avoids the "weak-closets regression" where narrative content
 								    # produces low-signal closets (regex extraction matches few topics)
 								    # and closet-first routing hides drawers that direct search would find.
-											MemPalace: palace architecture, AAAK compression, knowledge graph
										
										
											2026-04-04 18:16:04 -07:00
+								    try:
-											fix(search): hybrid closet+drawer retrieval — closets boost, never gate (#795)
										
										
											2026-04-13 08:43:54 -03:00
+								        dkwargs = {
-											MemPalace: palace architecture, AAAK compression, knowledge graph
										
										
											2026-04-04 18:16:04 -07:00
+								            "query_texts": [query],
-											fix(search): hybrid closet+drawer retrieval — closets boost, never gate (#795)
										
										
											2026-04-13 08:43:54 -03:00
+								            "n_results": n_results * 3,  # over-fetch for re-ranking
-											MemPalace: palace architecture, AAAK compression, knowledge graph
										
										
											2026-04-04 18:16:04 -07:00
+								            "include": ["documents", "metadatas", "distances"],
 								        }
 								        if where:
-											fix(search): hybrid closet+drawer retrieval — closets boost, never gate (#795)
										
										
											2026-04-13 08:43:54 -03:00
+								            dkwargs["where"] = where
 								        drawer_results = drawers_col.query(**dkwargs)
-											MemPalace: palace architecture, AAAK compression, knowledge graph
										
										
											2026-04-04 18:16:04 -07:00
+								    except Exception as e:
 								        return {"error": f"Search error: {e}"}
-											fix(search): hybrid closet+drawer retrieval — closets boost, never gate (#795)
										
										
											2026-04-13 08:43:54 -03:00
+								    # Gather closet hits (best-per-source) to build a boost lookup.
-											merge: pr/closet-llm-generic + harden LLM regen path for production
										
										
											2026-04-13 18:40:36 -03:00
+								    closet_boost_by_source: dict = {}  # source_file -> (rank, closet_dist, preview)
-											feat: add closet layer — searchable index pointing to drawers
										
										
											2026-04-13 01:33:48 -07:00
+								    try:
 								        closets_col = get_closets_collection(palace_path, create=False)
 								        ckwargs = {
 								            "query_texts": [query],
-											fix(search): hybrid closet+drawer retrieval — closets boost, never gate (#795)
										
										
											2026-04-13 08:43:54 -03:00
+								            "n_results": n_results * 2,
-											feat: add closet layer — searchable index pointing to drawers
										
										
											2026-04-13 01:33:48 -07:00
+								            "include": ["documents", "metadatas", "distances"],
 								        }
 								        if where:
 								            ckwargs["where"] = where
 								        closet_results = closets_col.query(**ckwargs)
-											merge: pr/closet-llm-generic + harden LLM regen path for production
										
										
											2026-04-13 18:40:36 -03:00
+								        for rank, (cdoc, cmeta, cdist) in enumerate(
-											fix(search): hybrid closet+drawer retrieval — closets boost, never gate (#795)
										
										
											2026-04-13 08:43:54 -03:00
+								            zip(
-											fix(searcher): guard against empty ChromaDB query results (#195) (#865)
										
										
											2026-04-15 09:26:38 +02:00
+								                _first_or_empty(closet_results, "documents"),
 								                _first_or_empty(closet_results, "metadatas"),
 								                _first_or_empty(closet_results, "distances"),
-											fix(search): hybrid closet+drawer retrieval — closets boost, never gate (#795)
										
										
											2026-04-13 08:43:54 -03:00
+								            )
 								        ):
-											merge: pr/closet-llm-generic + harden LLM regen path for production
										
										
											2026-04-13 18:40:36 -03:00
+								            source = cmeta.get("source_file", "")
-											fix(search): hybrid closet+drawer retrieval — closets boost, never gate (#795)
										
										
											2026-04-13 08:43:54 -03:00
+								            if source and source not in closet_boost_by_source:
-											merge: pr/closet-llm-generic + harden LLM regen path for production
										
										
											2026-04-13 18:40:36 -03:00
+								                closet_boost_by_source[source] = (rank, cdist, cdoc[:200])
-											feat: add closet layer — searchable index pointing to drawers
										
										
											2026-04-13 01:33:48 -07:00
+								    except Exception:
-											fix(search): hybrid closet+drawer retrieval — closets boost, never gate (#795)
										
										
											2026-04-13 08:43:54 -03:00
+								        pass  # no closets yet — hybrid degrades to pure drawer search
-											merge: pr/closet-llm-generic + harden LLM regen path for production
										
										
											2026-04-13 18:40:36 -03:00
+								    # Rank-based boost. The ordinal signal ("which closet matched best") is
 								    # more reliable than absolute distance on narrative content, where
 								    # closet distances cluster in 1.2-1.5 range regardless of match quality.
-											fix(search): hybrid closet+drawer retrieval — closets boost, never gate (#795)
										
										
											2026-04-13 08:43:54 -03:00
+								    CLOSET_RANK_BOOSTS = [0.40, 0.25, 0.15, 0.08, 0.04]
 								    CLOSET_DISTANCE_CAP = 1.5  # cosine dist > 1.5 = too weak to use as signal
-											merge: pr/closet-llm-generic + harden LLM regen path for production
										
										
											2026-04-13 18:40:36 -03:00
+								    scored: list = []
-											fix(search): hybrid closet+drawer retrieval — closets boost, never gate (#795)
										
										
											2026-04-13 08:43:54 -03:00
+								    for doc, meta, dist in zip(
-											fix(searcher): guard against empty ChromaDB query results (#195) (#865)
										
										
											2026-04-15 09:26:38 +02:00
+								        _first_or_empty(drawer_results, "documents"),
 								        _first_or_empty(drawer_results, "metadatas"),
 								        _first_or_empty(drawer_results, "distances"),
-											fix(search): hybrid closet+drawer retrieval — closets boost, never gate (#795)
										
										
											2026-04-13 08:43:54 -03:00
+								    ):
-											merge: pr/closet-llm-generic + harden LLM regen path for production
										
										
											2026-04-13 18:40:36 -03:00
+								        # Filter on raw distance before rounding to avoid precision loss.
-											feat: new MCP tools — get/list/update drawer, hook settings, export (resolves #635) (#667)
										
										
											2026-04-11 21:25:04 -07:00
+								        if max_distance > 0.0 and dist > max_distance:
 								            continue
-											MemPalace: palace architecture, AAAK compression, knowledge graph
										
										
											2026-04-04 18:16:04 -07:00
-											merge: pr/closet-llm-generic + harden LLM regen path for production
										
										
											2026-04-13 18:40:36 -03:00
+								        source = meta.get("source_file", "") or ""
-											fix(search): hybrid closet+drawer retrieval — closets boost, never gate (#795)
										
										
											2026-04-13 08:43:54 -03:00
+								        boost = 0.0
 								        matched_via = "drawer"
 								        closet_preview = None
 								        if source in closet_boost_by_source:
 								            c_rank, c_dist, c_preview = closet_boost_by_source[source]
 								            if c_dist <= CLOSET_DISTANCE_CAP and c_rank < len(CLOSET_RANK_BOOSTS):
 								                boost = CLOSET_RANK_BOOSTS[c_rank]
 								                matched_via = "drawer+closet"
 								                closet_preview = c_preview
 								        effective_dist = dist - boost
 								        entry = {
 								            "text": doc,
 								            "wing": meta.get("wing", "unknown"),
 								            "room": meta.get("room", "unknown"),
 								            "source_file": Path(source).name if source else "?",
-											feat: include created_at timestamp in search results (#846)
										
										
											2026-04-15 03:26:57 -04:00
+								            "created_at": meta.get("filed_at", "unknown"),
-											fix(search): hybrid closet+drawer retrieval — closets boost, never gate (#795)
										
										
											2026-04-13 08:43:54 -03:00
+								            "similarity": round(max(0.0, 1 - effective_dist), 3),
 								            "distance": round(dist, 4),
 								            "effective_distance": round(effective_dist, 4),
 								            "closet_boost": round(boost, 3),
 								            "matched_via": matched_via,
-											merge: pr/closet-llm-generic + harden LLM regen path for production
										
										
											2026-04-13 18:40:36 -03:00
+								            # Internal: retain the full source_file path + chunk_index so the
 								            # enrichment step below doesn't have to reverse-lookup via
 								            # basename-suffix matching (which silently collides when two
 								            # files share a basename across different directories).
-											fix(search): hybrid closet+drawer retrieval — closets boost, never gate (#795)
										
										
											2026-04-13 08:43:54 -03:00
+								            "_sort_key": effective_dist,
-											merge: pr/closet-llm-generic + harden LLM regen path for production
										
										
											2026-04-13 18:40:36 -03:00
+								            "_source_file_full": source,
 								            "_chunk_index": meta.get("chunk_index"),
-											fix(search): hybrid closet+drawer retrieval — closets boost, never gate (#795)
										
										
											2026-04-13 08:43:54 -03:00
+								        }
 								        if closet_preview:
 								            entry["closet_preview"] = closet_preview
 								        scored.append(entry)
 								    scored.sort(key=lambda h: h["_sort_key"])
 								    hits = scored[:n_results]
-											merge: pr/closet-llm-generic + harden LLM regen path for production
										
										
											2026-04-13 18:40:36 -03:00
+								    # Drawer-grep enrichment: for closet-boosted hits whose source has
 								    # multiple drawers, return the keyword-best chunk + its immediate
 								    # neighbors instead of just the drawer vector search landed on. The
 								    # closet said "this source is relevant"; vector may have picked the
 								    # wrong chunk within it; grep picks the right one.
-											fix(search): hybrid closet+drawer retrieval — closets boost, never gate (#795)
										
										
											2026-04-13 08:43:54 -03:00
+								    MAX_HYDRATION_CHARS = 10000
 								    for h in hits:
 								        if h["matched_via"] == "drawer":
 								            continue
-											merge: pr/closet-llm-generic + harden LLM regen path for production
										
										
											2026-04-13 18:40:36 -03:00
+								        full_source = h.get("_source_file_full") or ""
-											fix(search): hybrid closet+drawer retrieval — closets boost, never gate (#795)
										
										
											2026-04-13 08:43:54 -03:00
+								        if not full_source:
 								            continue
 								        try:
 								            source_drawers = drawers_col.get(
-											merge: pr/closet-llm-generic + harden LLM regen path for production
										
										
											2026-04-13 18:40:36 -03:00
+								                where={"source_file": full_source},
 								                include=["documents", "metadatas"],
-											fix(search): hybrid closet+drawer retrieval — closets boost, never gate (#795)
										
										
											2026-04-13 08:43:54 -03:00
+								            )
 								        except Exception:
 								            continue
 								        docs = source_drawers.get("documents") or []
-											merge: pr/closet-llm-generic + harden LLM regen path for production
										
										
											2026-04-13 18:40:36 -03:00
+								        metas_ = source_drawers.get("metadatas") or []
-											fix(search): hybrid closet+drawer retrieval — closets boost, never gate (#795)
										
										
											2026-04-13 08:43:54 -03:00
+								        if len(docs) <= 1:
 								            continue
-											MemPalace: palace architecture, AAAK compression, knowledge graph
										
										
											2026-04-04 18:16:04 -07:00
-											merge: pr/closet-llm-generic + harden LLM regen path for production
										
										
											2026-04-13 18:40:36 -03:00
+								        # Sort by chunk_index so best_idx + neighbors are positional.
 								        indexed = []
 								        for idx, (d, m) in enumerate(zip(docs, metas_)):
 								            ci = m.get("chunk_index", idx) if isinstance(m, dict) else idx
 								            if not isinstance(ci, int):
 								                ci = idx
 								            indexed.append((ci, d))
 								        indexed.sort(key=lambda p: p[0])
 								        ordered_docs = [d for _, d in indexed]
 								        query_terms = set(_tokenize(query))
-											fix(search): hybrid closet+drawer retrieval — closets boost, never gate (#795)
										
										
											2026-04-13 08:43:54 -03:00
+								        best_idx, best_score = 0, -1
-											merge: pr/closet-llm-generic + harden LLM regen path for production
										
										
											2026-04-13 18:40:36 -03:00
+								        for idx, d in enumerate(ordered_docs):
-											fix(search): hybrid closet+drawer retrieval — closets boost, never gate (#795)
										
										
											2026-04-13 08:43:54 -03:00
+								            d_lower = d.lower()
 								            s = sum(1 for t in query_terms if t in d_lower)
 								            if s > best_score:
 								                best_score, best_idx = s, idx
 								        start = max(0, best_idx - 1)
-											merge: pr/closet-llm-generic + harden LLM regen path for production
										
										
											2026-04-13 18:40:36 -03:00
+								        end = min(len(ordered_docs), best_idx + 2)
 								        expanded = "\n\n".join(ordered_docs[start:end])
-											fix(search): hybrid closet+drawer retrieval — closets boost, never gate (#795)
										
										
											2026-04-13 08:43:54 -03:00
+								        if len(expanded) > MAX_HYDRATION_CHARS:
 								            expanded = (
 								                expanded[:MAX_HYDRATION_CHARS]
-											merge: pr/closet-llm-generic + harden LLM regen path for production
										
										
											2026-04-13 18:40:36 -03:00
+								                + f"\n\n[...truncated. {len(ordered_docs)} total drawers. "
 								                "Use mempalace_get_drawer for full content.]"
-											fix(search): hybrid closet+drawer retrieval — closets boost, never gate (#795)
										
										
											2026-04-13 08:43:54 -03:00
+								            )
 								        h["text"] = expanded
 								        h["drawer_index"] = best_idx
-											merge: pr/closet-llm-generic + harden LLM regen path for production
										
										
											2026-04-13 18:40:36 -03:00
+								        h["total_drawers"] = len(ordered_docs)
-											fix(search): hybrid closet+drawer retrieval — closets boost, never gate (#795)
										
										
											2026-04-13 08:43:54 -03:00
-											merge: pr/closet-llm-generic + harden LLM regen path for production
										
										
											2026-04-13 18:40:36 -03:00
+								    # BM25 hybrid re-rank within the final candidate set.
-											feat: entity metadata + diary ingest + BM25 hybrid search
										
										
											2026-04-13 01:47:19 -07:00
+								    hits = _hybrid_rank(hits, query)
-											fix(search): hybrid closet+drawer retrieval — closets boost, never gate (#795)
										
										
											2026-04-13 08:43:54 -03:00
+								    for h in hits:
 								        h.pop("_sort_key", None)
-											merge: pr/closet-llm-generic + harden LLM regen path for production
										
										
											2026-04-13 18:40:36 -03:00
+								        h.pop("_source_file_full", None)
 								        h.pop("_chunk_index", None)
-											fix(search): hybrid closet+drawer retrieval — closets boost, never gate (#795)
										
										
											2026-04-13 08:43:54 -03:00
-											MemPalace: palace architecture, AAAK compression, knowledge graph
										
										
											2026-04-04 18:16:04 -07:00
+								    return {
 								        "query": query,
 								        "filters": {"wing": wing, "room": room},
-											fix(searcher): guard against empty ChromaDB query results (#195) (#865)
										
										
											2026-04-15 09:26:38 +02:00
+								        "total_before_filter": len(_first_or_empty(drawer_results, "documents")),
-											MemPalace: palace architecture, AAAK compression, knowledge graph
										
										
											2026-04-04 18:16:04 -07:00
+								        "results": hits,
 								    }