From 971b92da5d879e444a5f44210a6f2b084c75bf9a Mon Sep 17 00:00:00 2001
From: MSL <232237854+milla-jovovich@users.noreply.github.com>
Date: Mon, 13 Apr 2026 07:46:07 -0300
Subject: [PATCH] feat(search): drawer-grep returns best-matching chunk +
 neighbors

When a closet hit leads to a source file with many drawers, grep each
chunk for query terms and return the BEST-MATCHING chunk + 1 neighbor
on each side, instead of dumping the whole file truncated at
MAX_HYDRATION_CHARS. Result now includes drawer_index and
total_drawers so callers can request adjacent drawers explicitly.

Extracted from Milla's commit 935f657 which bundled drawer-grep with
closet_llm (deferred pending LLM_ENDPOINT refactor) and fact_checker
(separate PR). Ported only the searcher.py change.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 mempalace/searcher.py | 35 ++++++++++++++++++++++++++++++-----
 1 file changed, 30 insertions(+), 5 deletions(-)

diff --git a/mempalace/searcher.py b/mempalace/searcher.py
index 37795fc..19b07f4 100644
--- a/mempalace/searcher.py
+++ b/mempalace/searcher.py
@@ -205,6 +205,8 @@ def search_memories(
         pass  # no closets yet — fall through to direct drawer search
 
     # If closets found results, hydrate the referenced drawers
+    MAX_HYDRATION_CHARS = 10000  # cap to prevent blowup on large source files
+
     if closet_hits:
         import re
         seen_sources = set()
@@ -215,18 +217,39 @@ def search_memories(
                 continue
             seen_sources.add(source)
 
-            # Find drawers for this source file
+            # Find drawers for this source file, grep for most relevant chunk
             try:
                 drawer_results = drawers_col.get(
                     where={"source_file": source},
                     include=["documents", "metadatas"],
                 )
                 if drawer_results.get("ids"):
-                    # Combine all drawer content for this file
-                    full_text = "\n\n".join(drawer_results["documents"])
-                    meta = drawer_results["metadatas"][0]
+                    # Drawer-grep: score each chunk against the query,
+                    # then return the best-matching chunk with its neighbors for context
+                    query_terms = set(re.findall(r'\w{2,}', query.lower()))
+                    best_idx = 0
+                    best_score = -1
+                    for idx, doc in enumerate(drawer_results["documents"]):
+                        doc_lower = doc.lower()
+                        score = sum(1 for t in query_terms if t in doc_lower)
+                        if score > best_score:
+                            best_score = score
+                            best_idx = idx
+
+                    # Build result: best chunk plus its neighbors, joined in drawer order
+                    docs = drawer_results["documents"]
+                    n_docs = len(docs)
+                    # Include best chunk + 1 before + 1 after for context
+                    start = max(0, best_idx - 1)
+                    end = min(n_docs, best_idx + 2)
+                    relevant_text = "\n\n".join(docs[start:end])
+
+                    if len(relevant_text) > MAX_HYDRATION_CHARS:
+                        relevant_text = relevant_text[:MAX_HYDRATION_CHARS] + f"\n\n[...truncated. {n_docs} total drawers. Use mempalace_get_drawer for full content.]"
+
+                    meta = drawer_results["metadatas"][best_idx]
                     hits.append({
-                        "text": full_text,
+                        "text": relevant_text,
                         "wing": meta.get("wing", "unknown"),
                         "room": meta.get("room", "unknown"),
                         "source_file": Path(source).name,
@@ -234,6 +257,8 @@ def search_memories(
                         "distance": round(closet_dist, 4),
                         "matched_via": "closet",
                         "closet_preview": closet_doc[:200],
+                        "drawer_index": best_idx,
+                        "total_drawers": n_docs,
                     })
             except Exception:
                 pass