feat(search): drawer-grep returns best-matching chunk + neighbors
When a closet hit leads to a source file with many drawers, grep each chunk for query terms and return the BEST-MATCHING chunk + 1 neighbor on each side, instead of dumping the whole file truncated at MAX_HYDRATION_CHARS.

Result now includes drawer_index and total_drawers so callers can request adjacent drawers explicitly.

Extracted from Milla's commit 935f657, which bundled drawer-grep with closet_llm (deferred pending LLM_ENDPOINT refactor) and fact_checker (separate PR). Ported only the searcher.py change.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
+30
-5
@@ -205,6 +205,8 @@ def search_memories(
|
|||||||
pass # no closets yet — fall through to direct drawer search
|
pass # no closets yet — fall through to direct drawer search
|
||||||
|
|
||||||
# If closets found results, hydrate the referenced drawers
|
# If closets found results, hydrate the referenced drawers
|
||||||
|
MAX_HYDRATION_CHARS = 10000 # cap to prevent blowup on large source files
|
||||||
|
|
||||||
if closet_hits:
|
if closet_hits:
|
||||||
import re
|
import re
|
||||||
seen_sources = set()
|
seen_sources = set()
|
||||||
@@ -215,18 +217,39 @@ def search_memories(
|
|||||||
continue
|
continue
|
||||||
seen_sources.add(source)
|
seen_sources.add(source)
|
||||||
|
|
||||||
# Find drawers for this source file
|
# Find drawers for this source file, grep for most relevant chunk
|
||||||
try:
|
try:
|
||||||
drawer_results = drawers_col.get(
|
drawer_results = drawers_col.get(
|
||||||
where={"source_file": source},
|
where={"source_file": source},
|
||||||
include=["documents", "metadatas"],
|
include=["documents", "metadatas"],
|
||||||
)
|
)
|
||||||
if drawer_results.get("ids"):
|
if drawer_results.get("ids"):
|
||||||
# Combine all drawer content for this file
|
# Drawer-grep: score each chunk against the query,
|
||||||
full_text = "\n\n".join(drawer_results["documents"])
|
# return the best-matching chunk first + surrounding context
|
||||||
meta = drawer_results["metadatas"][0]
|
query_terms = set(re.findall(r'\w{2,}', query.lower()))
|
||||||
|
best_idx = 0
|
||||||
|
best_score = -1
|
||||||
|
for idx, doc in enumerate(drawer_results["documents"]):
|
||||||
|
doc_lower = doc.lower()
|
||||||
|
score = sum(1 for t in query_terms if t in doc_lower)
|
||||||
|
if score > best_score:
|
||||||
|
best_score = score
|
||||||
|
best_idx = idx
|
||||||
|
|
||||||
|
# Build result: best chunk first, then neighbors
|
||||||
|
docs = drawer_results["documents"]
|
||||||
|
n_docs = len(docs)
|
||||||
|
# Include best chunk + 1 before + 1 after for context
|
||||||
|
start = max(0, best_idx - 1)
|
||||||
|
end = min(n_docs, best_idx + 2)
|
||||||
|
relevant_text = "\n\n".join(docs[start:end])
|
||||||
|
|
||||||
|
if len(relevant_text) > MAX_HYDRATION_CHARS:
|
||||||
|
relevant_text = relevant_text[:MAX_HYDRATION_CHARS] + f"\n\n[...truncated. {n_docs} total drawers. Use mempalace_get_drawer for full content.]"
|
||||||
|
|
||||||
|
meta = drawer_results["metadatas"][best_idx]
|
||||||
hits.append({
|
hits.append({
|
||||||
"text": full_text,
|
"text": relevant_text,
|
||||||
"wing": meta.get("wing", "unknown"),
|
"wing": meta.get("wing", "unknown"),
|
||||||
"room": meta.get("room", "unknown"),
|
"room": meta.get("room", "unknown"),
|
||||||
"source_file": Path(source).name,
|
"source_file": Path(source).name,
|
||||||
@@ -234,6 +257,8 @@ def search_memories(
|
|||||||
"distance": round(closet_dist, 4),
|
"distance": round(closet_dist, 4),
|
||||||
"matched_via": "closet",
|
"matched_via": "closet",
|
||||||
"closet_preview": closet_doc[:200],
|
"closet_preview": closet_doc[:200],
|
||||||
|
"drawer_index": best_idx,
|
||||||
|
"total_drawers": n_docs,
|
||||||
})
|
})
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|||||||
Reference in New Issue
Block a user