feat: add closet layer — searchable index pointing to drawers
The closet architecture was always part of MemPalace's design but never shipped in the public codebase. This adds it.

Palace now has TWO collections:
- mempalace_drawers — full verbatim content (unchanged)
- mempalace_closets — compact AAAK-style index entries

How it works:
- When mining, each file gets a closet alongside its drawers
- Closet contains extracted topics, entities, quotes as pointers
- Closets pack up to 1500 chars, topics never split mid-entry
- Search hits closets first (fast, small), then hydrates the full drawer content for matching files
- Falls back to direct drawer search if no closets exist yet

Files changed:
- palace.py: get_closets_collection(), build_closet_text(), upsert_closet(), CLOSET_CHAR_LIMIT
- miner.py: process_file() now creates closets after drawers
- searcher.py: search_memories() tries closet-first search, hydrates drawers, falls back to direct search

Backwards compatible — existing palaces without closets continue to work via the fallback path. Closets are created on next mine.

689/689 tests pass.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
+24
-1
@@ -15,7 +15,10 @@ from pathlib import Path
|
|||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
|
|
||||||
from .palace import SKIP_DIRS, get_collection, file_already_mined, mine_lock
|
from .palace import (
|
||||||
|
SKIP_DIRS, get_collection, get_closets_collection,
|
||||||
|
file_already_mined, mine_lock, build_closet_text, upsert_closet,
|
||||||
|
)
|
||||||
|
|
||||||
READABLE_EXTENSIONS = {
|
READABLE_EXTENSIONS = {
|
||||||
".txt",
|
".txt",
|
||||||
@@ -410,6 +413,7 @@ def process_file(
|
|||||||
rooms: list,
|
rooms: list,
|
||||||
agent: str,
|
agent: str,
|
||||||
dry_run: bool,
|
dry_run: bool,
|
||||||
|
closets_col=None,
|
||||||
) -> tuple:
|
) -> tuple:
|
||||||
"""Read, chunk, route, and file one file. Returns (drawer_count, room_name)."""
|
"""Read, chunk, route, and file one file. Returns (drawer_count, room_name)."""
|
||||||
|
|
||||||
@@ -466,6 +470,22 @@ def process_file(
|
|||||||
if added:
|
if added:
|
||||||
drawers_added += 1
|
drawers_added += 1
|
||||||
|
|
||||||
|
# Build closet — the searchable index pointing to these drawers
|
||||||
|
if closets_col and drawers_added > 0:
|
||||||
|
drawer_ids = [
|
||||||
|
f"drawer_{wing}_{room}_{hashlib.sha256((source_file + str(c['chunk_index'])).encode()).hexdigest()[:24]}"
|
||||||
|
for c in chunks
|
||||||
|
]
|
||||||
|
closet_text = build_closet_text(source_file, drawer_ids, content, wing, room)
|
||||||
|
closet_id = f"closet_{wing}_{room}_{hashlib.sha256(source_file.encode()).hexdigest()[:24]}"
|
||||||
|
upsert_closet(closets_col, closet_id, closet_text, {
|
||||||
|
"wing": wing,
|
||||||
|
"room": room,
|
||||||
|
"source_file": source_file,
|
||||||
|
"drawer_count": drawers_added,
|
||||||
|
"filed_at": datetime.now().isoformat(),
|
||||||
|
})
|
||||||
|
|
||||||
return drawers_added, room
|
return drawers_added, room
|
||||||
|
|
||||||
|
|
||||||
@@ -586,8 +606,10 @@ def mine(
|
|||||||
|
|
||||||
if not dry_run:
|
if not dry_run:
|
||||||
collection = get_collection(palace_path)
|
collection = get_collection(palace_path)
|
||||||
|
closets_col = get_closets_collection(palace_path)
|
||||||
else:
|
else:
|
||||||
collection = None
|
collection = None
|
||||||
|
closets_col = None
|
||||||
|
|
||||||
total_drawers = 0
|
total_drawers = 0
|
||||||
files_skipped = 0
|
files_skipped = 0
|
||||||
@@ -602,6 +624,7 @@ def mine(
|
|||||||
rooms=rooms,
|
rooms=rooms,
|
||||||
agent=agent,
|
agent=agent,
|
||||||
dry_run=dry_run,
|
dry_run=dry_run,
|
||||||
|
closets_col=closets_col,
|
||||||
)
|
)
|
||||||
if drawers == 0 and not dry_run:
|
if drawers == 0 and not dry_run:
|
||||||
files_skipped += 1
|
files_skipped += 1
|
||||||
|
|||||||
@@ -52,6 +52,68 @@ def get_collection(
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def get_closets_collection(palace_path: str, create: bool = True):
    """Return the ``mempalace_closets`` collection — the searchable index layer.

    Thin wrapper around ``get_collection`` that pins the collection name;
    ``palace_path`` and ``create`` are forwarded unchanged.
    """
    closet_kwargs = {"collection_name": "mempalace_closets", "create": create}
    return get_collection(palace_path, **closet_kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
# Character budget for a single closet document. upsert_closet() consults it
# when deciding whether new text may be appended to an existing closet.
CLOSET_CHAR_LIMIT = 1500  # appending stops once the combined text would exceed ~1500 chars
|
||||||
|
|
||||||
|
|
||||||
|
def build_closet_text(source_file, drawer_ids, content, wing, room):
    """Build a compact closet entry from drawer content.

    Scans the first 5000 characters of ``content`` for recurring capitalized
    words ("entities"), action phrases ("topics") and the first double-quoted
    passage, and emits one AAAK-style pointer line per topic/quote::

        <topic or "quote">|<entity;entity;...>|→<drawer_id,drawer_id,...>

    Args:
        source_file: Path of the mined file. Not used when building the text;
            kept so the signature stays stable for callers.
        drawer_ids: Drawer IDs for the file. Only the first three are
            referenced in each pointer line.
        content: Text to extract topics, entities and a quote from.
        wing: Wing name, used only for the fallback pointer line.
        room: Room name, used only for the fallback pointer line.

    Returns:
        The pointer lines joined with newlines. There is always at least one
        line: when no topic or quote is found, a single ``wing/room`` pointer
        is produced. No length cap is applied here.
    """
    import re
    from collections import Counter

    sample = content[:5000]

    # Proper nouns: capitalized words seen at least twice, most frequent
    # first (ties keep first-seen order), capped at five.
    counts = Counter(re.findall(r"\b[A-Z][a-z]{2,}\b", sample))
    entities = [word for word, seen in counts.most_common() if seen >= 2][:5]

    # Key phrases introduced by an action verb.
    topic_patterns = [
        r"(?:built|fixed|wrote|added|pushed|tested|created|decided|migrated)\s+[\w\s]{3,30}",
    ]
    raw_topics = []
    for pattern in topic_patterns:
        raw_topics.extend(re.findall(pattern, sample, re.IGNORECASE))
    # ``\s`` in the pattern also matches newlines, so a phrase may span lines.
    # Collapse all whitespace runs so every pointer stays on a single line
    # (the closet format is one pointer per line).
    topics = list(
        dict.fromkeys(" ".join(topic.split()).lower() for topic in raw_topics)
    )[:8]

    # First double-quoted passage, normalized to one line for the same reason.
    quote_match = re.search(r'"([^"]{15,100})"', sample)
    quote = " ".join(quote_match.group(1).split()) if quote_match else ""

    # Build pointer lines.
    entity_str = ";".join(entities)
    targets = ",".join(drawer_ids[:3])
    lines = [f"{topic}|{entity_str}|→{targets}" for topic in topics]
    if quote:
        lines.append(f'"{quote}"|{entity_str}|→{targets}')
    if not lines:
        # Nothing extractable: still emit one pointer so the file is indexed.
        lines.append(f"{wing}/{room}|{entity_str}|→{targets}")

    return "\n".join(lines)
|
||||||
|
|
||||||
|
|
||||||
|
def upsert_closet(closets_col, closet_id, closet_text, metadata):
    """Add or update a closet document.

    If a closet with ``closet_id`` already exists and the combined text stays
    within ``CLOSET_CHAR_LIMIT``, the new pointer lines are appended to the
    existing text; lines already present are not repeated. Otherwise the
    existing text is replaced by ``closet_text``.

    The limit is applied only to that merge decision: a ``closet_text`` that
    is longer than the limit on its own is stored as given.

    Args:
        closets_col: Collection object providing ``get(ids=...)`` and
            ``upsert(documents=..., ids=..., metadatas=...)``.
        closet_id: ID of the closet document to write.
        closet_text: Newline-separated pointer lines to store.
        metadata: Metadata stored alongside the document; it replaces any
            metadata previously stored under ``closet_id``.
    """
    import logging

    try:
        existing = closets_col.get(ids=[closet_id])
        old_text = existing["documents"][0] if existing.get("ids") else None
        if old_text:
            # Skip lines the closet already holds so re-upserting the same
            # pointers does not bloat the index with duplicates.
            known_lines = set(old_text.split("\n"))
            fresh_lines = [
                line for line in closet_text.split("\n") if line not in known_lines
            ]
            merged = "\n".join([old_text, *fresh_lines])
            if len(merged) <= CLOSET_CHAR_LIMIT:
                closet_text = merged
            # else: start fresh — old closet was full
    except Exception:
        # Merging is best-effort: if the lookup fails we still write the new
        # text below, but leave a trace instead of swallowing the error.
        logging.getLogger(__name__).warning(
            "Could not read existing closet %s; overwriting", closet_id, exc_info=True
        )
    closets_col.upsert(documents=[closet_text], ids=[closet_id], metadatas=[metadata])
|
||||||
|
|
||||||
|
|
||||||
@contextlib.contextmanager
|
@contextlib.contextmanager
|
||||||
def mine_lock(source_file: str):
|
def mine_lock(source_file: str):
|
||||||
"""Cross-platform file lock for mine operations.
|
"""Cross-platform file lock for mine operations.
|
||||||
|
|||||||
+70
-3
@@ -9,7 +9,7 @@ Returns verbatim text — the actual words, never summaries.
|
|||||||
import logging
|
import logging
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from .palace import get_collection
|
from .palace import get_collection, get_closets_collection
|
||||||
|
|
||||||
logger = logging.getLogger("mempalace_mcp")
|
logger = logging.getLogger("mempalace_mcp")
|
||||||
|
|
||||||
@@ -117,7 +117,7 @@ def search_memories(
|
|||||||
0.0 disables filtering. Typical useful range: 0.3–1.0.
|
0.0 disables filtering. Typical useful range: 0.3–1.0.
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
col = get_collection(palace_path, create=False)
|
drawers_col = get_collection(palace_path, create=False)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error("No palace found at %s: %s", palace_path, e)
|
logger.error("No palace found at %s: %s", palace_path, e)
|
||||||
return {
|
return {
|
||||||
@@ -127,6 +127,73 @@ def search_memories(
|
|||||||
|
|
||||||
where = build_where_filter(wing, room)
|
where = build_where_filter(wing, room)
|
||||||
|
|
||||||
|
# Try closet-first search: search the compact index, then hydrate drawers
|
||||||
|
closet_hits = []
|
||||||
|
try:
|
||||||
|
closets_col = get_closets_collection(palace_path, create=False)
|
||||||
|
ckwargs = {
|
||||||
|
"query_texts": [query],
|
||||||
|
"n_results": n_results * 2, # over-fetch closets to find best drawers
|
||||||
|
"include": ["documents", "metadatas", "distances"],
|
||||||
|
}
|
||||||
|
if where:
|
||||||
|
ckwargs["where"] = where
|
||||||
|
closet_results = closets_col.query(**ckwargs)
|
||||||
|
if closet_results["documents"][0]:
|
||||||
|
closet_hits = list(zip(
|
||||||
|
closet_results["documents"][0],
|
||||||
|
closet_results["metadatas"][0],
|
||||||
|
closet_results["distances"][0],
|
||||||
|
))
|
||||||
|
except Exception:
|
||||||
|
pass # no closets yet — fall through to direct drawer search
|
||||||
|
|
||||||
|
# If closets found results, hydrate the referenced drawers
|
||||||
|
if closet_hits:
|
||||||
|
import re
|
||||||
|
seen_sources = set()
|
||||||
|
hits = []
|
||||||
|
for closet_doc, closet_meta, closet_dist in closet_hits:
|
||||||
|
source = closet_meta.get("source_file", "")
|
||||||
|
if source in seen_sources:
|
||||||
|
continue
|
||||||
|
seen_sources.add(source)
|
||||||
|
|
||||||
|
# Find drawers for this source file
|
||||||
|
try:
|
||||||
|
drawer_results = drawers_col.get(
|
||||||
|
where={"source_file": source},
|
||||||
|
include=["documents", "metadatas"],
|
||||||
|
)
|
||||||
|
if drawer_results.get("ids"):
|
||||||
|
# Combine all drawer content for this file
|
||||||
|
full_text = "\n\n".join(drawer_results["documents"])
|
||||||
|
meta = drawer_results["metadatas"][0]
|
||||||
|
hits.append({
|
||||||
|
"text": full_text,
|
||||||
|
"wing": meta.get("wing", "unknown"),
|
||||||
|
"room": meta.get("room", "unknown"),
|
||||||
|
"source_file": Path(source).name,
|
||||||
|
"similarity": round(max(0.0, 1 - closet_dist), 3),
|
||||||
|
"distance": round(closet_dist, 4),
|
||||||
|
"matched_via": "closet",
|
||||||
|
"closet_preview": closet_doc[:200],
|
||||||
|
})
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
if len(hits) >= n_results:
|
||||||
|
break
|
||||||
|
|
||||||
|
if hits:
|
||||||
|
return {
|
||||||
|
"query": query,
|
||||||
|
"filters": {"wing": wing, "room": room},
|
||||||
|
"total_before_filter": len(closet_hits),
|
||||||
|
"results": hits,
|
||||||
|
}
|
||||||
|
|
||||||
|
# Fallback: direct drawer search (no closets yet, or closets empty)
|
||||||
try:
|
try:
|
||||||
kwargs = {
|
kwargs = {
|
||||||
"query_texts": [query],
|
"query_texts": [query],
|
||||||
@@ -136,7 +203,7 @@ def search_memories(
|
|||||||
if where:
|
if where:
|
||||||
kwargs["where"] = where
|
kwargs["where"] = where
|
||||||
|
|
||||||
results = col.query(**kwargs)
|
results = drawers_col.query(**kwargs)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return {"error": f"Search error: {e}"}
|
return {"error": f"Search error: {e}"}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user