* feat: MCP reliability — inode detection, WAL rotation, metadata cache, search limits Infrastructure hardening for the MCP server: - Detect palace DB replacement via inode tracking (repair command support) - WAL rotation to prevent unbounded WAL growth - _fetch_all_metadata() + _get_cached_metadata() with 60s TTL for taxonomy/status - _MAX_RESULTS cap (100) with limit clamping [1, _MAX_RESULTS] - max_distance parameter for similarity threshold in search - Handle all notifications/* methods, null arguments, method=None - Remove duplicate _client_cache = None declarations - searcher.py max_distance parameter passthrough Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * feat: new MCP tools (get/list/update drawer, hook settings, memories filed), export, normalize New MCP tools: - mempalace_get_drawer: fetch single drawer by ID with full content - mempalace_list_drawers: paginated listing with wing/room filter - mempalace_update_drawer: update content/wing/room on existing drawers - mempalace_hook_settings: get/set hook behavior (silent_save, desktop_toast) - mempalace_memories_filed_away: check latest checkpoint status Also includes: - exporter.py: export palace as browsable markdown files - normalize.py: tool_use/tool_result capture for richer transcript mining - layers.py: updated for new tool integration - config.py: hook settings properties (hook_silent_save, hook_desktop_toast) Depends on PR 3 (reliability) for _MAX_RESULTS, _metadata_cache, WAL logging. 
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * fix: normalize.py handles string messages and Read offset type mismatch Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * fix: params null guard, L2→cosine docs, empty tool_use_map key guard - Handle explicit null in MCP params (request.get("params") or {}) - Fix search tool description: L2 → cosine distance (collection uses hnsw:space=cosine) - Guard against empty string key in tool_use_map from malformed JSONL entries Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * fix: rename ambiguous var 'l' to 'line' (E741 lint) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * fix: address code review findings (5 issues) 1. min_similarity backwards-compat: convert similarity to distance scale (1.0 - similarity) instead of passing raw value as max_distance 2. Restore structured error reporting (error + partial fields) in tool_status, tool_list_wings, tool_list_rooms, tool_get_taxonomy — reverts silent except:pass that dropped #647 security hardening 3. inode cache: remove falsy-zero short-circuit so missing DB file triggers reconnect instead of reusing stale client 4. _fetch_all_metadata: check for empty batch before extending/advancing offset to prevent infinite loop on concurrent deletion 5. KG initialization: only override path when --palace is explicit; default runs use KnowledgeGraph's built-in default path Co-authored-by: jphein <jphein@users.noreply.github.com> --------- Co-authored-by: jp <jp@jphein.com> Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com> Co-authored-by: jphein <jphein@users.noreply.github.com>
This commit is contained in:
@@ -0,0 +1,153 @@
|
||||
"""
|
||||
exporter.py — Export the palace as a browsable folder of markdown files.
|
||||
|
||||
Produces:
|
||||
output_dir/
|
||||
index.md — table of contents
|
||||
wing_name/
|
||||
room_name.md — one file per room, drawers as sections
|
||||
|
||||
Streams drawers in paginated batches so memory usage stays bounded
|
||||
regardless of palace size.
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
from collections import defaultdict
|
||||
from datetime import datetime
|
||||
|
||||
from .palace import get_collection
|
||||
|
||||
|
||||
def _safe_path_component(name: str) -> str:
|
||||
"""Sanitize a string for use as a directory/file name component."""
|
||||
name = re.sub(r'[/\\:*?"<>|]', '_', name)
|
||||
name = name.strip('. ')
|
||||
return name or 'unknown'
|
||||
|
||||
|
||||
def export_palace(palace_path: str, output_dir: str, format: str = "markdown") -> dict:
    """Export all palace drawers as markdown files organized by wing/room.

    Streams drawers in batches of 1000 and writes each wing/room file
    incrementally, so drawer content is never held in memory for more than
    one batch. Only per-room counters and the set of files already started
    grow with the size of the palace.

    Room files that already exist in ``output_dir`` are overwritten the first
    time this export touches them and appended to afterwards.

    Args:
        palace_path: Path to the ChromaDB palace directory.
        output_dir: Where to write the exported markdown tree.
        format: Output format (currently only "markdown"). The value is not
            inspected; markdown is always produced.

    Returns:
        Stats dict: {"wings": N, "rooms": N, "drawers": N}
    """
    # Local import: only needed to percent-encode the links in index.md.
    from urllib.parse import quote

    col = get_collection(palace_path)
    total = col.count()

    if total == 0:
        print(" Palace is empty — nothing to export.")
        return {"wings": 0, "rooms": 0, "drawers": 0}

    os.makedirs(output_dir, exist_ok=True)

    # Files started during this export, keyed by the path on disk. Keying on
    # the sanitized path (not the raw names) matters: two rooms whose names
    # sanitize to the same file must append to it rather than truncate it.
    started_paths: set[str] = set()
    # Logical rooms whose "# wing / room" heading has already been written.
    headed_rooms: set[tuple[str, str]] = set()
    # Track stats per wing: {wing: {room: count}}
    wing_stats: dict[str, dict[str, int]] = defaultdict(lambda: defaultdict(int))
    total_drawers = 0

    print(f" Streaming {total} drawers...")
    offset = 0
    while offset < total:
        batch = col.get(limit=1000, offset=offset, include=["documents", "metadatas"])
        if not batch["ids"]:
            # Fewer rows than count() reported (e.g. concurrent deletion).
            break

        # Group this batch by wing/room so we do one file write per room per batch
        batch_grouped: dict[str, dict[str, list]] = defaultdict(lambda: defaultdict(list))
        for doc_id, doc, meta in zip(batch["ids"], batch["documents"], batch["metadatas"]):
            # Entries stored without metadata or without a document come back as None.
            meta = meta or {}
            wing = meta.get("wing", "unknown")
            room = meta.get("room", "general")
            batch_grouped[wing][room].append({
                "id": doc_id,
                "content": doc or "",
                "source": meta.get("source_file", ""),
                "filed_at": meta.get("filed_at", ""),
                "added_by": meta.get("added_by", ""),
            })

        # Write/append each room file
        for wing, rooms in batch_grouped.items():
            wing_dir = os.path.join(output_dir, _safe_path_component(wing))
            os.makedirs(wing_dir, exist_ok=True)

            for room, drawers in rooms.items():
                room_path = os.path.join(wing_dir, f"{_safe_path_component(room)}.md")
                key = (wing, room)
                mode = "a" if room_path in started_paths else "w"

                with open(room_path, mode, encoding="utf-8") as f:
                    started_paths.add(room_path)
                    if key not in headed_rooms:
                        f.write(f"# {wing} / {room}\n\n")
                        headed_rooms.add(key)

                    for drawer in drawers:
                        source = drawer["source"] or "unknown"
                        filed = drawer["filed_at"] or "unknown"
                        added_by = drawer["added_by"] or "unknown"

                        f.write(
                            f"## {drawer['id']}\n"
                            f"\n"
                            f"> {_quote_content(drawer['content'])}\n"
                            f"\n"
                            f"| Field | Value |\n"
                            f"|-------|-------|\n"
                            f"| Source | {source} |\n"
                            f"| Filed | {filed} |\n"
                            f"| Added by | {added_by} |\n"
                            f"\n"
                            f"---\n\n"
                        )

                wing_stats[wing][room] += len(drawers)
                total_drawers += len(drawers)

        offset += len(batch["ids"])

    # Build and print stats
    index_rows = []
    for wing in sorted(wing_stats):
        rooms = wing_stats[wing]
        wing_drawer_count = sum(rooms.values())
        index_rows.append((wing, len(rooms), wing_drawer_count))
        print(f" {wing}: {len(rooms)} rooms, {wing_drawer_count} drawers")

    # Write index.md
    today = datetime.now().strftime("%Y-%m-%d")
    index_lines = [
        f"# Palace Export — {today}\n",
        "",
        "| Wing | Rooms | Drawers |",
        "|------|-------|---------|",
    ]
    for wing, room_count, drawer_count in index_rows:
        # Link to the directory actually created on disk (the sanitized name),
        # percent-encoded so spaces and parentheses do not break the link.
        link = quote(_safe_path_component(wing)) + "/"
        index_lines.append(f"| [{wing}]({link}) | {room_count} | {drawer_count} |")
    index_lines.append("")

    index_path = os.path.join(output_dir, "index.md")
    with open(index_path, "w", encoding="utf-8") as f:
        f.write("\n".join(index_lines))

    stats = {"wings": len(wing_stats), "rooms": sum(r for _, r, _ in index_rows), "drawers": total_drawers}
    print(f"\n Exported {stats['drawers']} drawers across {stats['wings']} wings, {stats['rooms']} rooms")
    print(f" Output: {output_dir}")
    return stats
|
||||
|
||||
|
||||
def _quote_content(text: str) -> str:
|
||||
"""Format content for a markdown blockquote, handling multiline."""
|
||||
lines = text.rstrip("\n").split("\n")
|
||||
return "\n> ".join(lines)
|
||||
Reference in New Issue
Block a user