2026-04-04 18:16:04 -07:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
|
"""
|
|
|
|
|
miner.py — Files everything into the palace.
|
|
|
|
|
|
|
|
|
|
Reads mempalace.yaml from the project directory to know the wing + rooms.
|
|
|
|
|
Routes each file to the right room based on content.
|
|
|
|
|
Stores verbatim chunks as drawers. No summaries. Ever.
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
import os
|
2026-04-11 11:05:27 -04:00
|
|
|
import sys
|
2026-04-04 18:16:04 -07:00
|
|
|
import hashlib
|
2026-04-07 22:26:06 +08:00
|
|
|
import fnmatch
|
2026-04-04 18:16:04 -07:00
|
|
|
from pathlib import Path
|
|
|
|
|
from datetime import datetime
|
|
|
|
|
from collections import defaultdict
|
|
|
|
|
|
2026-04-13 01:33:48 -07:00
|
|
|
from .palace import (
|
2026-04-13 16:29:50 -03:00
|
|
|
NORMALIZE_VERSION,
|
|
|
|
|
SKIP_DIRS,
|
2026-04-13 17:00:55 -03:00
|
|
|
build_closet_lines,
|
2026-04-13 16:29:50 -03:00
|
|
|
file_already_mined,
|
2026-04-13 17:00:55 -03:00
|
|
|
get_closets_collection,
|
2026-04-13 16:29:50 -03:00
|
|
|
get_collection,
|
|
|
|
|
mine_lock,
|
2026-04-13 17:00:55 -03:00
|
|
|
purge_file_closets,
|
|
|
|
|
upsert_closet_lines,
|
2026-04-13 01:33:48 -07:00
|
|
|
)
|
2026-04-09 08:06:30 -07:00
|
|
|
|
2026-04-04 18:16:04 -07:00
|
|
|
# File suffixes (lower-case, with leading dot) that the miner will read.
READABLE_EXTENSIONS = {
    # prose and docs
    ".txt",
    ".md",
    ".html",
    ".css",
    # source code
    ".py",
    ".js",
    ".ts",
    ".jsx",
    ".tsx",
    ".java",
    ".go",
    ".rs",
    ".rb",
    ".sh",
    ".sql",
    # data and configuration
    ".json",
    ".yaml",
    ".yml",
    ".csv",
    ".toml",
}

# Exact filenames that are skipped unless explicitly force-included.
SKIP_FILENAMES = {
    # palace configuration (current and legacy names)
    "mempalace.yaml",
    "mempalace.yml",
    "mempal.yaml",
    "mempal.yml",
    # repository bookkeeping
    ".gitignore",
    "package-lock.json",
}

CHUNK_SIZE = 800  # chars per drawer
CHUNK_OVERLAP = 100  # overlap between chunks
MIN_CHUNK_SIZE = 50  # skip tiny chunks
MAX_FILE_SIZE = 10 * 1024 * 1024  # 10 MB — skip files larger than this
|
2026-04-04 18:16:04 -07:00
|
|
|
|
|
|
|
|
|
2026-04-07 22:26:06 +08:00
|
|
|
# =============================================================================
|
|
|
|
|
# IGNORE MATCHING
|
|
|
|
|
# =============================================================================
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class GitignoreMatcher:
    """Lightweight matcher for one directory's .gitignore patterns.

    Each rule is a dict with four keys:

    * ``pattern``  – the pattern text with leading/trailing slashes removed
    * ``anchored`` – the pattern started with ``/``
    * ``dir_only`` – the pattern ended with ``/``
    * ``negated``  – the pattern started with an *unescaped* ``!``
    """

    def __init__(self, base_dir: Path, rules: list):
        # Directory that owns the .gitignore; paths are matched relative to it.
        self.base_dir = base_dir
        # Parsed rules, kept in file order (later rules override earlier ones).
        self.rules = rules

    @classmethod
    def from_dir(cls, dir_path: Path):
        """Parse ``dir_path/.gitignore`` into a matcher.

        Returns ``None`` when the file is missing, unreadable, or contains no
        usable rules.
        """
        gitignore_path = dir_path / ".gitignore"
        if not gitignore_path.is_file():
            return None

        try:
            lines = gitignore_path.read_text(encoding="utf-8", errors="replace").splitlines()
        except (OSError, ValueError):
            # Best effort: an unreadable .gitignore is treated as absent.
            return None

        rules = []
        for raw_line in lines:
            line = raw_line.strip()
            if not line:
                continue

            # A leading backslash escapes "#" / "!" so they are literal
            # characters of the pattern rather than comment / negation markers.
            escaped = line.startswith("\\#") or line.startswith("\\!")
            if escaped:
                line = line[1:]
            elif line.startswith("#"):
                continue

            # Only an unescaped "!" negates. Without the ``escaped`` guard the
            # freshly unescaped "\!name" would be misread as a negation of
            # "name" and the literal file "!name" would never match.
            negated = not escaped and line.startswith("!")
            if negated:
                line = line[1:]

            anchored = line.startswith("/")
            if anchored:
                line = line.lstrip("/")

            dir_only = line.endswith("/")
            if dir_only:
                line = line.rstrip("/")

            if not line:
                continue

            rules.append(
                {
                    "pattern": line,
                    "anchored": anchored,
                    "dir_only": dir_only,
                    "negated": negated,
                }
            )

        if not rules:
            return None

        return cls(dir_path, rules)

    def matches(self, path: Path, is_dir: bool = None):
        """Decide whether ``path`` is ignored by this matcher.

        Returns ``True`` (ignored), ``False`` (explicitly re-included by a
        negated rule) or ``None`` when no rule applies, when ``path`` is not
        under ``base_dir``, or when the relative path is empty. When ``is_dir``
        is ``None`` the filesystem is consulted via ``path.is_dir()``.
        """
        try:
            relative = path.relative_to(self.base_dir).as_posix().strip("/")
        except ValueError:
            return None

        if not relative:
            return None

        if is_dir is None:
            is_dir = path.is_dir()

        ignored = None
        # Every rule is evaluated; the last matching rule decides.
        for rule in self.rules:
            if self._rule_matches(rule, relative, is_dir):
                ignored = not rule["negated"]
        return ignored

    def _rule_matches(self, rule: dict, relative: str, is_dir: bool) -> bool:
        """Return True when a single parsed rule applies to ``relative``."""
        pattern = rule["pattern"]
        parts = relative.split("/")
        pattern_parts = pattern.split("/")
        # Anchored patterns and patterns containing a slash are matched from
        # the matcher's base directory; bare names may match any component.
        rooted = rule["anchored"] or len(pattern_parts) > 1

        if rule["dir_only"]:
            # A directory-only rule can only match directory components, so
            # the final component of a file path is excluded.
            target_parts = parts if is_dir else parts[:-1]
            if not target_parts:
                return False
            if rooted:
                return self._match_from_root(target_parts, pattern_parts)
            return any(fnmatch.fnmatch(part, pattern) for part in target_parts)

        if rooted:
            return self._match_from_root(parts, pattern_parts)

        return any(fnmatch.fnmatch(part, pattern) for part in parts)

    def _match_from_root(self, target_parts: list, pattern_parts: list) -> bool:
        """Match ``pattern_parts`` against a prefix of ``target_parts``.

        ``**`` consumes zero or more path components. The match succeeds as
        soon as the pattern is exhausted, so anything below a matched path
        also matches.
        """

        def walk(path_index: int, pattern_index: int) -> bool:
            if pattern_index == len(pattern_parts):
                return True

            if path_index == len(target_parts):
                # Path exhausted: only trailing "**" parts may remain.
                return all(part == "**" for part in pattern_parts[pattern_index:])

            pattern_part = pattern_parts[pattern_index]
            if pattern_part == "**":
                # Either "**" matches nothing, or it swallows one component.
                return walk(path_index, pattern_index + 1) or walk(
                    path_index + 1, pattern_index
                )

            if not fnmatch.fnmatch(target_parts[path_index], pattern_part):
                return False

            return walk(path_index + 1, pattern_index + 1)

        return walk(0, 0)
|
|
|
|
|
|
|
|
|
|
|
2026-04-08 00:02:21 +08:00
|
|
|
def load_gitignore_matcher(dir_path: Path, cache: dict):
    """Return the matcher for ``dir_path``, parsing its .gitignore at most once.

    ``cache`` maps directory paths to matchers (or ``None`` when the directory
    has no usable .gitignore) and is updated in place on a miss.
    """
    if dir_path in cache:
        return cache[dir_path]

    matcher = GitignoreMatcher.from_dir(dir_path)
    cache[dir_path] = matcher
    return matcher
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def is_gitignored(path: Path, matchers: list, is_dir: bool = False) -> bool:
    """Fold the verdicts of all active matchers; the last non-``None`` one wins.

    ``matchers`` is consulted in the order given. A matcher that returns
    ``None`` has no opinion and leaves the running verdict unchanged. With no
    opinions at all the path is not ignored.
    """
    verdict = False
    for active in matchers:
        outcome = active.matches(path, is_dir=is_dir)
        if outcome is None:
            continue
        verdict = outcome
    return verdict
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def should_skip_dir(dirname: str) -> bool:
    """Tell whether a directory name is skipped before any gitignore matching.

    A name is skipped when it is listed in ``SKIP_DIRS`` or ends with
    ``.egg-info``.
    """
    if dirname in SKIP_DIRS:
        return True
    return dirname.endswith(".egg-info")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def normalize_include_paths(include_ignored: list) -> set:
    """Turn raw include entries into a set of project-relative POSIX strings.

    Each entry is stringified, stripped of surrounding whitespace and of
    leading/trailing slashes; entries that end up empty are dropped. ``None``
    or an empty list yields an empty set.
    """
    trimmed = (str(entry).strip().strip("/") for entry in include_ignored or [])
    return {Path(item).as_posix() for item in trimmed if item}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def is_exact_force_include(path: Path, project_path: Path, include_paths: set) -> bool:
    """Tell whether ``path`` itself (not an ancestor or descendant) was force-included.

    Returns ``False`` when there are no include paths or when ``path`` does
    not live under ``project_path``.
    """
    if include_paths:
        try:
            rel = path.relative_to(project_path)
        except ValueError:
            # Not inside the project: cannot be an include override.
            return False
        return rel.as_posix().strip("/") in include_paths
    return False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def is_force_included(path: Path, project_path: Path, include_paths: set) -> bool:
    """Tell whether ``path`` is covered by an explicit include override.

    A path is covered when it equals an include path, lies beneath one, or is
    an ancestor directory of one (so the walk can still descend to it).
    Matching is done on whole path components. Returns ``False`` when there
    are no include paths or ``path`` is outside ``project_path``.
    """
    if not include_paths:
        return False

    try:
        rel = path.relative_to(project_path).as_posix().strip("/")
    except ValueError:
        return False

    if not rel:
        return False

    return any(
        rel == wanted
        or rel.startswith(f"{wanted}/")  # path is inside an included directory
        or wanted.startswith(f"{rel}/")  # path is an ancestor of an included path
        for wanted in include_paths
    )
|
|
|
|
|
|
|
|
|
|
|
2026-04-04 18:16:04 -07:00
|
|
|
# =============================================================================
|
|
|
|
|
# CONFIG
|
|
|
|
|
# =============================================================================
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def load_config(project_dir: str) -> dict:
    """Load mempalace.yaml from project directory (falls back to mempal.yaml).

    When neither file exists, a warning is printed to stderr and a default
    configuration is returned: the wing is the resolved directory's basename
    and there is a single ``general`` room.

    Otherwise returns whatever ``yaml.safe_load`` produces for the file.
    PyYAML is imported only when a config file actually has to be parsed, so
    the default path works without it.
    """
    resolved_project_dir = Path(project_dir).expanduser().resolve()
    config_path = resolved_project_dir / "mempalace.yaml"
    if not config_path.exists():
        # Fallback to legacy name
        legacy_path = resolved_project_dir / "mempal.yaml"
        if legacy_path.exists():
            config_path = legacy_path
        else:
            wing_name = resolved_project_dir.name
            print(
                f" No mempalace.yaml found in {resolved_project_dir} "
                f"— using auto-detected defaults (wing='{wing_name}'). "
                "Directories with the same basename will share a wing; "
                "add mempalace.yaml to disambiguate.",
                file=sys.stderr,
            )
            return {
                "wing": wing_name,
                "rooms": [
                    {
                        "name": "general",
                        "description": "All project files",
                        "keywords": ["general"],
                    }
                ],
            }

    import yaml

    # Read as UTF-8 explicitly: relying on the locale's default encoding
    # mis-decodes non-ASCII room names or keywords on non-UTF-8 platforms.
    with open(config_path, encoding="utf-8") as f:
        return yaml.safe_load(f)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# =============================================================================
|
|
|
|
|
# FILE ROUTING — which room does this file belong to?
|
|
|
|
|
# =============================================================================
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def detect_room(filepath: Path, content: str, rooms: list, project_path: Path) -> str:
    """
    Route a file to the right room.

    Priority:
    1. A folder in the file's path matches a room name or keyword
       (equality or substring in either direction)
    2. The filename stem matches a room name (substring in either direction)
    3. Content keyword scoring over the first 2000 characters
    4. Fallback: "general"

    Room names and keywords are compared case-insensitively. Empty names,
    empty keywords and a missing/null ``keywords`` entry are ignored: an
    empty term would otherwise match every folder and every content string.

    Returns the ``name`` of the winning room exactly as given in ``rooms``.
    Raises ``ValueError`` if ``filepath`` is not under ``project_path``.
    """
    relative = str(filepath.relative_to(project_path)).lower()
    filename = filepath.stem.lower()
    content_lower = content[:2000].lower()

    # Normalise every room once: (original name, lowered name, lowered keywords).
    prepared = []
    for room in rooms:
        name_lower = str(room["name"]).lower()
        keywords_lower = [
            kw
            for kw in (
                str(k).lower() for k in (room.get("keywords") or []) if k is not None
            )
            if kw
        ]
        prepared.append((room["name"], name_lower, keywords_lower))

    # Priority 1: folder path matches room name or keywords
    path_parts = relative.replace("\\", "/").split("/")
    for part in path_parts[:-1]:  # skip filename itself
        for room_name, name_lower, keywords_lower in prepared:
            candidates = ([name_lower] if name_lower else []) + keywords_lower
            if any(c in part or part in c for c in candidates):
                return room_name

    # Priority 2: filename matches room name
    for room_name, name_lower, _ in prepared:
        if name_lower and (name_lower in filename or filename in name_lower):
            return room_name

    # Priority 3: keyword scoring from room keywords + name
    scores = defaultdict(int)
    for room_name, name_lower, keywords_lower in prepared:
        for term in keywords_lower + ([name_lower] if name_lower else []):
            scores[room_name] += content_lower.count(term)

    if scores:
        # On a tie, max() keeps the room that was configured first.
        best = max(scores, key=scores.get)
        if scores[best] > 0:
            return best

    return "general"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# =============================================================================
|
|
|
|
|
# CHUNKING
|
|
|
|
|
# =============================================================================
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def chunk_text(content: str, source_file: str) -> list:
    """
    Cut ``content`` into overlapping, drawer-sized pieces.

    Each window spans at most ``CHUNK_SIZE`` characters. When a window does not
    reach the end of the text, it is shortened to the last blank line (or,
    failing that, the last newline) found in its second half. Pieces shorter
    than ``MIN_CHUNK_SIZE`` after stripping are dropped. Consecutive windows
    overlap by ``CHUNK_OVERLAP`` characters.

    ``source_file`` is accepted for interface compatibility and not used.

    Returns list of {"content": str, "chunk_index": int}
    """
    text = content.strip()
    if not text:
        return []

    total = len(text)
    drawers = []
    cursor = 0

    while cursor < total:
        stop = min(cursor + CHUNK_SIZE, total)

        if stop < total:
            # Prefer a paragraph boundary, then a line boundary, but only if
            # it keeps the chunk longer than half the target size.
            for separator in ("\n\n", "\n"):
                cut = text.rfind(separator, cursor, stop)
                if cut > cursor + CHUNK_SIZE // 2:
                    stop = cut
                    break

        piece = text[cursor:stop].strip()
        if len(piece) >= MIN_CHUNK_SIZE:
            # Indices count only the pieces that are kept.
            drawers.append({"content": piece, "chunk_index": len(drawers)})

        cursor = stop if stop >= total else stop - CHUNK_OVERLAP

    return drawers
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# =============================================================================
|
|
|
|
|
# PALACE — ChromaDB operations
|
|
|
|
|
# =============================================================================
|
|
|
|
|
|
|
|
|
|
|
2026-04-13 17:37:45 -03:00
|
|
|
_ENTITY_REGISTRY_PATH = os.path.join(os.path.expanduser("~"), ".mempalace", "known_entities.json")
|
2026-04-13 18:20:11 -03:00
|
|
|
_ENTITY_REGISTRY_CACHE: dict = {"mtime": None, "names": frozenset(), "raw": {}}
|
2026-04-13 17:37:45 -03:00
|
|
|
_ENTITY_EXTRACT_WINDOW = 5000 # chars of content scanned for capitalized words
|
|
|
|
|
_ENTITY_METADATA_LIMIT = 25 # max entities packed into the metadata field
|
|
|
|
|
|
|
|
|
|
|
2026-04-13 18:20:11 -03:00
|
|
|
def _refresh_known_entities_cache() -> None:
    """Bring ``_ENTITY_REGISTRY_CACHE`` in line with the registry file on disk.

    The file at ``_ENTITY_REGISTRY_PATH`` is re-read only when its mtime
    differs from the cached one. If the mtime cannot be read, a previously
    populated cache is reset to empty. If the file cannot be read or parsed,
    empty values are cached under the new mtime.

    Both ``_load_known_entities`` (flat set) and ``_load_known_entities_raw``
    (category dict) rely on this single mtime-gated read.
    """
    cache = _ENTITY_REGISTRY_CACHE

    try:
        current_mtime = os.path.getmtime(_ENTITY_REGISTRY_PATH)
    except OSError:
        if cache["mtime"] is not None:
            cache.update(mtime=None, names=frozenset(), raw={})
        return

    if cache["mtime"] == current_mtime:
        return

    collected: set = set()
    categories: dict = {}
    try:
        import json

        with open(_ENTITY_REGISTRY_PATH, "r", encoding="utf-8") as handle:
            payload = json.load(handle)
        if isinstance(payload, dict):
            categories = payload
            for group in payload.values():
                # A category is either a list of names or a dict keyed by name.
                if isinstance(group, list):
                    collected.update(str(item) for item in group if item)
                elif isinstance(group, dict):
                    collected.update(str(key) for key in group if key)
    except Exception:
        # Best effort: a broken registry behaves like an empty one.
        collected, categories = set(), {}

    cache["mtime"] = current_mtime
    cache["names"] = frozenset(collected)
    cache["raw"] = categories
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _load_known_entities() -> frozenset:
    """Return every known entity name, merged across all registry categories.

    The cache is refreshed first, so the result reflects the registry file's
    current mtime.
    """
    _refresh_known_entities_cache()
    names = _ENTITY_REGISTRY_CACHE["names"]
    return names
|
|
|
|
|
|
|
|
|
|
|
2026-04-13 18:20:11 -03:00
|
|
|
def _load_known_entities_raw() -> dict:
    """Return the registry as a category dict, shape
    ``{"category": ["Name1", ...], ...}``.

    The cache is refreshed first. For callers (e.g., fact_checker) that need
    categories rather than a flat name set. The result is a shallow copy, so
    adding or removing top-level keys does not touch the cache.
    """
    _refresh_known_entities_cache()
    return {**_ENTITY_REGISTRY_CACHE["raw"]}
|
|
|
|
|
|
|
|
|
|
|
2026-04-13 17:11:11 -07:00
|
|
|
# Hall keyword table, filled from MempalaceConfig on the first detect_hall() call.
_HALL_KEYWORDS_CACHE = None


def detect_hall(content: str) -> str:
    """Pick the hall whose keywords occur most in the start of ``content``.

    Halls connect rooms within a wing — they categorize the TYPE of content
    (emotional, technical, family, etc.) while rooms categorize the TOPIC.

    Only the first 3000 characters are examined, lower-cased. A hall scores
    one point per keyword that occurs at least once. On a tie the hall listed
    first wins. Returns "general" when nothing matches.
    """
    global _HALL_KEYWORDS_CACHE
    if _HALL_KEYWORDS_CACHE is None:
        from .config import MempalaceConfig

        _HALL_KEYWORDS_CACHE = MempalaceConfig().hall_keywords

    sample = content[:3000].lower()

    best_hall, best_score = "general", 0
    for hall, keywords in _HALL_KEYWORDS_CACHE.items():
        hits = sum(kw in sample for kw in keywords)
        # Strict ">" keeps the earliest hall on equal scores.
        if hits > best_score:
            best_hall, best_score = hall, hits
    return best_hall
|
|
|
|
|
|
|
|
|
|
|
2026-04-13 01:47:19 -07:00
|
|
|
def _extract_entities_for_metadata(content: str) -> str:
    """Build the ``entities`` metadata string for a piece of content.

    Two sources are merged:

    * names from the known-entity registry that occur in ``content`` as whole
      words (not flanked by word characters);
    * capitalized words (one upper-case letter followed by at least two
      lower-case letters) seen at least twice in the first
      ``_ENTITY_EXTRACT_WINDOW`` characters, excluding ``_ENTITY_STOPLIST``
      so sentence-starters (``When``, ``After``, ``The``, …) are not taken
      for proper nouns.

    Returns the sorted names joined with ``;``, or ``""`` when none were found.
    The list is cut to ``_ENTITY_METADATA_LIMIT`` entries before joining, so a
    name is never split.
    """
    import re

    from .palace import _ENTITY_STOPLIST

    found = {
        name
        for name in _load_known_entities()
        if re.search(r"(?<!\w)" + re.escape(name) + r"(?!\w)", content)
    }

    tally = defaultdict(int)
    for word in re.findall(r"\b[A-Z][a-z]{2,}\b", content[:_ENTITY_EXTRACT_WINDOW]):
        if word not in _ENTITY_STOPLIST:
            tally[word] += 1
    found.update(word for word, seen in tally.items() if seen >= 2 and len(word) > 2)

    if not found:
        return ""
    # Truncate the *list*, not the joined string — never split a name.
    return ";".join(sorted(found)[:_ENTITY_METADATA_LIMIT])
|
2026-04-13 01:47:19 -07:00
|
|
|
|
|
|
|
|
|
2026-04-04 18:16:04 -07:00
|
|
|
def add_drawer(
    collection, wing: str, room: str, content: str, source_file: str, chunk_index: int, agent: str
):
    """Upsert one chunk into ``collection`` as a drawer.

    The drawer id combines wing, room and the first 24 hex digits of the
    SHA-256 of ``source_file + str(chunk_index)``. The metadata records where
    the chunk came from, who filed it and when, the normalize version, the
    detected hall and, when available, the source file's mtime and extracted
    entity names.

    Returns ``True`` on success; any exception propagates to the caller.
    """
    digest = hashlib.sha256((source_file + str(chunk_index)).encode()).hexdigest()[:24]
    drawer_id = f"drawer_{wing}_{room}_{digest}"

    metadata = {
        "wing": wing,
        "room": room,
        "source_file": source_file,
        "chunk_index": chunk_index,
        "added_by": agent,
        "filed_at": datetime.now().isoformat(),
        "normalize_version": NORMALIZE_VERSION,
    }

    # Store file mtime so we can detect modifications later.
    try:
        metadata["source_mtime"] = os.path.getmtime(source_file)
    except OSError:
        pass

    # Tag with hall for graph connectivity within wings
    metadata["hall"] = detect_hall(content)

    # Tag with entity names for filterable search
    entity_tags = _extract_entities_for_metadata(content)
    if entity_tags:
        metadata["entities"] = entity_tags

    collection.upsert(
        documents=[content],
        ids=[drawer_id],
        metadatas=[metadata],
    )
    return True
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# =============================================================================
|
|
|
|
|
# PROCESS ONE FILE
|
|
|
|
|
# =============================================================================
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def process_file(
    filepath: Path,
    project_path: Path,
    collection,
    wing: str,
    rooms: list,
    agent: str,
    dry_run: bool,
    closets_col=None,
) -> tuple:
    """Read one file, route it to a room, and file its chunks as drawers.

    Returns ``(drawer_count, room_name)``. The count is 0 and the room is
    "general" when the file was already mined, cannot be read, or is shorter
    than ``MIN_CHUNK_SIZE`` after stripping. In a dry run nothing is written;
    the would-be chunk count is printed and returned.
    """
    source_file = str(filepath)
    nothing_filed = (0, "general")

    # Skip if already filed
    if not dry_run and file_already_mined(collection, source_file, check_mtime=True):
        return nothing_filed

    try:
        text = filepath.read_text(encoding="utf-8", errors="replace")
    except OSError:
        return nothing_filed

    content = text.strip()
    if len(content) < MIN_CHUNK_SIZE:
        return nothing_filed

    room = detect_room(filepath, content, rooms, project_path)
    chunks = chunk_text(content, source_file)

    if dry_run:
        print(f" [DRY RUN] {filepath.name} → room:{room} ({len(chunks)} drawers)")
        return len(chunks), room

    # Lock this file so concurrent agents don't interleave delete+insert.
    # Without the lock, two agents can both pass file_already_mined(),
    # both delete, and both insert — creating duplicates or losing data.
    with mine_lock(source_file):
        # Re-check after acquiring lock — another agent may have just finished
        if file_already_mined(collection, source_file, check_mtime=True):
            return 0, room

        # Purge stale drawers for this file before re-inserting the fresh chunks.
        # Converts modified-file re-mines from upsert-over-existing-IDs (which hits
        # hnswlib's thread-unsafe updatePoint path and can segfault on macOS ARM
        # with chromadb 0.6.3) into a clean delete+insert, bypassing the update
        # path entirely.
        try:
            collection.delete(where={"source_file": source_file})
        except Exception:
            pass

        drawers_added = 0
        for piece in chunks:
            if add_drawer(
                collection=collection,
                wing=wing,
                room=room,
                content=piece["content"],
                source_file=source_file,
                chunk_index=piece["chunk_index"],
                agent=agent,
            ):
                drawers_added += 1

        # Build closet — the searchable index pointing to these drawers.
        # Purge first: a re-mine (mtime change or normalize_version bump) must
        # fully replace the prior closets, not append to them.
        if closets_col and drawers_added > 0:
            # Same id scheme as add_drawer, recomputed for every chunk.
            drawer_ids = []
            for piece in chunks:
                key = (source_file + str(piece["chunk_index"])).encode()
                drawer_ids.append(
                    f"drawer_{wing}_{room}_{hashlib.sha256(key).hexdigest()[:24]}"
                )

            closet_lines = build_closet_lines(source_file, drawer_ids, content, wing, room)
            source_digest = hashlib.sha256(source_file.encode()).hexdigest()[:24]
            closet_id_base = f"closet_{wing}_{room}_{source_digest}"

            entity_tags = _extract_entities_for_metadata(content)
            closet_meta = {
                "wing": wing,
                "room": room,
                "source_file": source_file,
                "drawer_count": drawers_added,
                "filed_at": datetime.now().isoformat(),
                "normalize_version": NORMALIZE_VERSION,
            }
            if entity_tags:
                closet_meta["entities"] = entity_tags

            purge_file_closets(closets_col, source_file)
            upsert_closet_lines(closets_col, closet_id_base, closet_lines, closet_meta)

    return drawers_added, room
|
2026-04-04 18:16:04 -07:00
|
|
|
|
|
|
|
|
|
|
|
|
|
# =============================================================================
|
|
|
|
|
# SCAN PROJECT
|
|
|
|
|
# =============================================================================
|
|
|
|
|
|
|
|
|
|
|
2026-04-08 00:02:21 +08:00
|
|
|
def scan_project(
    project_dir: str,
    respect_gitignore: bool = True,
    include_ignored: list = None,
) -> list:
    """Walk ``project_dir`` and collect the files worth mining.

    Directories are pruned when ``should_skip_dir`` rejects them or, if
    ``respect_gitignore`` is on, when an active .gitignore ignores them.
    Force-included directories (see ``include_ignored``) are never pruned.

    A file is kept only when all of the following hold:

    * it is not in ``SKIP_FILENAMES``, or it is force-included;
    * its suffix is in ``READABLE_EXTENSIONS``, or it is an exact include;
    * it is not gitignored, or it is force-included;
    * it is not a symlink;
    * its size can be read and does not exceed ``MAX_FILE_SIZE``.

    Returns the kept paths as a list of ``Path`` objects in walk order.
    """
    project_path = Path(project_dir).expanduser().resolve()
    include_paths = normalize_include_paths(include_ignored)
    matcher_cache = {}
    active_matchers = []
    files = []

    for root, dirs, filenames in os.walk(project_path):
        root_path = Path(root)

        if respect_gitignore:
            # Keep only matchers owned by this directory or one of its
            # ancestors, then add this directory's own .gitignore (if any).
            active_matchers = [
                matcher
                for matcher in active_matchers
                if root_path == matcher.base_dir or matcher.base_dir in root_path.parents
            ]
            current_matcher = load_gitignore_matcher(root_path, matcher_cache)
            if current_matcher is not None:
                active_matchers.append(current_matcher)

        use_gitignore = bool(respect_gitignore and active_matchers)

        # Prune in place so os.walk does not descend into rejected directories.
        kept_dirs = []
        for d in dirs:
            child = root_path / d
            if not is_force_included(child, project_path, include_paths):
                if should_skip_dir(d):
                    continue
                if use_gitignore and is_gitignored(child, active_matchers, is_dir=True):
                    continue
            kept_dirs.append(d)
        dirs[:] = kept_dirs

        for filename in filenames:
            filepath = root_path / filename
            force_include = is_force_included(filepath, project_path, include_paths)
            exact_force_include = is_exact_force_include(filepath, project_path, include_paths)

            if filename in SKIP_FILENAMES and not force_include:
                continue
            if not exact_force_include and filepath.suffix.lower() not in READABLE_EXTENSIONS:
                continue
            if (
                use_gitignore
                and not force_include
                and is_gitignored(filepath, active_matchers, is_dir=False)
            ):
                continue
            # Skip symlinks — prevents following links to /dev/urandom, etc.
            if filepath.is_symlink():
                continue
            # Skip files exceeding size limit
            try:
                oversized = filepath.stat().st_size > MAX_FILE_SIZE
            except OSError:
                continue
            if oversized:
                continue
            files.append(filepath)

    return files
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# =============================================================================
|
|
|
|
|
# MAIN: MINE
|
|
|
|
|
# =============================================================================
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def mine(
    project_dir: str,
    palace_path: str,
    wing_override: str = None,
    agent: str = "mempalace",
    limit: int = 0,
    dry_run: bool = False,
    respect_gitignore: bool = True,
    include_ignored: list = None,
):
    """Mine a project directory into the palace.

    Loads the project config, scans for files (optionally capped at ``limit``),
    prints a header, runs ``process_file`` on each file and prints a summary
    with per-room file counts. In a dry run no collections are opened and
    nothing is filed.
    """
    project_path = Path(project_dir).expanduser().resolve()
    config = load_config(project_dir)

    wing = wing_override or config["wing"]
    rooms = config.get("rooms", [{"name": "general", "description": "All project files"}])

    files = scan_project(
        project_dir,
        respect_gitignore=respect_gitignore,
        include_ignored=include_ignored,
    )
    if limit > 0:
        files = files[:limit]

    banner = "=" * 55
    print(f"\n{banner}")
    print(" MemPalace Mine")
    print(f"{banner}")
    print(f" Wing: {wing}")
    print(f" Rooms: {', '.join(r['name'] for r in rooms)}")
    print(f" Files: {len(files)}")
    print(f" Palace: {palace_path}")
    if dry_run:
        print(" DRY RUN — nothing will be filed")
    if not respect_gitignore:
        print(" .gitignore: DISABLED")
    if include_ignored:
        print(f" Include: {', '.join(sorted(normalize_include_paths(include_ignored)))}")
    print(f"{'─' * 55}\n")

    # Collections are only opened when something will actually be written.
    collection = None
    closets_col = None
    if not dry_run:
        collection = get_collection(palace_path)
        closets_col = get_closets_collection(palace_path)

    total_drawers = 0
    files_skipped = 0
    room_counts = defaultdict(int)

    for i, filepath in enumerate(files, 1):
        drawers, room = process_file(
            filepath=filepath,
            project_path=project_path,
            collection=collection,
            wing=wing,
            rooms=rooms,
            agent=agent,
            dry_run=dry_run,
            closets_col=closets_col,
        )
        if drawers == 0 and not dry_run:
            files_skipped += 1
            continue

        total_drawers += drawers
        room_counts[room] += 1
        if not dry_run:
            print(f" ✓ [{i:4}/{len(files)}] {filepath.name[:50]:50} +{drawers}")

    print(f"\n{banner}")
    print(" Done.")
    print(f" Files processed: {len(files) - files_skipped}")
    print(f" Files skipped (already filed): {files_skipped}")
    print(f" Drawers filed: {total_drawers}")
    print("\n By room:")
    for room, count in sorted(room_counts.items(), key=lambda pair: pair[1], reverse=True):
        print(f" {room:20} {count} files")
    print('\n Next: mempalace search "what you\'re looking for"')
    print(f"{banner}\n")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# =============================================================================
|
|
|
|
|
# STATUS
|
|
|
|
|
# =============================================================================
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def status(palace_path: str):
    """Print how many drawers the palace holds, grouped by wing and room.

    If the collection cannot be opened, a hint on how to create a palace is
    printed instead and the function returns.
    """
    try:
        col = get_collection(palace_path, create=False)
    except Exception:
        print(f"\n No palace found at {palace_path}")
        print(" Run: mempalace init <dir> then mempalace mine <dir>")
        return

    # Count by wing and room
    total = col.count()
    if total:
        metas = col.get(limit=total, include=["metadatas"])["metadatas"]
    else:
        metas = []

    wing_rooms = defaultdict(lambda: defaultdict(int))
    for meta in metas:
        wing_name = meta.get("wing", "?")
        room_name = meta.get("room", "?")
        wing_rooms[wing_name][room_name] += 1

    banner = "=" * 55
    print(f"\n{banner}")
    print(f" MemPalace Status — {len(metas)} drawers")
    print(f"{banner}\n")
    for wing, rooms in sorted(wing_rooms.items()):
        print(f" WING: {wing}")
        # Largest rooms first.
        for room, count in sorted(rooms.items(), key=lambda pair: pair[1], reverse=True):
            print(f" ROOM: {room:20} {count:5} drawers")
        print()
    print(f"{banner}\n")
|