Files
mempalace/mempalace/palace.py
T
bensig c2308a1e36 fix: address code review — restore mtime check, bound metadata reads, harden security
Review fixes (from Sage's review):
- Restore mtime check in file_already_mined (check_mtime=True for miner)
- Restore limit=10000 on MCP metadata fetches to prevent OOM on large palaces
- Apply _SAFE_NAME_RE regex in sanitize_name (was dead code)
- Drop raw_aaak metadata duplication in diary_write
- chmod 0o700 on WAL dir, 0o600 on WAL file
- Add check_same_thread=False on KnowledgeGraph SQLite connection
- Remove __del__ (unreliable) and dead PRAGMA foreign_keys=ON
2026-04-09 08:52:24 -07:00

72 lines
1.9 KiB
Python

"""
palace.py — Shared palace operations.
Consolidates ChromaDB access patterns used by both miners and the MCP server.
"""
import os
import chromadb
# Directory names collected in one shared set.
# NOTE(review): presumably consulted by the miners to prune directory walks —
# confirm against the callers; nothing in this module reads it.
SKIP_DIRS = {
    # version control / editor state
    ".git", ".idea", ".vscode",
    # dependency trees and virtual environments
    "node_modules", ".venv", "venv", "env", ".eggs",
    # build and packaging output
    "dist", "build", "target", ".next",
    # tool caches
    "__pycache__", ".cache", ".ruff_cache", ".mypy_cache", ".pytest_cache",
    ".ipynb_checkpoints",
    # test runners and coverage reports
    ".tox", ".nox", "coverage", "htmlcov",
    # the palace's own data directory
    ".mempalace",
}
def get_collection(palace_path: str, collection_name: str = "mempalace_drawers"):
    """Get or create the palace ChromaDB collection.

    Args:
        palace_path: Directory holding the persistent ChromaDB store. It is
            created if missing and restricted to the owner (mode 0o700)
            where the platform supports it.
        collection_name: Name of the collection to open.

    Returns:
        The ChromaDB collection named ``collection_name``, created on first
        use.

    Raises:
        OSError: If ``palace_path`` cannot be created.
        Exception: Any error raised by ChromaDB while opening the store or
            the collection is propagated unchanged.
    """
    os.makedirs(palace_path, exist_ok=True)
    try:
        # Best effort only: tightening permissions must never block access
        # to the palace on platforms/filesystems without POSIX modes.
        os.chmod(palace_path, 0o700)
    except (OSError, NotImplementedError):
        pass
    client = chromadb.PersistentClient(path=palace_path)
    # A single get-or-create call replaces the former
    # "get, and on *any* exception create" sequence: that pattern mistook
    # unrelated failures for a missing collection and left a window in which
    # two processes could both try to create it.
    return client.get_or_create_collection(collection_name)
def file_already_mined(collection, source_file: str, check_mtime: bool = False) -> bool:
    """Report whether ``source_file`` already has an entry in the palace.

    With ``check_mtime=False`` (convo miner) the mere presence of an entry
    whose ``source_file`` metadata matches is enough.

    With ``check_mtime=True`` (project miner) the entry must also carry a
    ``source_mtime`` equal to the file's current modification time; a
    missing or different value yields ``False`` so the file is mined again.

    Any exception raised along the way (collection lookup, metadata access,
    reading the file's mtime) is swallowed and reported as ``False``.
    """
    try:
        lookup = collection.get(where={"source_file": source_file}, limit=1)

        # No matching entry at all.
        if not lookup.get("ids"):
            return False

        # Existence-only mode stops here.
        if not check_mtime:
            return True

        recorded = lookup.get("metadatas", [{}])[0].get("source_mtime")
        if recorded is None:
            return False

        on_disk = os.path.getmtime(source_file)
        return on_disk == float(recorded)
    except Exception:
        return False