c2308a1e36
Review fixes (from Sage's review): - Restore mtime check in file_already_mined (check_mtime=True for miner) - Restore limit=10000 on MCP metadata fetches to prevent OOM on large palaces - Apply _SAFE_NAME_RE regex in sanitize_name (was dead code) - Drop raw_aaak metadata duplication in diary_write - chmod 0o700 on WAL dir, 0o600 on WAL file - Add check_same_thread=False on KnowledgeGraph SQLite connection - Remove __del__ (unreliable) and dead PRAGMA foreign_keys=ON
72 lines
1.9 KiB
Python
72 lines
1.9 KiB
Python
"""
palace.py — Shared palace operations.

Consolidates ChromaDB access patterns used by both miners and the MCP server.
"""
|
|
|
|
import os
|
|
import chromadb
|
|
|
|
# Directory names collected for exclusion. The consumer is not visible in this
# module; presumably the miners skip these while walking a project tree
# (verify against callers). Grouping below is for readability only.
SKIP_DIRS = {
    # Version control and editor/IDE state
    ".git",
    ".idea",
    ".vscode",
    # Dependency trees and virtual environments
    "node_modules",
    ".venv",
    "venv",
    "env",
    ".eggs",
    # Build and packaging output
    "dist",
    "build",
    "target",
    ".next",
    # Caches and tool working directories
    "__pycache__",
    ".cache",
    ".ruff_cache",
    ".mypy_cache",
    ".pytest_cache",
    ".ipynb_checkpoints",
    ".tox",
    ".nox",
    # Coverage reports
    "coverage",
    "htmlcov",
    # Directory named after this project
    ".mempalace",
}
|
|
|
|
|
|
def get_collection(palace_path: str, collection_name: str = "mempalace_drawers"):
    """Get or create the palace ChromaDB collection.

    Ensures ``palace_path`` exists, tries to restrict it to the owning user,
    opens a persistent ChromaDB client rooted there, and returns the
    collection named ``collection_name`` (creating it if it does not exist).

    Args:
        palace_path: Directory holding the persistent ChromaDB data. Created
            (including parents) when missing.
        collection_name: Name of the collection to open or create.

    Returns:
        The ChromaDB collection object.

    Raises:
        OSError: If ``palace_path`` cannot be created.
        Exception: Errors raised by ChromaDB while opening the store or the
            collection propagate to the caller.
    """
    os.makedirs(palace_path, exist_ok=True)
    try:
        # Best-effort hardening: owner-only access to the palace directory.
        # Deliberately ignored where the platform or filesystem refuses.
        os.chmod(palace_path, 0o700)
    except (OSError, NotImplementedError):
        pass
    client = chromadb.PersistentClient(path=palace_path)
    # A single get-or-create call replaces the previous "get, and on *any*
    # exception create" sequence, which treated every lookup failure as
    # "collection missing" and left a window between the two calls.
    return client.get_or_create_collection(collection_name)
|
|
|
|
|
|
def file_already_mined(collection, source_file: str, check_mtime: bool = False) -> bool:
    """Report whether ``source_file`` already has an entry in the palace.

    Fetches at most one record whose ``source_file`` metadata equals the given
    path.

    * ``check_mtime=False``: the presence of such a record is sufficient.
    * ``check_mtime=True``: the record's ``source_mtime`` metadata must also
      equal the file's current modification time. A missing or different
      value yields ``False`` so the caller can mine the file again.

    Any exception raised along the way (failed lookup, unreadable file,
    non-numeric stored mtime, ...) is swallowed and reported as ``False``.
    """
    try:
        hit = collection.get(where={"source_file": source_file}, limit=1)
        if not hit.get("ids"):
            return False
        if not check_mtime:
            # Existence alone answers the question.
            return True
        first_meta = hit.get("metadatas", [{}])[0]
        recorded = first_meta.get("source_mtime")
        if recorded is None:
            # Record has no mtime to compare against.
            return False
        on_disk = os.path.getmtime(source_file)
        return float(recorded) == on_disk
    except Exception:
        return False
|