28e263748b
Non-trivial merge in convo_miner.py: this branch's _file_convo_chunks (purge stale + upsert with normalize_version) and develop's _file_chunks_locked (mine_lock + double-checked file_already_mined) both touched the same critical section. Combined into a single _file_chunks_locked helper that does lock → double-check → purge → upsert, preserving both the multi-agent safety guarantee from #784 and the schema-rebuild contract from this PR. Also folds develop's mine_lock import into both miner.py and convo_miner.py alongside NORMALIZE_VERSION. 707/707 tests pass, ruff + format clean under CI-pinned 0.4.x.
135 lines
3.9 KiB
Python
135 lines
3.9 KiB
Python
"""
palace.py — Shared palace operations.

Consolidates collection access patterns used by both miners and the MCP server.
"""
|
|
|
|
import contextlib
|
|
import hashlib
|
|
import os
|
|
|
|
from .backends.chroma import ChromaBackend
|
|
|
|
# Directory names excluded from processing, grouped by what produces them.
# NOTE(review): presumably consulted by the miners while walking a project
# tree — confirm against the callers, which are outside this module.
SKIP_DIRS = {
    # version control / editor state
    ".git",
    ".idea",
    ".vscode",
    # virtual environments
    ".venv",
    "env",
    "venv",
    # dependency and build output
    ".eggs",
    ".next",
    "build",
    "dist",
    "node_modules",
    "target",
    # tool caches
    "__pycache__",
    ".cache",
    ".ipynb_checkpoints",
    ".mypy_cache",
    ".pytest_cache",
    ".ruff_cache",
    # test runners / coverage reports
    ".nox",
    ".tox",
    "coverage",
    "htmlcov",
    # the palace's own data directory
    ".mempalace",
}
|
|
|
|
# Module-level backend instance, created once at import time;
# get_collection() delegates to it.
_DEFAULT_BACKEND = ChromaBackend()
|
|
|
|
# Schema version for drawer normalization. Bump when the normalization
# pipeline changes in a way that existing drawers should be rebuilt to pick up
# (e.g., new noise-stripping rules). `file_already_mined` treats drawers with
# a missing or stale `normalize_version` as "not mined", so the next mine pass
# silently rebuilds them — users don't need to manually erase + re-mine.
#
# `file_already_mined` reads a missing `normalize_version` as 1 and compares
# the stored value with `<`, so this must remain an orderable number that
# only ever increases.
#
# v2 (2026-04): introduced strip_noise() for Claude Code JSONL; previous
#   drawers stored system tags / hook chrome verbatim.
NORMALIZE_VERSION = 2
|
|
|
|
|
|
def get_collection(
    palace_path: str,
    collection_name: str = "mempalace_drawers",
    create: bool = True,
):
    """Return the palace collection, resolved through the default backend.

    All three arguments are forwarded unchanged to
    ``_DEFAULT_BACKEND.get_collection``; whatever that call returns (or
    raises) is passed straight back to the caller.

    Args:
        palace_path: Location of the palace, handed to the backend as-is.
        collection_name: Name of the collection to fetch.
        create: Forwarded to the backend. NOTE(review): presumably controls
            whether a missing collection is created — confirm in the backend.
    """
    backend = _DEFAULT_BACKEND
    return backend.get_collection(palace_path, collection_name=collection_name, create=create)
|
|
|
|
|
|
@contextlib.contextmanager
def mine_lock(source_file: str):
    """Cross-platform exclusive file lock for mine operations.

    Prevents multiple agents from mining the same file simultaneously,
    which causes duplicate drawers when the delete+insert cycle interleaves.

    The lock file lives in ``~/.mempalace/locks`` and is named after the
    first 16 hex digits of the SHA-256 of ``source_file``. The path string is
    hashed exactly as given (no normalization), so callers must spell the
    same file the same way to contend on the same lock.

    Acquisition waits until the lock is free on every platform.

    Args:
        source_file: Path string identifying the file being mined.

    Yields:
        None, while the exclusive lock is held.

    Raises:
        OSError: If the lock directory or file cannot be created, or if
            locking fails for a reason other than contention.
    """
    lock_dir = os.path.join(os.path.expanduser("~"), ".mempalace", "locks")
    os.makedirs(lock_dir, exist_ok=True)
    digest = hashlib.sha256(source_file.encode()).hexdigest()[:16]
    lock_path = os.path.join(lock_dir, digest + ".lock")

    # `with` guarantees the handle is closed on every exit path, including
    # an interrupt (KeyboardInterrupt etc.) arriving during the unlock step.
    with open(lock_path, "w") as lf:
        if os.name == "nt":
            import errno
            import msvcrt

            # msvcrt.locking(LK_LOCK) is not a true blocking lock: it retries
            # once per second and raises OSError (EDEADLOCK) after 10 failed
            # attempts. Keep retrying on that specific error so Windows waits
            # for the holder just like flock() does; anything else is a real
            # failure and propagates.
            contention_errno = getattr(errno, "EDEADLOCK", errno.EDEADLK)
            while True:
                try:
                    msvcrt.locking(lf.fileno(), msvcrt.LK_LOCK, 1)
                    break
                except OSError as exc:
                    if exc.errno != contention_errno:
                        raise
        else:
            import fcntl

            fcntl.flock(lf, fcntl.LOCK_EX)

        try:
            yield
        finally:
            # Explicit unlock is best-effort: a failure here must not mask an
            # exception raised by the caller's body.
            with contextlib.suppress(Exception):
                if os.name == "nt":
                    msvcrt.locking(lf.fileno(), msvcrt.LK_UNLCK, 1)
                else:
                    fcntl.flock(lf, fcntl.LOCK_UN)
|
|
|
|
|
|
def file_already_mined(collection, source_file: str, check_mtime: bool = False) -> bool:
    """Report whether ``source_file`` already has up-to-date drawers.

    A single drawer for the file is fetched (``limit=1``) and its metadata
    inspected. The answer is False — meaning "mine it (again)" — when:

    - the collection holds no drawer for this ``source_file``;
    - the drawer's ``normalize_version`` is absent or lower than
      ``NORMALIZE_VERSION`` (a normalization upgrade forces a rebuild);
    - ``check_mtime`` is set and the recorded ``source_mtime`` is absent or
      differs from the file's current mtime by 1 ms or more;
    - anything at all raises while checking.

    With ``check_mtime=True`` (used by project miner) a content change also
    triggers re-mining. With ``check_mtime=False`` (used by convo miner)
    transcripts are assumed immutable, so only the version gate applies.
    """
    try:
        hit = collection.get(where={"source_file": source_file}, limit=1)
        if not hit.get("ids"):
            return False

        meta = hit.get("metadatas", [{}])[0] or {}

        # Drawers written before v2 carry no version field; count them as v1.
        if meta.get("normalize_version", 1) < NORMALIZE_VERSION:
            return False

        if not check_mtime:
            return True

        recorded_mtime = meta.get("source_mtime")
        if recorded_mtime is None:
            return False

        on_disk_mtime = os.path.getmtime(source_file)
        drift = float(recorded_mtime) - on_disk_mtime
        return abs(drift) < 0.001
    except Exception:
        # Any failure is answered with "not mined" so the caller re-mines.
        return False
|