diff --git a/mempalace/cli.py b/mempalace/cli.py index 964fa84..395130d 100644 --- a/mempalace/cli.py +++ b/mempalace/cli.py @@ -579,6 +579,84 @@ def cmd_sweep(args): sys.exit(1) +def cmd_sync(args): + """Prune drawers whose source files are gitignored, deleted, or moved (#1252).""" + from .mcp_server import _wal_log + from .palace import MineAlreadyRunning + from .sync import sync_palace + + palace_path = os.path.expanduser(args.palace) if args.palace else MempalaceConfig().palace_path + + if not os.path.isdir(palace_path): + print(f"\n No palace found at {palace_path}") + return + + project_dirs = [] + if args.dir: + project_dirs.append(os.path.expanduser(args.dir)) + project_dirs.extend(os.path.expanduser(r) for r in args.root) + project_dirs = project_dirs or None + + print(f"\n{'=' * 55}") + print(" MemPalace Sync — Gitignore-aware drawer prune") + print(f"{'=' * 55}") + print(f" Palace: {palace_path}") + if args.wing: + print(f" Wing: {args.wing}") + if project_dirs: + for p in project_dirs: + print(f" Project: {p}") + if args.dry_run: + print(" Mode: DRY RUN (no deletions)") + else: + print(" Mode: APPLY (deleting drawers)") + print(f"{'-' * 55}\n") + + try: + report = sync_palace( + palace_path=palace_path, + project_dirs=project_dirs, + wing=args.wing, + dry_run=args.dry_run, + wal_log=_wal_log, + ) + except MineAlreadyRunning as exc: + print(f"mempalace: {exc}", file=sys.stderr) + sys.exit(1) + except ValueError as exc: + print(f"mempalace: {exc}", file=sys.stderr) + sys.exit(2) + except Exception as exc: + print(f"mempalace: sync failed: {exc}", file=sys.stderr) + sys.exit(1) + + removed_suffix = "(would remove)" if args.dry_run else "(removed)" + print(f" Scanned: {report['scanned']}") + print(f" Kept: {report['kept']}") + print(f" Gitignored: {report['gitignored']} {removed_suffix}") + print(f" Missing: {report['missing']} {removed_suffix}") + print(f" No source: {report['no_source']} (kept)") + print(f" Out of scope: {report['out_of_scope']} (kept)") + + by_source = report.get("by_source") or {} + if by_source: + top = sorted(by_source.items(), key=lambda kv: -kv[1])[:5] + label = "Top sources to remove" if args.dry_run else "Top sources removed" + print(f"\n {label}:") + for src, n in top: + print(f" {src} ({n})") + + if args.dry_run: + if report["gitignored"] + report["missing"] > 0: + print("\n Re-run with --apply to commit these deletions.") + else: + print( + f"\n Removed {report['removed_drawers']} drawers, {report['removed_closets']} closets." + ) + + print(f"\n{'=' * 55}\n") + + def cmd_search(args): from .searcher import search, SearchError @@ -1214,6 +1292,38 @@ def main(): help="A .jsonl transcript file, or a directory to scan recursively", ) + # sync + p_sync = sub.add_parser( + "sync", + help="Prune drawers whose source files are gitignored, deleted, or moved (#1252)", + ) + p_sync.add_argument( + "dir", + nargs="?", + default=None, + help="Project root to sync (optional; auto-detects from drawer metadata)", + ) + p_sync.add_argument("--wing", default=None, help="Limit to one wing") + p_sync.add_argument( + "--root", + action="append", + default=[], + help="Additional project root (repeatable)", + ) + p_sync.add_argument( + "--dry-run", + dest="dry_run", + action="store_true", + default=True, + help="Preview only (default)", + ) + p_sync.add_argument( + "--apply", + dest="dry_run", + action="store_false", + help="Actually delete drawers (overrides --dry-run; requires --wing or a project root)", + ) + # search p_search = sub.add_parser("search", help="Find anything, exact words") p_search.add_argument("query", help="What to search for") @@ -1422,6 +1532,7 @@ def main(): "split": cmd_split, "search": cmd_search, "sweep": cmd_sweep, + "sync": cmd_sync, "mcp": cmd_mcp, "compress": cmd_compress, "wake-up": cmd_wakeup, diff --git a/mempalace/mcp_server.py b/mempalace/mcp_server.py index 521cb07..c88e7d1 100644 --- a/mempalace/mcp_server.py +++ b/mempalace/mcp_server.py @@ -990,6 +990,40 @@ def tool_delete_drawer(drawer_id: str): return {"success": False, "error": str(e)} +def tool_sync(project_dir: str = None, wing: str = None, apply: bool = False): + """Prune drawers whose source files are gitignored, missing, or moved (#1252).""" + global _metadata_cache + from .palace import MineAlreadyRunning + from .sync import sync_palace + + if not _config.palace_path: + np = _no_palace() + return {"success": False, "error": np.get("error", "no palace"), "hint": np.get("hint")} + project_dirs = [project_dir] if project_dir else None + try: + try: + report = sync_palace( + palace_path=_config.palace_path, + project_dirs=project_dirs, + wing=wing, + dry_run=not apply, + wal_log=_wal_log, + ) + return {"success": True, **report} + # Order matters: typed handlers must precede the bare Exception + # below, otherwise MineAlreadyRunning and ValueError fall into the + # generic "sync failed" branch and break the structured-error tests. + except MineAlreadyRunning as exc: + return {"success": False, "error": f"another mine is in progress: {exc}"} + except ValueError as exc: + return {"success": False, "error": str(exc)} + except Exception as exc: + return {"success": False, "error": f"sync failed: {exc}"} + finally: + if apply: + _metadata_cache = None + + def tool_get_drawer(drawer_id: str): """Fetch a single drawer by ID. Returns full content and metadata.""" col = _get_collection() @@ -1886,6 +1920,24 @@ TOOLS = { }, "handler": tool_delete_drawer, }, + "mempalace_sync": { + "description": "Prune drawers whose source files are gitignored, deleted, or moved. Returns dry-run report by default; pass apply=true to commit deletions.", + "input_schema": { + "type": "object", + "properties": { + "project_dir": { + "type": "string", + "description": "Project root to scope the sync (optional; auto-detected from drawer metadata if omitted)", + }, + "wing": {"type": "string", "description": "Limit to one wing (optional)"}, + "apply": { + "type": "boolean", + "description": "Actually delete drawers; default is dry-run preview", + }, + }, + }, + "handler": tool_sync, + }, "mempalace_get_drawer": { "description": "Fetch a single drawer by ID — returns full content and metadata.", "input_schema": { diff --git a/mempalace/sync.py b/mempalace/sync.py new file mode 100644 index 0000000..788be85 --- /dev/null +++ b/mempalace/sync.py @@ -0,0 +1,321 @@ +""" +sync.py — Gitignore-aware drawer prune (#1252). + +Removes drawers whose source files are now gitignored, deleted, or moved +out of the project. Reuses the same GitignoreMatcher infrastructure that +the miner uses on the way in, so the same rules that block ingest also +drive the corresponding cleanup. + +Usage: + from mempalace.sync import sync_palace + report = sync_palace(palace_path, project_dirs=["/repo"], dry_run=True) +""" + +import logging +from collections import defaultdict +from pathlib import Path +from typing import Callable, Optional, TypedDict + +from .miner import is_gitignored, load_gitignore_matcher +from .palace import ( + MineAlreadyRunning, + get_closets_collection, + get_collection, + mine_palace_lock, +) + + +logger = logging.getLogger(__name__) +_BATCH = 1000 + + +class SyncReport(TypedDict): + scanned: int + kept: int + gitignored: int + missing: int + no_source: int + out_of_scope: int + removed_drawers: int + removed_closets: int + dry_run: bool + by_source: dict[str, int] + + +def _resolve_project_root(source_file: Path, project_roots: list) -> Optional[Path]: + """Return the longest project_root that source_file lives under. + + Assumes ``project_roots`` is sorted by path-length descending so the + first match is the longest (deepest) prefix. + """ + for root in project_roots: + try: + source_file.relative_to(root) + return root + except ValueError: + continue + return None + + +def _ancestor_matchers(source_file: Path, root: Path, matcher_cache: dict) -> list: + """Build the ancestor-chain matcher list, root → file's parent. + + Callers are expected to invoke this only after `_resolve_project_root` + confirms `source_file` lives under `root`. The defensive try/except + keeps the function safe if a future caller skips that check. + """ + matchers: list = [] + try: + parts = source_file.relative_to(root).parts + except ValueError: + return matchers + cursor = root + matcher = load_gitignore_matcher(cursor, matcher_cache) + if matcher is not None: + matchers.append(matcher) + for part in parts[:-1]: + cursor = cursor / part + matcher = load_gitignore_matcher(cursor, matcher_cache) + if matcher is not None: + matchers.append(matcher) + return matchers + + +def _is_registry_row(meta: dict, drawer_id: str) -> bool: + """Convo miner sentinels track 'have I seen this transcript' — preserve them. + + Deleting a `_reg_*` sentinel makes the next mine pass re-chunk and re-embed + the entire transcript even though its content has not changed. + """ + if (meta or {}).get("room") == "_registry": + return True + if (meta or {}).get("ingest_mode") == "registry": + return True + if drawer_id and drawer_id.startswith("_reg_"): + return True + return False + + +def _classify_drawer( + meta: dict, matcher_cache: dict, project_roots: list, drawer_id: str = "" +) -> str: + """Classify a drawer by its source_file metadata. + + Returns one of: kept, gitignored, missing, no_source, out_of_scope. + """ + # Defensive: main loop filters registry rows; this guards direct callers. + if _is_registry_row(meta, drawer_id): + return "kept" + + source_file = (meta or {}).get("source_file") + if not source_file: + return "no_source" + + src = Path(source_file) + if not src.is_absolute(): + return "no_source" + src = src.resolve(strict=False) + + root = _resolve_project_root(src, project_roots) + if root is None: + return "out_of_scope" + + if not src.exists(): + return "missing" + + matchers = _ancestor_matchers(src, root, matcher_cache) + if matchers and is_gitignored(src, matchers, is_dir=False): + return "gitignored" + + return "kept" + + +def _iter_drawer_metadata(col, wing: Optional[str]): + """Yield (id, metadata) tuples from the drawers collection in batches.""" + offset = 0 + where = {"wing": wing} if wing else None + while True: + kwargs = {"include": ["metadatas"], "limit": _BATCH, "offset": offset} + if where: + kwargs["where"] = where + batch = col.get(**kwargs) + ids = batch.get("ids") or [] + metas = batch.get("metadatas") or [] + if not ids: + return + for drawer_id, meta in zip(ids, metas): + yield drawer_id, meta + if len(ids) < _BATCH: + return + offset += len(ids) + + +def _auto_detect_project_roots(col, wing: Optional[str]) -> list: + """Walk drawer metadata once collecting candidate project roots. + + A path is a project root if any ancestor up to filesystem root holds + a `.git` directory or a `.gitignore` file. The deepest such ancestor + wins, so nested-but-still-tracked subprojects are honoured. + `Path.parents` iterates deepest-first, so the first hit IS deepest. + + Dedupes on ``source_file`` string so a 200-chunk file costs one disk + walk, not 200. + """ + roots: set = set() + seen_sources: set = set() + for _, meta in _iter_drawer_metadata(col, wing): + source_file = (meta or {}).get("source_file") + if not source_file or source_file in seen_sources: + continue + seen_sources.add(source_file) + src = Path(source_file) + if not src.is_absolute(): + continue + for parent in src.parents: + if (parent / ".git").exists() or (parent / ".gitignore").is_file(): + roots.add(parent.resolve(strict=False)) + break + return sorted(roots, key=lambda p: (-len(str(p)), str(p))) + + +def _normalize_project_dirs(project_dirs) -> list: + """Resolve and sort project dirs so deepest-prefix wins on first match.""" + resolved = [Path(p).resolve(strict=False) for p in project_dirs] + return sorted(resolved, key=lambda p: (-len(str(p)), str(p))) + + +def _delete_in_batches(col, ids: list, batch_size: int, wal_log: Optional[Callable]): + """Delete drawer IDs in batches, optionally logging each batch to WAL.""" + deleted = 0 + for i in range(0, len(ids), batch_size): + chunk = ids[i : i + batch_size] + col.delete(ids=chunk) + deleted += len(chunk) + if wal_log is not None: + wal_log( + "sync_prune", + {"first_id": chunk[0]}, + {"removed_count": len(chunk)}, + ) + return deleted + + +def sync_palace( + palace_path: str, + project_dirs: Optional[list] = None, + wing: Optional[str] = None, + dry_run: bool = True, + batch_size: int = _BATCH, + wal_log: Optional[Callable] = None, +) -> SyncReport: + """Prune drawers whose source files are gitignored, missing, or moved. + + Returns a SyncReport with bucket counts. Dry-run by default; pass + dry_run=False to actually delete drawers and matching closets. + + Holds ``mine_palace_lock`` for the whole call so the classify pass and + the apply branch see the same drawer snapshot. Raises + ``MineAlreadyRunning`` if another mine is in progress on this palace. + + On apply (``dry_run=False``), at least one of ``wing`` or + ``project_dirs`` must be set so a caller cannot accidentally prune + every wing in a multi-project palace via auto-detected roots. + """ + if not dry_run and not wing and not project_dirs: + raise ValueError( + "sync apply requires explicit wing= or project_dirs= so it cannot " + "auto-prune every wing in a multi-project palace; pass --wing or " + "a project directory" + ) + if project_dirs is not None and not project_dirs: + raise ValueError( + "project_dirs was provided but is empty; pass at least one project " + "root or pass project_dirs=None to auto-detect from drawer metadata" + ) + + counts = { + "scanned": 0, + "kept": 0, + "gitignored": 0, + "missing": 0, + "no_source": 0, + "out_of_scope": 0, + } + by_source: dict = defaultdict(int) + removable_ids: list = [] + removable_sources: set = set() + + with mine_palace_lock(palace_path): + col = get_collection(palace_path, create=False) + + if project_dirs is not None: + roots = _normalize_project_dirs(project_dirs) + else: + roots = _auto_detect_project_roots(col, wing) + + matcher_cache: dict = {} + # Same source_file → same verdict holds because mine_palace_lock + # blocks concurrent writers and the loop is synchronous. + classification_cache: dict = {} + + for drawer_id, meta in _iter_drawer_metadata(col, wing): + counts["scanned"] += 1 + meta = meta or {} + source_file = meta.get("source_file") + + if _is_registry_row(meta, drawer_id): + bucket = "kept" + elif source_file and source_file in classification_cache: + bucket = classification_cache[source_file] + else: + bucket = _classify_drawer(meta, matcher_cache, roots, drawer_id) + if source_file: + classification_cache[source_file] = bucket + + counts[bucket] += 1 + if bucket in ("gitignored", "missing"): + removable_ids.append(drawer_id) + if source_file: + removable_sources.add(source_file) + by_source[source_file] += 1 + + report: SyncReport = { + **counts, + "removed_drawers": 0, + "removed_closets": 0, + "dry_run": dry_run, + "by_source": dict(by_source), + } + + if dry_run or not removable_ids: + return report + + report["removed_drawers"] = _delete_in_batches(col, removable_ids, batch_size, wal_log) + + closets_col = None + try: + closets_col = get_closets_collection(palace_path, create=False) + except Exception as exc: + logger.warning("Closet purge skipped (collection unavailable): %s", exc) + + closets_removed = 0 + if closets_col is not None and removable_sources: + closet_ids = ( + closets_col.get( + where={"source_file": {"$in": list(removable_sources)}}, + include=[], + ).get("ids") + or [] + ) + if closet_ids: + closets_col.delete(ids=closet_ids) + closets_removed = len(closet_ids) + report["removed_closets"] = closets_removed + return report + + +__all__ = [ + "MineAlreadyRunning", + "SyncReport", + "sync_palace", +] diff --git a/tests/test_sync.py b/tests/test_sync.py new file mode 100644 index 0000000..ea17040 --- /dev/null +++ b/tests/test_sync.py @@ -0,0 +1,1399 @@ +""" +test_sync.py — Tests for `mempalace.sync` (gitignore-aware drawer prune, #1252). + +Builds a focused fixture: a temp project with .gitignore + on-disk files + +matching drawers, exercising every classification bucket sync produces. +""" + +import os +from pathlib import Path + +import chromadb +import pytest + + +def _seed_drawers(palace_path, repo_path, deleted_path, elsewhere_path): + """Populate the drawers collection with 6 entries covering all buckets.""" + client = chromadb.PersistentClient(path=palace_path) + col = client.get_or_create_collection("mempalace_drawers", metadata={"hnsw:space": "cosine"}) + + metas = [ + { + "wing": "demo", + "room": "src", + "source_file": str(repo_path / "src" / "keep.py"), + "chunk_index": 0, + "added_by": "miner", + "filed_at": "2026-05-09T00:00:00", + }, + { + "wing": "demo", + "room": "build", + "source_file": str(repo_path / "build" / "ignored.py"), + "chunk_index": 0, + "added_by": "miner", + "filed_at": "2026-05-09T00:00:00", + }, + { + "wing": "demo", + "room": "logs", + "source_file": str(repo_path / "app.log"), + "chunk_index": 0, + "added_by": "miner", + "filed_at": "2026-05-09T00:00:00", + }, + { + "wing": "demo", + "room": "stale", + "source_file": str(deleted_path), + "chunk_index": 0, + "added_by": "miner", + "filed_at": "2026-05-09T00:00:00", + }, + { + "wing": "demo", + "room": "convo", + # No source_file key — convo / explicit-add drawers. + "chunk_index": 0, + "added_by": "convo_miner", + "filed_at": "2026-05-09T00:00:00", + }, + { + "wing": "demo", + "room": "elsewhere", + "source_file": str(elsewhere_path), + "chunk_index": 0, + "added_by": "miner", + "filed_at": "2026-05-09T00:00:00", + }, + ] + + col.add( + ids=[ + "drawer_keep", + "drawer_gitignored_dir", + "drawer_gitignored_glob", + "drawer_missing", + "drawer_no_source", + "drawer_out_of_scope", + ], + documents=[f"doc {i}" for i in range(6)], + embeddings=[[float(i + 1), 0.0, 0.0] for i in range(6)], + metadatas=metas, + ) + del client + + +@pytest.fixture +def synced_world(tmp_dir, palace_path): + """Temp project with .gitignore + on-disk files + matching drawers.""" + repo_path = Path(tmp_dir) / "repo" + (repo_path / "src").mkdir(parents=True) + (repo_path / "build").mkdir() + + # .gitignore: ignore build/ directory and any *.log file + (repo_path / ".gitignore").write_text("build/\n*.log\n") + + # Files that exist on disk + (repo_path / "src" / "keep.py").write_text("# keep\n") + (repo_path / "build" / "ignored.py").write_text("# ignored by gitignore\n") + (repo_path / "app.log").write_text("log line\n") + + # File that the drawer points to but no longer exists + deleted = repo_path / "deleted.py" + deleted.write_text("# was here\n") + deleted.unlink() + + # Use tmp_dir for an absolute path; `/tmp/...` literals are not absolute on Windows. + elsewhere = Path(tmp_dir) / "elsewhere" / "x.md" + + _seed_drawers(palace_path, repo_path, deleted, elsewhere) + return {"palace_path": palace_path, "repo_path": str(repo_path)} + + +def _open_drawers(palace_path): + client = chromadb.PersistentClient(path=palace_path) + col = client.get_or_create_collection("mempalace_drawers", metadata={"hnsw:space": "cosine"}) + return client, col + + +def _drawer_ids(col): + return set(col.get(include=[])["ids"]) + + +class TestSyncPalace: + def test_dry_run_classifies_correctly(self, synced_world): + from mempalace.sync import sync_palace + + report = sync_palace( + palace_path=synced_world["palace_path"], + project_dirs=[synced_world["repo_path"]], + dry_run=True, + ) + assert report["scanned"] == 6 + assert report["gitignored"] == 2 # build/ignored.py, app.log + assert report["missing"] == 1 # deleted.py + assert report["no_source"] == 1 + assert report["out_of_scope"] == 1 + assert report["kept"] == 1 # only src/keep.py + assert report["dry_run"] is True + assert report["removed_drawers"] == 0 + + # Mutation check — collection still has all 6 drawers. + client, col = _open_drawers(synced_world["palace_path"]) + try: + assert len(_drawer_ids(col)) == 6 + finally: + del client + + def test_apply_removes_gitignored_and_missing(self, synced_world): + from mempalace.sync import sync_palace + + report = sync_palace( + palace_path=synced_world["palace_path"], + project_dirs=[synced_world["repo_path"]], + dry_run=False, + ) + assert report["dry_run"] is False + assert report["removed_drawers"] == 3 # 2 gitignored + 1 missing + + client, col = _open_drawers(synced_world["palace_path"]) + try: + survivors = _drawer_ids(col) + assert survivors == { + "drawer_keep", + "drawer_no_source", + "drawer_out_of_scope", + } + finally: + del client + + def test_dry_run_does_not_touch_collection(self, synced_world): + from mempalace.sync import sync_palace + + client, col = _open_drawers(synced_world["palace_path"]) + before = _drawer_ids(col) + del client + + sync_palace( + palace_path=synced_world["palace_path"], + project_dirs=[synced_world["repo_path"]], + dry_run=True, + ) + + client, col = _open_drawers(synced_world["palace_path"]) + try: + after = _drawer_ids(col) + finally: + del client + assert before == after + + def test_wing_scope_filters(self, tmp_dir, palace_path): + """A drawer in another wing must survive a wing-scoped sync.""" + from mempalace.sync import sync_palace + + repo_path = Path(tmp_dir) / "repo" + (repo_path / "build").mkdir(parents=True) + (repo_path / ".gitignore").write_text("build/\n") + (repo_path / "build" / "ignored.py").write_text("# ignored\n") + + client = chromadb.PersistentClient(path=palace_path) + col = client.get_or_create_collection( + "mempalace_drawers", metadata={"hnsw:space": "cosine"} + ) + col.add( + ids=["d_demo", "d_other"], + documents=["x", "y"], + embeddings=[[1.0, 0.0, 0.0], [2.0, 0.0, 0.0]], + metadatas=[ + { + "wing": "demo", + "room": "build", + "source_file": str(repo_path / "build" / "ignored.py"), + "chunk_index": 0, + "added_by": "miner", + "filed_at": "2026-05-09T00:00:00", + }, + { + "wing": "other", + "room": "build", + "source_file": str(repo_path / "build" / "ignored.py"), + "chunk_index": 0, + "added_by": "miner", + "filed_at": "2026-05-09T00:00:00", + }, + ], + ) + del client + + sync_palace( + palace_path=palace_path, + project_dirs=[str(repo_path)], + wing="demo", + dry_run=False, + ) + + client, col = _open_drawers(palace_path) + try: + assert _drawer_ids(col) == {"d_other"} + finally: + del client + + def test_no_source_file_drawers_preserved_on_apply(self, synced_world): + from mempalace.sync import sync_palace + + sync_palace( + palace_path=synced_world["palace_path"], + project_dirs=[synced_world["repo_path"]], + dry_run=False, + ) + client, col = _open_drawers(synced_world["palace_path"]) + try: + assert "drawer_no_source" in _drawer_ids(col) + finally: + del client + + def test_out_of_scope_drawers_preserved(self, synced_world): + from mempalace.sync import sync_palace + + sync_palace( + palace_path=synced_world["palace_path"], + project_dirs=[synced_world["repo_path"]], + dry_run=False, + ) + client, col = _open_drawers(synced_world["palace_path"]) + try: + assert "drawer_out_of_scope" in _drawer_ids(col) + finally: + del client + + def test_negated_gitignore_rules_respected(self, tmp_dir, palace_path): + """`!build/keep.py` must un-ignore one specific file under build/.""" + from mempalace.sync import sync_palace + + repo_path = Path(tmp_dir) / "repo" + (repo_path / "build").mkdir(parents=True) + (repo_path / ".gitignore").write_text("build/\n!build/keep.py\n") + (repo_path / "build" / "keep.py").write_text("# survivor\n") + (repo_path / "build" / "doomed.py").write_text("# doomed\n") + + client = chromadb.PersistentClient(path=palace_path) + col = client.get_or_create_collection( + "mempalace_drawers", metadata={"hnsw:space": "cosine"} + ) + col.add( + ids=["d_keep", "d_doom"], + documents=["x", "y"], + embeddings=[[1.0, 0.0, 0.0], [2.0, 0.0, 0.0]], + metadatas=[ + { + "wing": "demo", + "room": "build", + "source_file": str(repo_path / "build" / "keep.py"), + "chunk_index": 0, + "added_by": "miner", + "filed_at": "2026-05-09T00:00:00", + }, + { + "wing": "demo", + "room": "build", + "source_file": str(repo_path / "build" / "doomed.py"), + "chunk_index": 0, + "added_by": "miner", + "filed_at": "2026-05-09T00:00:00", + }, + ], + ) + del client + + sync_palace( + palace_path=palace_path, + project_dirs=[str(repo_path)], + dry_run=False, + ) + + client, col = _open_drawers(palace_path) + try: + survivors = _drawer_ids(col) + finally: + del client + assert "d_keep" in survivors + assert "d_doom" not in survivors + + def test_nested_gitignore_layers(self, tmp_dir, palace_path): + """Subdir .gitignore can deny what root allows.""" + from mempalace.sync import sync_palace + + repo_path = Path(tmp_dir) / "repo" + (repo_path / "vendor").mkdir(parents=True) + # Root gitignore is empty. + (repo_path / ".gitignore").write_text("\n") + # Subdir gitignore ignores everything under vendor/. + (repo_path / "vendor" / ".gitignore").write_text("*.py\n") + (repo_path / "vendor" / "lib.py").write_text("# nested-ignored\n") + + client = chromadb.PersistentClient(path=palace_path) + col = client.get_or_create_collection( + "mempalace_drawers", metadata={"hnsw:space": "cosine"} + ) + col.add( + ids=["d_nested"], + documents=["x"], + embeddings=[[1.0, 0.0, 0.0]], + metadatas=[ + { + "wing": "demo", + "room": "vendor", + "source_file": str(repo_path / "vendor" / "lib.py"), + "chunk_index": 0, + "added_by": "miner", + "filed_at": "2026-05-09T00:00:00", + } + ], + ) + del client + + sync_palace( + palace_path=palace_path, + project_dirs=[str(repo_path)], + dry_run=False, + ) + + client, col = _open_drawers(palace_path) + try: + assert "d_nested" not in _drawer_ids(col) + finally: + del client + + def test_closet_purge_runs_on_apply(self, synced_world): + """Closets pointing at removed sources must also disappear.""" + from mempalace.sync import sync_palace + + # Seed a closet referencing the to-be-pruned ignored.py source. + client = chromadb.PersistentClient(path=synced_world["palace_path"]) + closets = client.get_or_create_collection( + "mempalace_closets", metadata={"hnsw:space": "cosine"} + ) + ignored_path = str(Path(synced_world["repo_path"]) / "build" / "ignored.py") + closets.add( + ids=["closet_ignored_01"], + documents=["topic line"], + embeddings=[[1.0, 0.0, 0.0]], + metadatas=[ + { + "wing": "demo", + "room": "build", + "source_file": ignored_path, + "chunk_index": 0, + "added_by": "miner", + "filed_at": "2026-05-09T00:00:00", + } + ], + ) + del client + + report = sync_palace( + palace_path=synced_world["palace_path"], + project_dirs=[synced_world["repo_path"]], + dry_run=False, + ) + assert report["removed_closets"] >= 1 + + client = chromadb.PersistentClient(path=synced_world["palace_path"]) + closets = client.get_or_create_collection( + "mempalace_closets", metadata={"hnsw:space": "cosine"} + ) + try: + assert closets.get(ids=["closet_ignored_01"])["ids"] == [] + finally: + del client + + def test_handles_empty_palace(self, palace_path): + from mempalace.sync import sync_palace + + client = chromadb.PersistentClient(path=palace_path) + client.get_or_create_collection("mempalace_drawers", metadata={"hnsw:space": "cosine"}) + del client + + report = sync_palace(palace_path=palace_path, dry_run=True) + assert report["scanned"] == 0 + assert report["removed_drawers"] == 0 + + def test_emits_wal_entries_on_apply(self, synced_world): + from mempalace.sync import sync_palace + + seen = [] + + def fake_wal(operation, params, result=None): + seen.append((operation, params, result)) + + sync_palace( + palace_path=synced_world["palace_path"], + project_dirs=[synced_world["repo_path"]], + dry_run=False, + wal_log=fake_wal, + ) + + ops = [op for op, _, _ in seen] + assert "sync_prune" in ops + # F4 — result payload carries the audit trail. + sync_entry = next(e for e in seen if e[0] == "sync_prune") + op, params, result = sync_entry + assert result is not None and "removed_count" in result + assert result["removed_count"] >= 1 + # Allow-list — params must be exactly the documented audit shape so + # any future leak (source_file, content, ID lists, etc.) trips a + # test failure rather than slipping through a deny-list. + assert set(params.keys()) <= { + "first_id" + }, f"WAL params drifted from the audit allow-list: {params.keys()}" + + def test_registry_sentinels_preserved_on_apply(self, tmp_dir, palace_path): + """F2 regression: convo miner `_reg_*` sentinels must survive sync apply. + + Deleting them forces full re-mine + re-embed of the transcript on the + next miner run, even though the transcript content has not changed. + """ + from mempalace.sync import sync_palace + + repo_path = Path(tmp_dir) / "repo" + repo_path.mkdir(parents=True) + (repo_path / ".gitignore").write_text("transcripts/\n") + (repo_path / "transcripts").mkdir() + moved_transcript = repo_path / "transcripts" / "convo.jsonl" + moved_transcript.write_text("{}\n") + + client = chromadb.PersistentClient(path=palace_path) + col = client.get_or_create_collection( + "mempalace_drawers", metadata={"hnsw:space": "cosine"} + ) + col.add( + ids=[ + "_reg_abc123_room_match", + "_reg_def456_meta_match", + "_reg_ghi789_id_match", + ], + documents=["[registry] x", "[registry] y", "[registry] z"], + embeddings=[[1.0, 0.0, 0.0], [2.0, 0.0, 0.0], [3.0, 0.0, 0.0]], + metadatas=[ + { + "wing": "demo", + "room": "_registry", + "source_file": str(moved_transcript), + "chunk_index": 0, + "added_by": "convo_miner", + "filed_at": "2026-05-09T00:00:00", + }, + { + "wing": "demo", + "room": "convo", + "source_file": str(moved_transcript), + "chunk_index": 0, + "added_by": "convo_miner", + "filed_at": "2026-05-09T00:00:00", + "ingest_mode": "registry", + }, + { + "wing": "demo", + "room": "convo", + "source_file": str(moved_transcript), + "chunk_index": 0, + "added_by": "convo_miner", + "filed_at": "2026-05-09T00:00:00", + }, + ], + ) + del client + + # Sentinel transcript is gitignored; without F2 it would also delete + # the `_reg_*` sentinel rows. + sync_palace( + palace_path=palace_path, + project_dirs=[str(repo_path)], + dry_run=False, + ) + + client, col = _open_drawers(palace_path) + try: + survivors = _drawer_ids(col) + finally: + del client + assert "_reg_abc123_room_match" in survivors # room=_registry + assert "_reg_def456_meta_match" in survivors # ingest_mode=registry + assert "_reg_ghi789_id_match" in survivors # id prefix + + def test_auto_detect_picks_deepest_root(self, tmp_dir, palace_path): + """F3 regression (white-box): when multiple ancestors hold markers + the DEEPEST one wins. Direct assertion on the helper avoids the + tautology of round-1's classifier-based test where ancestor walks + loaded the same matcher chain regardless of which root was picked. + """ + from mempalace.sync import _auto_detect_project_roots + + outer = Path(tmp_dir) / "outer" + inner = outer / "inner" + inner.mkdir(parents=True) + # Both have markers. Deepest wins. + (outer / ".gitignore").write_text("*.txt\n") + (inner / ".gitignore").write_text("*.py\n") + + client = chromadb.PersistentClient(path=palace_path) + col = client.get_or_create_collection( + "mempalace_drawers", metadata={"hnsw:space": "cosine"} + ) + col.add( + ids=["d_inner"], + documents=["x"], + embeddings=[[1.0, 0.0, 0.0]], + metadatas=[ + { + "wing": "demo", + "room": "src", + "source_file": str(inner / "x.py"), + "chunk_index": 0, + "added_by": "miner", + "filed_at": "2026-05-09T00:00:00", + } + ], + ) + del client + + client, col = _open_drawers(palace_path) + try: + roots = _auto_detect_project_roots(col, wing="demo") + finally: + del client + + inner_resolved = inner.resolve(strict=False) + outer_resolved = outer.resolve(strict=False) + assert inner_resolved in roots, f"expected inner in roots, got {roots}" + assert ( + outer_resolved not in roots + ), f"deepest should win exclusively: roots={roots}, outer leaked" + + def test_apply_with_empty_project_dirs_raises(self, palace_path): + """Round-2 P1: `project_dirs=[]` (empty list) with apply must raise, + not silently classify everything as out_of_scope.""" + from mempalace.sync import sync_palace + + client = chromadb.PersistentClient(path=palace_path) + client.get_or_create_collection("mempalace_drawers", metadata={"hnsw:space": "cosine"}) + del client + + with pytest.raises(ValueError, match="empty"): + sync_palace( + palace_path=palace_path, + project_dirs=[], + wing="demo", + dry_run=False, + ) + + def test_closet_log_warning_when_collection_unavailable( + self, monkeypatch, synced_world, caplog + ): + """F7 regression: closets-collection-missing logs a warning.""" + import logging + + from mempalace import sync as sync_mod + from mempalace.sync import sync_palace + + def boom(*args, **kwargs): + raise RuntimeError("simulated missing closets collection") + + monkeypatch.setattr(sync_mod, "get_closets_collection", boom) + + with caplog.at_level(logging.WARNING, logger="mempalace.sync"): + sync_palace( + palace_path=synced_world["palace_path"], + project_dirs=[synced_world["repo_path"]], + dry_run=False, + ) + assert any( + "Closet purge skipped" in record.getMessage() for record in caplog.records + ), f"expected closet-skip warning, got: {[r.getMessage() for r in caplog.records]}" + + def test_metadata_cache_cleared_on_exception(self, monkeypatch, config, synced_world, kg): + """F9 regression: tool_sync's try/finally must clear `_metadata_cache` + even if sync_palace raises mid-apply. + + Tracks an explicit `called` flag on the explode mock so a refactor + that bypasses the patched name (and lets the real sync_palace run) + cannot fake-pass — the assertion below verifies the patched explode + actually ran before the cache was cleared. + """ + from mempalace import mcp_server + + # Reconfigure to point at synced_world. + from mempalace.config import MempalaceConfig + import json + + cfg_dir = Path(synced_world["palace_path"]).parent / "cfg_for_cache_test" + cfg_dir.mkdir(parents=True, exist_ok=True) + with open(cfg_dir / "config.json", "w") as f: + json.dump({"palace_path": synced_world["palace_path"]}, f) + monkeypatch.setattr(mcp_server, "_config", MempalaceConfig(config_dir=str(cfg_dir))) + monkeypatch.setattr(mcp_server, "_get_kg", lambda: kg) + monkeypatch.setattr(mcp_server, "_metadata_cache", ["dirty-cache-marker"]) + + called = {"n": 0} + + def explode(*args, **kwargs): + called["n"] += 1 + raise RuntimeError("simulated mid-apply failure") + + monkeypatch.setattr("mempalace.sync.sync_palace", explode) + + # tool_sync's broad except catches RuntimeError → returns structured error. + result = mcp_server.tool_sync( + project_dir=synced_world["repo_path"], wing="demo", apply=True + ) + assert called["n"] == 1, "explode mock did not actually run; test is a fake-pass" + assert result.get("success") is False + assert "simulated" in result.get("error", "") + + assert ( + mcp_server._metadata_cache is None + ), "F9: cache must be cleared even when sync_palace raises" + + def test_sync_report_keys_stable(self, synced_world): + """Regression: SyncReport schema must not silently drop a field.""" + from mempalace.sync import sync_palace + + report = sync_palace( + palace_path=synced_world["palace_path"], + project_dirs=[synced_world["repo_path"]], + dry_run=True, + ) + expected = { + "scanned", + "kept", + "gitignored", + "missing", + "no_source", + "out_of_scope", + "removed_drawers", + "removed_closets", + "dry_run", + "by_source", + } + assert set(report.keys()) == expected + + def test_batch_size_boundary(self, tmp_dir, palace_path): + """`_delete_in_batches` correctness at batch_size smaller than dataset.""" + from mempalace.sync import sync_palace + + repo_path = Path(tmp_dir) / "repo" + repo_path.mkdir(parents=True) + (repo_path / ".gitignore").write_text("ignored/\n") + (repo_path / "ignored").mkdir() + n = 5 + for i in range(n): + (repo_path / "ignored" / f"f{i}.py").write_text(f"# {i}\n") + + client = chromadb.PersistentClient(path=palace_path) + col = client.get_or_create_collection( + "mempalace_drawers", metadata={"hnsw:space": "cosine"} + ) + col.add( + ids=[f"d_{i}" for i in range(n)], + documents=[f"x{i}" for i in range(n)], + embeddings=[[float(i + 1), 0.0, 0.0] for i in range(n)], + metadatas=[ + { + "wing": "demo", + "room": "ignored", + "source_file": str(repo_path / "ignored" / f"f{i}.py"), + "chunk_index": 0, + "added_by": "miner", + "filed_at": "2026-05-09T00:00:00", + } + for i in range(n) + ], + ) + del client + + seen = [] + + def fake_wal(operation, params, result=None): + if operation == "sync_prune": + seen.append(result["removed_count"]) + + report = sync_palace( + palace_path=palace_path, + project_dirs=[str(repo_path)], + wing="demo", + dry_run=False, + batch_size=2, + wal_log=fake_wal, + ) + assert report["removed_drawers"] == n + # 5 ids at batch_size=2 → chunks of 2,2,1 → 3 wal entries + assert seen == [2, 2, 1], f"unexpected chunk sizes: {seen}" + + def test_apply_is_idempotent(self, synced_world): + """Round-3: a second apply on the same palace must be a no-op.""" + from mempalace.sync import sync_palace + + first = sync_palace( + palace_path=synced_world["palace_path"], + project_dirs=[synced_world["repo_path"]], + dry_run=False, + ) + assert first["removed_drawers"] >= 1 + + second = sync_palace( + palace_path=synced_world["palace_path"], + project_dirs=[synced_world["repo_path"]], + dry_run=False, + ) + assert second["removed_drawers"] == 0 + assert second["gitignored"] == 0 + assert second["missing"] == 0 + + def test_relative_source_file_classified_as_no_source(self, tmp_dir, palace_path): + """Round-3: a drawer whose source_file metadata is relative is upstream + corruption (miner writes absolute paths). Sync must NOT guess at + path resolution; it routes the drawer to `no_source` and leaves it.""" + from mempalace.sync import sync_palace + + client = chromadb.PersistentClient(path=palace_path) + col = client.get_or_create_collection( + "mempalace_drawers", metadata={"hnsw:space": "cosine"} + ) + col.add( + ids=["d_relative"], + documents=["x"], + embeddings=[[1.0, 0.0, 0.0]], + metadatas=[ + { + "wing": "demo", + "room": "src", + "source_file": "relative/path.py", # malformed, not absolute + "chunk_index": 0, + "added_by": "miner", + "filed_at": "2026-05-09T00:00:00", + } + ], + ) + del client + + repo_path = Path(tmp_dir) / "repo" + repo_path.mkdir() + (repo_path / ".gitignore").write_text("*.py\n") + + report = sync_palace( + palace_path=palace_path, + project_dirs=[str(repo_path)], + wing="demo", + dry_run=False, + ) + assert report["no_source"] == 1 + assert report["removed_drawers"] == 0 + + client, col = _open_drawers(palace_path) + try: + assert "d_relative" in _drawer_ids(col) + finally: + del client + + def test_overlapping_project_dirs_picks_longest(self, tmp_dir, palace_path): + """`_resolve_project_root` longest-prefix matching: nested project + dirs both contain the source; the deeper (longer) one wins.""" + from mempalace.sync import sync_palace + + outer = Path(tmp_dir) / "outer" + inner = outer / "inner" + inner.mkdir(parents=True) + # Outer .gitignore would NOT block file. Inner .gitignore blocks it. + (outer / ".gitignore").write_text("# empty\n") + (inner / ".gitignore").write_text("x.py\n") + (inner / "x.py").write_text("# inner-ignored\n") + + client = chromadb.PersistentClient(path=palace_path) + col = client.get_or_create_collection( + "mempalace_drawers", metadata={"hnsw:space": "cosine"} + ) + col.add( + ids=["d_x"], + documents=["x"], + embeddings=[[1.0, 0.0, 0.0]], + metadatas=[ + { + "wing": "demo", + "room": "src", + "source_file": str(inner / "x.py"), + "chunk_index": 0, + "added_by": "miner", + "filed_at": "2026-05-09T00:00:00", + } + ], + ) + del client + + # Pass BOTH outer AND inner as project_dirs. inner is the longest + # prefix, so it should be the chosen root and inner/.gitignore + # rules apply (file is ignored → drawer removed). + report = sync_palace( + palace_path=palace_path, + project_dirs=[str(outer), str(inner)], + wing="demo", + dry_run=False, + ) + assert report["gitignored"] == 1, f"expected 1 gitignored, got {report}" + + def test_apply_without_scope_raises(self, palace_path): + """F6: apply=True with both wing=None AND project_dirs=None refuses.""" + from mempalace.sync import sync_palace + + # Empty palace; we never reach delete code, but the guard must fire + # before any work. + client = chromadb.PersistentClient(path=palace_path) + client.get_or_create_collection("mempalace_drawers", metadata={"hnsw:space": "cosine"}) + del client + + with pytest.raises(ValueError, match="explicit wing="): + sync_palace(palace_path=palace_path, dry_run=False) + + # Dry-run with no scope is still allowed — preview is read-only. + report = sync_palace(palace_path=palace_path, dry_run=True) + assert report["dry_run"] is True + + @pytest.mark.skipif(os.name == "nt", reason="fcntl-based contention test is POSIX only") + def test_mine_already_running_propagates(self, synced_world): + """F1 + T4: sync acquires `mine_palace_lock` for the whole call. + + Hold the palace lock via raw fcntl on a separate open file + description; mine_palace_lock opens its own handle and must + raise MineAlreadyRunning rather than silently running against + a partial snapshot. + """ + import fcntl + import hashlib + + from mempalace.palace import MineAlreadyRunning + from mempalace.sync import sync_palace + + palace_path = synced_world["palace_path"] + resolved = os.path.realpath(os.path.expanduser(palace_path)) + palace_key = hashlib.sha256(os.path.normcase(resolved).encode()).hexdigest()[:16] + lock_dir = os.path.join(os.path.expanduser("~"), ".mempalace", "locks") + os.makedirs(lock_dir, exist_ok=True) + lock_path = os.path.join(lock_dir, f"mine_palace_{palace_key}.lock") + Path(lock_path).touch() + + with open(lock_path, "r+") as lf: + fcntl.flock(lf, fcntl.LOCK_EX | fcntl.LOCK_NB) + try: + with pytest.raises(MineAlreadyRunning): + sync_palace( + palace_path=palace_path, + project_dirs=[synced_world["repo_path"]], + dry_run=True, + ) + finally: + fcntl.flock(lf, fcntl.LOCK_UN) + + # Lock released — sync now succeeds. + sync_palace( + palace_path=palace_path, + project_dirs=[synced_world["repo_path"]], + dry_run=True, + ) + + @pytest.mark.skipif(os.name == "nt", reason="os.symlink needs admin on Windows") + def test_symlinked_project_root_resolves(self, tmp_dir, palace_path): + """source_file may be written through a symlinked tmp directory + (real macOS behaviour: /var/folders/... is a symlink to + /private/var/folders/...). project_dirs goes through .resolve() + which follows the symlink. Without matching .resolve() on the + source side, _resolve_project_root would mis-bucket every drawer + as out_of_scope. This test pins symmetric resolution. + """ + from mempalace.sync import sync_palace + + real_root = Path(tmp_dir) / "real" + (real_root / "build").mkdir(parents=True) + (real_root / ".gitignore").write_text("build/\n") + (real_root / "build" / "x.py").write_text("# ignored\n") + + link_root = Path(tmp_dir) / "link" + os.symlink(str(real_root), str(link_root)) + + client = chromadb.PersistentClient(path=palace_path) + col = client.get_or_create_collection( + "mempalace_drawers", metadata={"hnsw:space": "cosine"} + ) + col.add( + ids=["d_via_link"], + documents=["x"], + embeddings=[[1.0, 0.0, 0.0]], + metadatas=[ + { + "wing": "demo", + "room": "build", + "source_file": str(link_root / "build" / "x.py"), + "chunk_index": 0, + "added_by": "miner", + "filed_at": "2026-05-09T00:00:00", + } + ], + ) + del client + + report = sync_palace( + palace_path=palace_path, + project_dirs=[str(real_root)], + wing="demo", + dry_run=True, + ) + assert ( + report["gitignored"] == 1 + ), f"symmetric resolve broken: drawer mis-bucketed; report={report}" + assert report["out_of_scope"] == 0 + + def test_classification_cache_avoids_redundant_disk_hits( + self, tmp_dir, palace_path, monkeypatch + ): + """Per-file classification cache: N chunks of the same source_file + cost one _classify_drawer invocation, not N. Verifies the perf + optimisation actually short-circuits without changing behaviour. + """ + from mempalace import sync as sync_mod + from mempalace.sync import sync_palace + + repo_path = Path(tmp_dir) / "repo" + (repo_path / "build").mkdir(parents=True) + (repo_path / ".gitignore").write_text("build/\n") + (repo_path / "build" / "shared.py").write_text("# ignored\n") + + client = chromadb.PersistentClient(path=palace_path) + col = client.get_or_create_collection( + "mempalace_drawers", metadata={"hnsw:space": "cosine"} + ) + col.add( + ids=[f"d_chunk_{i}" for i in range(5)], + documents=[f"chunk{i}" for i in range(5)], + embeddings=[[float(i + 1), 0.0, 0.0] for i in range(5)], + metadatas=[ + { + "wing": "demo", + "room": "build", + "source_file": str(repo_path / "build" / "shared.py"), + "chunk_index": i, + "added_by": "miner", + "filed_at": "2026-05-09T00:00:00", + } + for i in range(5) + ], + ) + del client + + call_count = {"n": 0} + real_classify = sync_mod._classify_drawer + + def counting_classify(*args, **kwargs): + call_count["n"] += 1 + return real_classify(*args, **kwargs) + + monkeypatch.setattr(sync_mod, "_classify_drawer", counting_classify) + + report = sync_palace( + palace_path=palace_path, + project_dirs=[str(repo_path)], + wing="demo", + dry_run=True, + ) + assert report["scanned"] == 5 + assert report["gitignored"] == 5 + assert ( + call_count["n"] == 1 + ), f"cache miss: expected 1 _classify_drawer call (4 cache hits), got {call_count['n']}" + + def test_closet_batch_purge_single_call(self, synced_world, monkeypatch): + """Batched $in closet purge: one delete() call across all removable + source files, not N. Wraps the real collection so chromadb still + does the work; only the call count is intercepted. + """ + from mempalace import sync as sync_mod + + repo_path = Path(synced_world["repo_path"]) + palace_path = synced_world["palace_path"] + + client = chromadb.PersistentClient(path=palace_path) + closets_col = client.get_or_create_collection( + "mempalace_closets", metadata={"hnsw:space": "cosine"} + ) + closets_col.add( + ids=["c1", "c2", "c3"], + documents=["c1", "c2", "c3"], + embeddings=[[1.0, 0.0, 0.0], [2.0, 0.0, 0.0], [3.0, 0.0, 0.0]], + metadatas=[ + {"source_file": str(repo_path / "build" / "ignored.py")}, + {"source_file": str(repo_path / "app.log")}, + {"source_file": str(repo_path / "deleted.py")}, + ], + ) + del client + + class CallCountingCol: + def __init__(self, real): + self._real = real + self.delete_calls = 0 + self.get_calls = 0 + + def get(self, *args, **kwargs): + self.get_calls += 1 + return self._real.get(*args, **kwargs) + + def delete(self, *args, **kwargs): + self.delete_calls += 1 + return self._real.delete(*args, **kwargs) + + captured: dict = {} + real_get_closets = sync_mod.get_closets_collection + + def wrapped_get_closets(p, create=False): + real = real_get_closets(p, create=create) + wrapper = CallCountingCol(real) + captured["wrapper"] = wrapper + return wrapper + + monkeypatch.setattr(sync_mod, "get_closets_collection", wrapped_get_closets) + + from mempalace.sync import sync_palace + + report = sync_palace( + palace_path=palace_path, + project_dirs=[synced_world["repo_path"]], + dry_run=False, + ) + + seeded_sources = { + str(repo_path / "build" / "ignored.py"), + str(repo_path / "app.log"), + str(repo_path / "deleted.py"), + } + expected = len(seeded_sources & set(report["by_source"].keys())) + assert ( + report["removed_closets"] == expected + ), f"removed_closets ({report['removed_closets']}) != |seeded ∩ removable| ({expected})" + assert "wrapper" in captured, "get_closets_collection patch not invoked" + assert ( + captured["wrapper"].delete_calls == 1 + ), f"expected one batch delete call, got {captured['wrapper'].delete_calls}" + assert ( + captured["wrapper"].get_calls == 1 + ), f"expected one batch get call, got {captured['wrapper'].get_calls}" + + def test_registry_check_runs_before_cache_lookup(self, tmp_dir, palace_path): + """A non-registry drawer with the same source_file must NOT poison + the bucket of a subsequent _reg_* drawer via the classification + cache. Order matters for chromadb iteration: seed the regular + drawer FIRST so it caches `gitignored`, then a registry sentinel + with the same source_file. Without the registry-bypass at the + top of the main loop, the cache lookup would route the sentinel + to gitignored and delete it. + """ + from mempalace.sync import sync_palace + + repo_path = Path(tmp_dir) / "repo" + (repo_path / "build").mkdir(parents=True) + (repo_path / ".gitignore").write_text("build/\n") + (repo_path / "build" / "shared.py").write_text("# ignored\n") + + client = chromadb.PersistentClient(path=palace_path) + col = client.get_or_create_collection( + "mempalace_drawers", metadata={"hnsw:space": "cosine"} + ) + shared_source = str(repo_path / "build" / "shared.py") + col.add( + ids=["a_regular", "_reg_zzz_sentinel"], + documents=["regular chunk", "registry sentinel"], + embeddings=[[1.0, 0.0, 0.0], [2.0, 0.0, 0.0]], + metadatas=[ + { + "wing": "demo", + "room": "build", + "source_file": shared_source, + "chunk_index": 0, + "added_by": "miner", + "filed_at": "2026-05-09T00:00:00", + }, + { + "wing": "demo", + "room": "_registry", + "source_file": shared_source, + "chunk_index": 0, + "ingest_mode": "registry", + "added_by": "convo_miner", + "filed_at": "2026-05-09T00:00:00", + }, + ], + ) + del client + + report = sync_palace( + palace_path=palace_path, + project_dirs=[str(repo_path)], + wing="demo", + dry_run=False, + ) + assert report["gitignored"] == 1 + assert report["kept"] == 1 + assert report["removed_drawers"] == 1 + + client, col = _open_drawers(palace_path) + try: + survivors = _drawer_ids(col) + finally: + del client + assert "a_regular" not in survivors + assert ( + "_reg_zzz_sentinel" in survivors + ), "registry sentinel was incorrectly pruned via cached non-registry verdict" + + def test_normalize_project_dirs_sort_stable_on_equal_length(self): + """`_normalize_project_dirs` must sort by `(-len, str)` so equal-length + roots are alphabetically deterministic; otherwise overlapping nested + scope choice depends on argv order. + """ + from mempalace.sync import _normalize_project_dirs + + result = _normalize_project_dirs(["/tmp/zzz", "/tmp/aaa"]) + names = [p.name for p in result] + assert names == ["aaa", "zzz"], f"equal-length sort not deterministic: got {names}" + + # Different lengths: deepest first. + deep = _normalize_project_dirs(["/tmp/short", "/tmp/much/deeper/path"]) + assert str(deep[0]).endswith("path") + assert str(deep[1]).endswith("short") + + +class TestSyncMcpTool: + """T2: `mempalace_sync` MCP entry point must keep apply polarity stable.""" + + def _patch(self, monkeypatch, config, kg): + from mempalace import mcp_server + + monkeypatch.setattr(mcp_server, "_config", config) + monkeypatch.setattr(mcp_server, "_get_kg", lambda: kg) + + def test_default_is_dry_run(self, monkeypatch, config, palace_path, kg): + from mempalace import mcp_server + + self._patch(monkeypatch, config, kg) + client = chromadb.PersistentClient(path=palace_path) + client.get_or_create_collection("mempalace_drawers", metadata={"hnsw:space": "cosine"}) + del client + + report = mcp_server.tool_sync(project_dir=palace_path) + assert report["dry_run"] is True + + def test_success_true_on_dry_run(self, monkeypatch, config, palace_path, kg): + """Round-4: success path returns `success: True` for API symmetry + with the structured-error branches that all return `success: False`.""" + from mempalace import mcp_server + + self._patch(monkeypatch, config, kg) + client = chromadb.PersistentClient(path=palace_path) + client.get_or_create_collection("mempalace_drawers", metadata={"hnsw:space": "cosine"}) + del client + + report = mcp_server.tool_sync(project_dir=palace_path) + assert report.get("success") is True + assert report.get("dry_run") is True + + def test_apply_true_is_destructive(self, monkeypatch, config, synced_world, kg): + from mempalace import mcp_server + + # Rebuild config to point at synced_world's palace. + from mempalace.config import MempalaceConfig + import json + + cfg_dir = Path(synced_world["palace_path"]).parent / "cfg_for_mcp_test" + cfg_dir.mkdir(parents=True, exist_ok=True) + with open(cfg_dir / "config.json", "w") as f: + json.dump({"palace_path": synced_world["palace_path"]}, f) + cfg = MempalaceConfig(config_dir=str(cfg_dir)) + self._patch(monkeypatch, cfg, kg) + + report = mcp_server.tool_sync( + project_dir=synced_world["repo_path"], apply=True, wing="demo" + ) + assert report["dry_run"] is False + assert report["removed_drawers"] >= 1 + + def test_no_palace_returns_structured_error(self, monkeypatch, kg): + """Round-3: tool_sync must keep the {success:False,error:...} contract + even on the early `_no_palace` short-circuit, not return the bare + legacy `{error,hint}` dict.""" + from mempalace import mcp_server + + class _EmptyConfig: + palace_path = "" + collection_name = "mempalace_drawers" + + monkeypatch.setattr(mcp_server, "_config", _EmptyConfig()) + monkeypatch.setattr(mcp_server, "_get_kg", lambda: kg) + + result = mcp_server.tool_sync() + assert result.get("success") is False + assert "error" in result + + def test_apply_without_scope_returns_structured_error( + self, monkeypatch, config, palace_path, kg + ): + """Round-2 P0: tool_sync must return {success: False, error: ...} + rather than letting ValueError propagate to the MCP client.""" + from mempalace import mcp_server + + client = chromadb.PersistentClient(path=palace_path) + client.get_or_create_collection("mempalace_drawers", metadata={"hnsw:space": "cosine"}) + del client + + self._patch(monkeypatch, config, kg) + result = mcp_server.tool_sync(apply=True) # no project_dir, no wing + assert result.get("success") is False + assert "wing=" in result.get("error", "") or "project_dirs" in result.get("error", "") + + @pytest.mark.skipif(os.name == "nt", reason="fcntl-based contention test is POSIX only") + def test_lock_contention_returns_structured_error(self, monkeypatch, config, synced_world, kg): + """Round-2 P0: tool_sync with apply=True under contention returns + a structured `{success: False, error: ...}` instead of raising.""" + import fcntl + import hashlib + + from mempalace import mcp_server + from mempalace.config import MempalaceConfig + import json + + # Wire MCP config at synced_world. + cfg_dir = Path(synced_world["palace_path"]).parent / "cfg_for_lock_test" + cfg_dir.mkdir(parents=True, exist_ok=True) + with open(cfg_dir / "config.json", "w") as f: + json.dump({"palace_path": synced_world["palace_path"]}, f) + self._patch(monkeypatch, MempalaceConfig(config_dir=str(cfg_dir)), kg) + + # Compute lock path the same way mine_palace_lock does. + resolved = os.path.realpath(os.path.expanduser(synced_world["palace_path"])) + palace_key = hashlib.sha256(os.path.normcase(resolved).encode()).hexdigest()[:16] + lock_dir = os.path.join(os.path.expanduser("~"), ".mempalace", "locks") + os.makedirs(lock_dir, exist_ok=True) + lock_path = os.path.join(lock_dir, f"mine_palace_{palace_key}.lock") + Path(lock_path).touch() + + with open(lock_path, "r+") as lf: + fcntl.flock(lf, fcntl.LOCK_EX | fcntl.LOCK_NB) + try: + result = mcp_server.tool_sync( + project_dir=synced_world["repo_path"], wing="demo", apply=True + ) + finally: + fcntl.flock(lf, fcntl.LOCK_UN) + + assert result.get("success") is False + assert "another mine" in result.get("error", "").lower() + + +class TestSyncCli: + """T1: `cmd_sync` argparse + dispatch wrapper round-trip.""" + + def test_dry_run_default_no_mutation(self, monkeypatch, tmp_dir, synced_world, capsys): + from mempalace import cli + + argv = [ + "mempalace", + "--palace", + synced_world["palace_path"], + "sync", + synced_world["repo_path"], + ] + monkeypatch.setattr("sys.argv", argv) + cli.main() + + captured = capsys.readouterr().out + assert "DRY RUN" in captured + assert "would remove" in captured + + client, col = _open_drawers(synced_world["palace_path"]) + try: + assert len(_drawer_ids(col)) == 6 # synced_world seeds 6, dry-run touches none + finally: + del client + + def test_apply_flag_deletes(self, monkeypatch, tmp_dir, synced_world, capsys): + from mempalace import cli + + argv = [ + "mempalace", + "--palace", + synced_world["palace_path"], + "sync", + synced_world["repo_path"], + "--apply", + "--wing", + "demo", + ] + monkeypatch.setattr("sys.argv", argv) + cli.main() + + captured = capsys.readouterr().out + assert "Removed" in captured + assert "(removed)" in captured + + client, col = _open_drawers(synced_world["palace_path"]) + try: + survivors = _drawer_ids(col) + finally: + del client + assert survivors == { + "drawer_keep", + "drawer_no_source", + "drawer_out_of_scope", + } + + def test_cli_emits_wal_on_apply(self, monkeypatch, synced_world): + """F8 regression: cmd_sync must wire `_wal_log` so CLI deletes are + audited. Without this, scripted CLI invocations leave no trail.""" + from mempalace import cli, mcp_server + + seen = [] + original = mcp_server._wal_log + + def recording_wal(operation, params, result=None): + seen.append((operation, params, result)) + original(operation, params, result) + + monkeypatch.setattr(mcp_server, "_wal_log", recording_wal) + + argv = [ + "mempalace", + "--palace", + synced_world["palace_path"], + "sync", + synced_world["repo_path"], + "--apply", + "--wing", + "demo", + ] + monkeypatch.setattr("sys.argv", argv) + cli.main() + + ops = [op for op, _, _ in seen] + assert "sync_prune" in ops, f"CLI --apply did not emit WAL sync_prune entries; seen={ops}" + + def test_apply_without_scope_exits_2(self, monkeypatch, synced_world, capsys): + """F6 + F8 CLI hardening: --apply with no scope exits non-zero.""" + from mempalace import cli + + argv = [ + "mempalace", + "--palace", + synced_world["palace_path"], + "sync", + "--apply", + ] + monkeypatch.setattr("sys.argv", argv) + with pytest.raises(SystemExit) as exc_info: + cli.main() + assert exc_info.value.code == 2 diff --git a/website/reference/mcp-tools.md b/website/reference/mcp-tools.md index 6866aa6..220b15e 100644 --- a/website/reference/mcp-tools.md +++ b/website/reference/mcp-tools.md @@ -1,6 +1,6 @@ # MCP Tools Reference -Detailed parameter schemas for all 29 MCP tools. +Detailed parameter schemas for all 30 MCP tools. ## Palace — Read Tools @@ -114,6 +114,20 @@ Delete a drawer by ID. Irreversible. --- +### `mempalace_sync` + +Prune drawers whose source files are gitignored, deleted, or moved. Returns a dry-run report by default; pass `apply=true` to commit deletions. + +| Parameter | Type | Required | Description | +|-----------|------|----------|-------------| +| `project_dir` | string | No | Project root to scope the sync (auto-detected from drawer metadata if omitted) | +| `wing` | string | No | Limit to one wing | +| `apply` | boolean | No | Actually delete drawers; default is dry-run preview | + +**Returns:** `{ scanned, kept, gitignored, missing, no_source, out_of_scope, removed_drawers, removed_closets, dry_run, by_source }` + +--- + ### `mempalace_get_drawer` Fetch a single drawer by ID — returns full content and metadata.