fix: add file-level locking to prevent multi-agent duplicate drawers

Root cause: when two or more agents mine the same file simultaneously,
each passes the file_already_mined() check and each then deletes and
re-inserts that file's drawers, creating duplicates or losing data.

Fix: mine_lock() in palace.py — cross-platform file lock (fcntl on
Unix, msvcrt on Windows). Both miner.py and convo_miner.py now lock
per-file during the delete+insert cycle and re-check after acquiring
the lock.

Tested:
- Lock acquires and releases correctly
- Second agent blocks until first releases (0.25s wait)
- 33/33 existing tests pass
- Cross-platform: fcntl (macOS/Linux), msvcrt (Windows)

Based on v3.2.0 tag.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
MSL
2026-04-13 01:16:51 -07:00
committed by Igor Lins e Silva
parent 6614b9b4e7
commit 30a431924b
3 changed files with 102 additions and 51 deletions
+8 -2
View File
@@ -16,7 +16,7 @@ from datetime import datetime
from collections import defaultdict from collections import defaultdict
from .normalize import normalize from .normalize import normalize
from .palace import SKIP_DIRS, get_collection, file_already_mined from .palace import SKIP_DIRS, get_collection, file_already_mined, mine_lock
# File types that might contain conversations # File types that might contain conversations
@@ -375,8 +375,14 @@ def mine_convos(
if extract_mode != "general": if extract_mode != "general":
room_counts[room] += 1 room_counts[room] += 1
# File each chunk # File each chunk — lock to prevent concurrent agents duplicating
drawers_added = 0 drawers_added = 0
with mine_lock(source_file):
# Re-check after lock — another agent may have just finished this file
if file_already_mined(collection, source_file):
files_skipped += 1
continue
for chunk in chunks: for chunk in chunks:
chunk_room = chunk.get("memory_type", room) if extract_mode == "general" else room chunk_room = chunk.get("memory_type", room) if extract_mode == "general" else room
if extract_mode == "general": if extract_mode == "general":
+9 -1
View File
@@ -15,7 +15,7 @@ from pathlib import Path
from datetime import datetime from datetime import datetime
from collections import defaultdict from collections import defaultdict
from .palace import SKIP_DIRS, get_collection, file_already_mined from .palace import SKIP_DIRS, get_collection, file_already_mined, mine_lock
READABLE_EXTENSIONS = { READABLE_EXTENSIONS = {
".txt", ".txt",
@@ -434,6 +434,14 @@ def process_file(
print(f" [DRY RUN] {filepath.name} → room:{room} ({len(chunks)} drawers)") print(f" [DRY RUN] {filepath.name} → room:{room} ({len(chunks)} drawers)")
return len(chunks), room return len(chunks), room
# Lock this file so concurrent agents don't interleave delete+insert.
# Without the lock, two agents can both pass file_already_mined(),
# both delete, and both insert — creating duplicates or losing data.
with mine_lock(source_file):
# Re-check after acquiring lock — another agent may have just finished
if file_already_mined(collection, source_file, check_mtime=True):
return 0, room
# Purge stale drawers for this file before re-inserting the fresh chunks. # Purge stale drawers for this file before re-inserting the fresh chunks.
# Converts modified-file re-mines from upsert-over-existing-IDs (which hits # Converts modified-file re-mines from upsert-over-existing-IDs (which hits
# hnswlib's thread-unsafe updatePoint path and can segfault on macOS ARM # hnswlib's thread-unsafe updatePoint path and can segfault on macOS ARM
+37
View File
@@ -4,6 +4,8 @@ palace.py — Shared palace operations.
Consolidates collection access patterns used by both miners and the MCP server. Consolidates collection access patterns used by both miners and the MCP server.
""" """
import contextlib
import hashlib
import os import os
from .backends.chroma import ChromaBackend from .backends.chroma import ChromaBackend
@@ -50,6 +52,41 @@ def get_collection(
) )
@contextlib.contextmanager
def mine_lock(source_file: str):
    """Hold an exclusive cross-process lock while mining *source_file*.

    Prevents multiple agents from mining the same file simultaneously,
    which causes duplicate drawers when the delete+insert cycle interleaves.

    The lock is an OS-level file lock (``fcntl.flock`` on POSIX,
    ``msvcrt.locking`` on Windows) taken on a per-source lock file under
    ``~/.mempalace/locks``. The call blocks until the lock is available, and
    the lock is released when the ``with`` block exits, including on error.

    Args:
        source_file: Path of the file being mined. Only its text is used,
            to derive the lock file name; the file itself is not opened.

    Raises:
        OSError: If the lock directory or lock file cannot be created, or
            the lock cannot be acquired for a reason other than contention.
    """
    lock_dir = os.path.join(os.path.expanduser("~"), ".mempalace", "locks")
    os.makedirs(lock_dir, exist_ok=True)
    # A 16-hex-digit prefix keeps names short; a collision would only make
    # two unrelated source files share one lock (extra waiting, no corruption).
    digest = hashlib.sha256(source_file.encode()).hexdigest()[:16]
    lock_path = os.path.join(lock_dir, digest + ".lock")

    # Open without truncating: another agent may currently hold a lock on
    # this very file, and the lock file's contents are never used anyway.
    fd = os.open(lock_path, os.O_RDWR | os.O_CREAT, 0o666)
    locked = False
    try:
        if os.name == "nt":
            import errno
            import msvcrt

            # msvcrt locks a byte range starting at the current position,
            # so pin the position to 0 for both lock and unlock.
            os.lseek(fd, 0, os.SEEK_SET)
            while True:
                try:
                    msvcrt.locking(fd, msvcrt.LK_LOCK, 1)
                    break
                except OSError as exc:
                    # LK_LOCK gives up after roughly ten one-second retries
                    # and reports EDEADLOCK. Keep waiting so Windows blocks
                    # until release, just like flock() does on POSIX.
                    if exc.errno != errno.EDEADLK:
                        raise
        else:
            import fcntl

            fcntl.flock(fd, fcntl.LOCK_EX)
        locked = True
        yield
    finally:
        try:
            if locked:
                if os.name == "nt":
                    import msvcrt

                    os.lseek(fd, 0, os.SEEK_SET)
                    msvcrt.locking(fd, msvcrt.LK_UNLCK, 1)
                else:
                    import fcntl

                    fcntl.flock(fd, fcntl.LOCK_UN)
        except OSError:
            # Best effort: closing the descriptor below drops the lock too.
            pass
        finally:
            os.close(fd)
def file_already_mined(collection, source_file: str, check_mtime: bool = False) -> bool: def file_already_mined(collection, source_file: str, check_mtime: bool = False) -> bool:
"""Check if a file has already been filed in the palace. """Check if a file has already been filed in the palace.