fix: add file-level locking to prevent multi-agent duplicate drawers

Root cause: when multiple agents mine simultaneously, both pass
file_already_mined() check, both delete+insert the same file's
drawers, creating duplicates or losing data.

Fix: mine_lock() in palace.py — cross-platform file lock (fcntl on
Unix, msvcrt on Windows). Both miner.py and convo_miner.py now lock
per-file during the delete+insert cycle and re-check after acquiring
the lock.

Tested:
- Lock acquires and releases correctly
- Second agent blocks until first releases (0.25s wait)
- 33/33 existing tests pass
- Cross-platform: fcntl (macOS/Linux), msvcrt (Windows)

Based on v3.2.0 tag.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
MSL
2026-04-13 01:16:51 -07:00
committed by Igor Lins e Silva
parent 6614b9b4e7
commit 30a431924b
3 changed files with 102 additions and 51 deletions
+34 -28
View File
@@ -16,7 +16,7 @@ from datetime import datetime
from collections import defaultdict
from .normalize import normalize
from .palace import SKIP_DIRS, get_collection, file_already_mined
from .palace import SKIP_DIRS, get_collection, file_already_mined, mine_lock
# File types that might contain conversations
@@ -375,34 +375,40 @@ def mine_convos(
if extract_mode != "general":
room_counts[room] += 1
# File each chunk
# File each chunk — lock to prevent concurrent agents duplicating
drawers_added = 0
for chunk in chunks:
chunk_room = chunk.get("memory_type", room) if extract_mode == "general" else room
if extract_mode == "general":
room_counts[chunk_room] += 1
drawer_id = f"drawer_{wing}_{chunk_room}_{hashlib.sha256((source_file + str(chunk['chunk_index'])).encode()).hexdigest()[:24]}"
try:
collection.upsert(
documents=[chunk["content"]],
ids=[drawer_id],
metadatas=[
{
"wing": wing,
"room": chunk_room,
"source_file": source_file,
"chunk_index": chunk["chunk_index"],
"added_by": agent,
"filed_at": datetime.now().isoformat(),
"ingest_mode": "convos",
"extract_mode": extract_mode,
}
],
)
drawers_added += 1
except Exception as e:
if "already exists" not in str(e).lower():
raise
with mine_lock(source_file):
# Re-check after lock — another agent may have just finished this file
if file_already_mined(collection, source_file):
files_skipped += 1
continue
for chunk in chunks:
chunk_room = chunk.get("memory_type", room) if extract_mode == "general" else room
if extract_mode == "general":
room_counts[chunk_room] += 1
drawer_id = f"drawer_{wing}_{chunk_room}_{hashlib.sha256((source_file + str(chunk['chunk_index'])).encode()).hexdigest()[:24]}"
try:
collection.upsert(
documents=[chunk["content"]],
ids=[drawer_id],
metadatas=[
{
"wing": wing,
"room": chunk_room,
"source_file": source_file,
"chunk_index": chunk["chunk_index"],
"added_by": agent,
"filed_at": datetime.now().isoformat(),
"ingest_mode": "convos",
"extract_mode": extract_mode,
}
],
)
drawers_added += 1
except Exception as e:
if "already exists" not in str(e).lower():
raise
total_drawers += drawers_added
print(f" ✓ [{i:4}/{len(files)}] {filepath.name[:50]:50} +{drawers_added}")