fix: add file-level locking to prevent multi-agent duplicate drawers
Root cause: when multiple agents mine simultaneously, both pass file_already_mined() check, both delete+insert the same file's drawers, creating duplicates or losing data. Fix: mine_lock() in palace.py — cross-platform file lock (fcntl on Unix, msvcrt on Windows). Both miner.py and convo_miner.py now lock per-file during the delete+insert cycle and re-check after acquiring the lock. Tested: - Lock acquires and releases correctly - Second agent blocks until first releases (0.25s wait) - 33/33 existing tests pass - Cross-platform: fcntl (macOS/Linux), msvcrt (Windows) Based on v3.2.0 tag. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
+34
-28
@@ -16,7 +16,7 @@ from datetime import datetime
|
||||
from collections import defaultdict
|
||||
|
||||
from .normalize import normalize
|
||||
from .palace import SKIP_DIRS, get_collection, file_already_mined
|
||||
from .palace import SKIP_DIRS, get_collection, file_already_mined, mine_lock
|
||||
|
||||
|
||||
# File types that might contain conversations
|
||||
@@ -375,34 +375,40 @@ def mine_convos(
|
||||
if extract_mode != "general":
|
||||
room_counts[room] += 1
|
||||
|
||||
# File each chunk
|
||||
# File each chunk — lock to prevent concurrent agents duplicating
|
||||
drawers_added = 0
|
||||
for chunk in chunks:
|
||||
chunk_room = chunk.get("memory_type", room) if extract_mode == "general" else room
|
||||
if extract_mode == "general":
|
||||
room_counts[chunk_room] += 1
|
||||
drawer_id = f"drawer_{wing}_{chunk_room}_{hashlib.sha256((source_file + str(chunk['chunk_index'])).encode()).hexdigest()[:24]}"
|
||||
try:
|
||||
collection.upsert(
|
||||
documents=[chunk["content"]],
|
||||
ids=[drawer_id],
|
||||
metadatas=[
|
||||
{
|
||||
"wing": wing,
|
||||
"room": chunk_room,
|
||||
"source_file": source_file,
|
||||
"chunk_index": chunk["chunk_index"],
|
||||
"added_by": agent,
|
||||
"filed_at": datetime.now().isoformat(),
|
||||
"ingest_mode": "convos",
|
||||
"extract_mode": extract_mode,
|
||||
}
|
||||
],
|
||||
)
|
||||
drawers_added += 1
|
||||
except Exception as e:
|
||||
if "already exists" not in str(e).lower():
|
||||
raise
|
||||
with mine_lock(source_file):
|
||||
# Re-check after lock — another agent may have just finished this file
|
||||
if file_already_mined(collection, source_file):
|
||||
files_skipped += 1
|
||||
continue
|
||||
|
||||
for chunk in chunks:
|
||||
chunk_room = chunk.get("memory_type", room) if extract_mode == "general" else room
|
||||
if extract_mode == "general":
|
||||
room_counts[chunk_room] += 1
|
||||
drawer_id = f"drawer_{wing}_{chunk_room}_{hashlib.sha256((source_file + str(chunk['chunk_index'])).encode()).hexdigest()[:24]}"
|
||||
try:
|
||||
collection.upsert(
|
||||
documents=[chunk["content"]],
|
||||
ids=[drawer_id],
|
||||
metadatas=[
|
||||
{
|
||||
"wing": wing,
|
||||
"room": chunk_room,
|
||||
"source_file": source_file,
|
||||
"chunk_index": chunk["chunk_index"],
|
||||
"added_by": agent,
|
||||
"filed_at": datetime.now().isoformat(),
|
||||
"ingest_mode": "convos",
|
||||
"extract_mode": extract_mode,
|
||||
}
|
||||
],
|
||||
)
|
||||
drawers_added += 1
|
||||
except Exception as e:
|
||||
if "already exists" not in str(e).lower():
|
||||
raise
|
||||
|
||||
total_drawers += drawers_added
|
||||
print(f" ✓ [{i:4}/{len(files)}] {filepath.name[:50]:50} +{drawers_added}")
|
||||
|
||||
Reference in New Issue
Block a user