fix(mine): identify lock holder + exit non-zero on contention

When a `mempalace mine` collided with another writer (live mcp_server,
another mine, anything taking mine_palace_lock), the operator saw a
generic "another `mempalace mine` is already running" message and the
CLI exited 0 — making the contention invisible to nohup or scripts
checking $?. The reporter ran a `nohup mempalace mine ... & disown`
and got a 200-byte log with only the auto-defaults warning, no clue
that an MCP server was holding the store.

palace.py: the lock file now records the holder's PID + first three
argv tokens on acquire. A failed acquire reads the file and surfaces
"palace <path> is held by PID N (mempalace mcp_server); wait for it
to finish or stop the holder before retrying" in the
MineAlreadyRunning message. Open mode changes from "w" to "a+" so the
prior holder's identity survives long enough to be read.

miner.mine() now lets MineAlreadyRunning propagate. cmd_mine catches
it, prints the holder-aware message to stderr, and exits non-zero so
shell wrappers detect the contention.

Note: this is a behavior change for in-process callers that depended
on miner.mine() silently swallowing MineAlreadyRunning. The silent
swallow was the bug.

Closes #1264
This commit is contained in:
Igor Lins e Silva
2026-05-08 01:00:00 -03:00
parent ea36a00f5f
commit ef8d83cc8a
5 changed files with 213 additions and 46 deletions
+33 -23
View File
@@ -500,31 +500,41 @@ def cmd_mine(args):
llm_provider=None,
)
if args.mode == "convos":
from .convo_miner import mine_convos
from .palace import MineAlreadyRunning
mine_convos(
convo_dir=args.dir,
palace_path=palace_path,
wing=args.wing,
agent=args.agent,
limit=args.limit,
dry_run=args.dry_run,
extract_mode=args.extract,
)
else:
from .miner import mine
try:
if args.mode == "convos":
from .convo_miner import mine_convos
mine(
project_dir=args.dir,
palace_path=palace_path,
wing_override=args.wing,
agent=args.agent,
limit=args.limit,
dry_run=args.dry_run,
respect_gitignore=not args.no_gitignore,
include_ignored=include_ignored,
)
mine_convos(
convo_dir=args.dir,
palace_path=palace_path,
wing=args.wing,
agent=args.agent,
limit=args.limit,
dry_run=args.dry_run,
extract_mode=args.extract,
)
else:
from .miner import mine
mine(
project_dir=args.dir,
palace_path=palace_path,
wing_override=args.wing,
agent=args.agent,
limit=args.limit,
dry_run=args.dry_run,
respect_gitignore=not args.no_gitignore,
include_ignored=include_ignored,
)
except MineAlreadyRunning as exc:
# A live MCP server or another mine is already writing to this
# palace. Surface the holder identity so the operator knows what
# to wait for (or stop), and exit non-zero so wrappers like
# nohup / scripts can detect the contention.
print(f"mempalace: {exc}", file=sys.stderr)
sys.exit(1)
def cmd_sweep(args):
+14 -20
View File
@@ -21,7 +21,6 @@ from typing import Optional
from .palace import (
NORMALIZE_VERSION,
SKIP_DIRS,
MineAlreadyRunning,
build_closet_lines,
file_already_mined,
get_closets_collection,
@@ -1035,26 +1034,21 @@ def mine(
files=files,
)
try:
with mine_palace_lock(palace_path):
return _mine_impl(
project_dir,
palace_path,
wing_override=wing_override,
agent=agent,
limit=limit,
dry_run=dry_run,
respect_gitignore=respect_gitignore,
include_ignored=include_ignored,
files=files,
)
except MineAlreadyRunning:
print(
f"mempalace: another `mine` is already running against "
f"{palace_path} — exiting cleanly.",
file=sys.stderr,
# MineAlreadyRunning propagates so the CLI can render a clear holder-aware
# message and exit non-zero. In-process callers (tests, library users) that
# expect to coexist with another writer should handle the exception.
with mine_palace_lock(palace_path):
return _mine_impl(
project_dir,
palace_path,
wing_override=wing_override,
agent=agent,
limit=limit,
dry_run=dry_run,
respect_gitignore=respect_gitignore,
include_ignored=include_ignored,
files=files,
)
return
def _mine_impl(
+48 -3
View File
@@ -9,6 +9,7 @@ import hashlib
import logging
import os
import re
import sys
import threading
from typing import Optional
@@ -364,6 +365,41 @@ def _mark_released(lock_key: str) -> None:
_holder_state().discard(lock_key)
def _format_lock_holder(content: str) -> str:
    """Turn the text stored in a lock file into a short holder description.

    The body is expected to look like ``"<pid> <command line>"``. The first
    whitespace-delimited token is taken as the PID and everything after it
    as the command line.

    Returns:
        ``"PID N (cmdline)"`` when both parts are present, ``"PID N"`` when
        only a numeric PID is present, and a generic placeholder when the
        body is empty or does not start with a run of digits.
    """
    tokens = content.split(maxsplit=1)
    pid = tokens[0] if tokens else ""
    if not pid.isdigit():
        # Empty body, or a leading token that is not a PID.
        return "another writer (identity not recorded)"

    cmdline = tokens[1].strip() if len(tokens) > 1 else ""
    if cmdline:
        return f"PID {pid} ({cmdline})"
    return f"PID {pid}"
def _read_lock_holder(lock_file) -> str:
    """Read the prior holder's identity from the lock-file body, best-effort.

    Args:
        lock_file: An open, readable, seekable text stream positioned
            anywhere; it is rewound to the start before reading.

    Returns:
        A human-readable holder description produced by
        ``_format_lock_holder``, or a generic placeholder when the body is
        empty, unreadable, or cannot be decoded.

    This function never raises for I/O or decoding problems: it is called
    while building a contention error message, and a failure here must not
    replace that message with an unrelated traceback.
    """
    unknown = "another writer (identity not recorded)"
    try:
        lock_file.seek(0)
        content = lock_file.read().strip()
    except (OSError, UnicodeError):
        # UnicodeError covers a body written under a different encoding or
        # caught mid-write; treat it the same as an unreadable file.
        return unknown
    if not content:
        return unknown
    return _format_lock_holder(content)
def _write_lock_holder(lock_file) -> None:
    """Record this process's identity in the lock-file body. Best-effort.

    The identity is the current PID followed by the first three
    ``sys.argv`` tokens, space-separated. Any previous body is discarded
    first so a stale holder's record does not linger.

    Args:
        lock_file: An open, writable, seekable text stream.

    The record is diagnostic only, so failures are swallowed rather than
    propagated. That includes encoding failures: ``sys.argv`` may contain
    characters the stream's encoding cannot represent (for example
    surrogate-escaped bytes on POSIX).
    """
    try:
        ident = f"{os.getpid()} {' '.join(sys.argv[:3])}".strip()
        lock_file.seek(0)
        lock_file.truncate()
        lock_file.write(ident)
        lock_file.flush()
    except (OSError, UnicodeError):
        # Could not record who holds the lock; later contenders will see
        # an empty or partial body and fall back to a generic message.
        pass
@contextlib.contextmanager
def mine_palace_lock(palace_path: str):
"""Per-palace non-blocking lock around the full `mine` pipeline.
@@ -407,7 +443,10 @@ def mine_palace_lock(palace_path: str):
yield
return
lf = open(lock_path, "w")
# "a+" preserves the prior holder's identity recorded inside the file so
# a failed acquire can name who is holding the lock (#1264). "w" mode
# would have truncated the file before we could read it.
lf = open(lock_path, "a+")
acquired = False
try:
if os.name == "nt":
@@ -417,8 +456,10 @@ def mine_palace_lock(palace_path: str):
msvcrt.locking(lf.fileno(), msvcrt.LK_NBLCK, 1)
acquired = True
except OSError as exc:
holder = _read_lock_holder(lf)
raise MineAlreadyRunning(
f"another `mempalace mine` is already running against {resolved}"
f"palace {resolved} is held by {holder}; "
"wait for it to finish or stop the holder before retrying"
) from exc
else:
import fcntl
@@ -427,9 +468,13 @@ def mine_palace_lock(palace_path: str):
fcntl.flock(lf, fcntl.LOCK_EX | fcntl.LOCK_NB)
acquired = True
except BlockingIOError as exc:
holder = _read_lock_holder(lf)
raise MineAlreadyRunning(
f"another `mempalace mine` is already running against {resolved}"
f"palace {resolved} is held by {holder}; "
"wait for it to finish or stop the holder before retrying"
) from exc
# Record our own identity for any later contender's diagnostic message.
_write_lock_holder(lf)
_mark_held(palace_key)
try:
yield