fix(mine): identify lock holder + exit non-zero on contention
When a `mempalace mine` collided with another writer (live mcp_server, another mine, anything taking mine_palace_lock), the operator saw a generic "another `mempalace mine` is already running" message and the CLI exited 0 — making the contention invisible to nohup or scripts checking $?. The reporter ran a `nohup mempalace mine ... & disown` and got a 200-byte log with only the auto-defaults warning, no clue that an MCP server was holding the store. palace.py: the lock file now records the holder's PID + first three argv tokens on acquire. A failed acquire reads the file and surfaces "palace <path> is held by PID N (mempalace mcp_server); wait for it to finish or stop the holder before retrying" in the MineAlreadyRunning message. Open mode changes from "w" to "a+" so the prior holder's identity survives long enough to be read. miner.mine() now lets MineAlreadyRunning propagate. cmd_mine catches it, prints the holder-aware message to stderr, and exits non-zero so shell wrappers detect the contention. Note: this is a behavior change for in-process callers that depended on miner.mine() silently swallowing MineAlreadyRunning. The silent swallow was the bug. Closes #1264
This commit is contained in:
@@ -555,6 +555,45 @@ def test_cmd_mine_include_ignored_comma_split(mock_config_cls):
|
||||
assert call_kwargs["include_ignored"] == ["a.txt", "b.txt", "c.txt"]
|
||||
|
||||
|
||||
@patch("mempalace.cli.MempalaceConfig")
|
||||
def test_cmd_mine_exits_nonzero_on_lock_holder(mock_config_cls, capsys):
|
||||
"""Regression #1264: lock contention must exit non-zero with a clear message.
|
||||
|
||||
Before this fix the CLI silently returned 0 when another writer held
|
||||
the palace lock — operators using nohup/scripts had no way to detect
|
||||
the contention. The new behavior raises MineAlreadyRunning out of
|
||||
miner.mine() and cmd_mine catches it, printing the holder identity
|
||||
to stderr and exiting non-zero.
|
||||
"""
|
||||
from mempalace.palace import MineAlreadyRunning
|
||||
|
||||
mock_config_cls.return_value.palace_path = "/fake/palace"
|
||||
args = argparse.Namespace(
|
||||
dir="/src",
|
||||
palace=None,
|
||||
mode="projects",
|
||||
wing=None,
|
||||
agent="mempalace",
|
||||
limit=0,
|
||||
dry_run=False,
|
||||
no_gitignore=False,
|
||||
include_ignored=[],
|
||||
extract="exchange",
|
||||
)
|
||||
with patch(
|
||||
"mempalace.miner.mine",
|
||||
side_effect=MineAlreadyRunning(
|
||||
"palace /fake/palace is held by PID 12345 (mempalace mcp_server); wait for it to finish"
|
||||
),
|
||||
):
|
||||
with pytest.raises(SystemExit) as excinfo:
|
||||
cmd_mine(args)
|
||||
assert excinfo.value.code == 1
|
||||
captured = capsys.readouterr()
|
||||
assert "PID 12345" in captured.err
|
||||
assert "mcp_server" in captured.err
|
||||
|
||||
|
||||
# ── cmd_wakeup ─────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
|
||||
@@ -208,6 +208,85 @@ def _try_acquire_expect_busy(palace_path, result_q):
|
||||
result_q.put("busy")
|
||||
|
||||
|
||||
def _hold_lock_send_pid(palace_path: str, ready_flag: str, release_flag: str, pid_q) -> None:
|
||||
"""Acquire the lock, push our PID + cmdline through the queue, then wait."""
|
||||
import sys as _sys
|
||||
|
||||
try:
|
||||
with mine_palace_lock(palace_path):
|
||||
pid_q.put((os.getpid(), list(_sys.argv[:3])))
|
||||
open(ready_flag, "w").close()
|
||||
for _ in range(500):
|
||||
if os.path.exists(release_flag):
|
||||
return
|
||||
time.sleep(0.01)
|
||||
except MineAlreadyRunning:
|
||||
pid_q.put(("error", "raised"))
|
||||
|
||||
|
||||
def test_lock_failure_message_names_holder(tmp_path, monkeypatch):
|
||||
"""Regression #1264: failed acquire must identify the holder by PID.
|
||||
|
||||
Before this fix, a `mempalace mine` colliding with another writer
|
||||
(mine, MCP server, anything taking mine_palace_lock) saw a generic
|
||||
"another `mempalace mine` is already running" message and exited
|
||||
silently. The operator had no signal of which process to wait for
|
||||
or stop. The new message includes ``PID N`` so the holder can be
|
||||
identified directly.
|
||||
"""
|
||||
monkeypatch.setenv("HOME", str(tmp_path))
|
||||
palace = str(tmp_path / "palace")
|
||||
ready = str(tmp_path / "ready")
|
||||
release = str(tmp_path / "release")
|
||||
|
||||
ctx = _get_mp_context()
|
||||
pid_q = ctx.Queue()
|
||||
holder = ctx.Process(target=_hold_lock_send_pid, args=(palace, ready, release, pid_q))
|
||||
holder.start()
|
||||
try:
|
||||
for _ in range(500):
|
||||
if os.path.exists(ready):
|
||||
break
|
||||
time.sleep(0.01)
|
||||
assert os.path.exists(ready), "holder failed to acquire lock in time"
|
||||
holder_pid, _holder_argv = pid_q.get(timeout=2)
|
||||
|
||||
with pytest.raises(MineAlreadyRunning) as excinfo:
|
||||
with mine_palace_lock(palace):
|
||||
pytest.fail("second acquire of same palace should have raised")
|
||||
|
||||
msg = str(excinfo.value)
|
||||
assert (
|
||||
f"PID {holder_pid}" in msg
|
||||
), f"lock-failure message must name the holder PID; got: {msg!r}"
|
||||
finally:
|
||||
open(release, "w").close()
|
||||
holder.join(timeout=5)
|
||||
|
||||
|
||||
def test_lock_holder_identity_persists_across_release(tmp_path, monkeypatch):
|
||||
"""The holder line is overwritten by each new acquirer, not appended.
|
||||
|
||||
Without explicit truncate the lock file would accumulate lines across
|
||||
runs and grow without bound. Verify that re-acquire keeps the body
|
||||
bounded.
|
||||
"""
|
||||
monkeypatch.setenv("HOME", str(tmp_path))
|
||||
palace = str(tmp_path / "palace")
|
||||
for _ in range(5):
|
||||
with mine_palace_lock(palace):
|
||||
pass
|
||||
|
||||
# Locate the lock file. The key derivation is internal but we can find
|
||||
# it by scanning the mempalace locks dir for mine_palace_*.lock entries.
|
||||
lock_dir = tmp_path / ".mempalace" / "locks"
|
||||
lock_files = list(lock_dir.glob("mine_palace_*.lock"))
|
||||
assert lock_files, "expected the palace lock file to exist after acquire/release"
|
||||
body = lock_files[0].read_text()
|
||||
# One identity line, no accumulation.
|
||||
assert body.count("\n") <= 1, f"lock body must not grow across re-acquires; got {body!r}"
|
||||
|
||||
|
||||
def test_mine_global_lock_is_alias_for_back_compat(tmp_path, monkeypatch):
|
||||
"""Old callers of `mine_global_lock` should still work."""
|
||||
monkeypatch.setenv("HOME", str(tmp_path))
|
||||
|
||||
Reference in New Issue
Block a user