fix(hooks): per-target PID guard with atomic claim (#1212, #1206)

The hook PID guard used a single global ``~/.mempalace/hook_state/mine.pid``
file, which failed two ways:

1. ``_mine_already_running`` read-then-spawn was a TOCTOU race. Two
   near-simultaneous Stop hook fires both passed the existence/liveness
   check before either wrote — so both ended up calling
   ``_spawn_mine``.

2. ``_spawn_mine`` unconditionally overwrote the global PID file with
   the new child's PID. The first PID was lost, orphaning the first
   child. The user-visible result in #1212 was two concurrent
   ``mempalace mine`` processes running against the same source, both
   driving HNSW inserts in parallel — exactly the corruption pattern
   the guard was meant to prevent. #1206 reported the same shape from
   the perspective of the user (two mines hung on a 350MB folder).

Replace the global file with per-target slots under
``~/.mempalace/hook_state/mine_pids/``, keyed by sha256 of the mine
sub-arguments (everything after ``mine``). The slot is claimed via
``O_CREAT | O_EXCL`` so the claim is atomic — two simultaneous fires
can never both pass.  Stale slots (PID exists but is dead) are
reclaimed transparently. Different targets (e.g. project mine vs
transcript ingest, or two different MEMPAL_DIRs) get independent
slots and run in parallel.

The mine subprocess receives its slot path via
``MEMPALACE_MINE_PID_FILE`` env var; ``miner._cleanup_mine_pid_file``
reads that var on exit and removes the slot if it points at our PID,
so orphaned slots from crashed mines don't accumulate.

Also routes ``_ingest_transcript`` through ``_spawn_mine`` so the
transcript ingest path now participates in the same dedup — repeated
Stop fires for the same transcript no longer stack parallel mines.

Closes #1212
Closes #1206
This commit is contained in:
Igor Lins e Silva
2026-05-08 02:09:00 -03:00
parent c70d5182dd
commit 3a76360301
4 changed files with 366 additions and 92 deletions
+143 -31
View File
@@ -6,6 +6,7 @@ Supported hooks: session-start, stop, precompact
Supported harnesses: claude-code, codex (extensible to cursor, gemini, etc.) Supported harnesses: claude-code, codex (extensible to cursor, gemini, etc.)
""" """
import hashlib
import json import json
import os import os
import re import re
@@ -256,7 +257,45 @@ def _get_mine_targets() -> list[tuple[str, str]]:
return targets return targets
_MINE_PID_FILE = STATE_DIR / "mine.pid" # Per-target PID guard.
#
# Hook fires ingest mines in the background. If a previous fire's child is
# still running for the *same* target (same source dir, mode, wing), the new
# fire should skip rather than pile up — multiple concurrent mines against the
# same source corrupt the HNSW index and exhaust disk via duplicate upserts
# (#1212, #1206). But mines targeting *different* sources / modes must remain
# independent so the user can have e.g. project-mining and transcript-ingest
# running in parallel.
#
# The single ``mine.pid`` global file used previously failed both ways: the
# guard was rebuilt every spawn (so two near-simultaneous fires both passed
# the check before either wrote), and the file was unconditionally overwritten
# (so the second spawn lost the first PID, orphaning it). The replacement is
# a directory of per-target slots, claimed via ``O_CREAT | O_EXCL`` so the
# claim is atomic and per-target.
_MINE_PID_DIR = STATE_DIR / "mine_pids"
# The per-process PID file path is communicated to the mine subprocess via
# this env var so the child's cleanup hook (in miner.py) can remove its
# own slot on exit without scanning the whole directory.
_MINE_PID_FILE_ENV = "MEMPALACE_MINE_PID_FILE"
def _pid_file_for_cmd(cmd: list[str]) -> Path:
"""Return the per-target PID file path for a mine subcommand.
The key is derived from the mine arguments (everything after ``mine``)
so different (dir, mode, wing) combinations get independent slots.
Two fires with the same arguments collapse to the same slot — which is
exactly the dedup we want.
"""
try:
idx = cmd.index("mine")
key = " ".join(cmd[idx:])
except ValueError:
key = " ".join(cmd)
digest = hashlib.sha256(key.encode("utf-8")).hexdigest()[:16]
return _MINE_PID_DIR / f"mine_{digest}.pid"
def _pid_alive(pid: int) -> bool: def _pid_alive(pid: int) -> bool:
@@ -292,22 +331,96 @@ def _pid_alive(pid: int) -> bool:
return False return False
def _mine_already_running() -> bool: def _mine_already_running(cmd: list[str]) -> bool:
"""Return True if a background mine process from a previous hook fire is still alive.""" """Return True if a previous mine for ``cmd``'s target is still alive."""
pid_file = _pid_file_for_cmd(cmd)
try: try:
pid = int(_MINE_PID_FILE.read_text().strip()) recorded = pid_file.read_text().strip()
except (OSError, ValueError): except OSError:
return False return False
return _pid_alive(pid) if not recorded.isdigit():
return False
return _pid_alive(int(recorded))
def _claim_mine_slot(cmd: list[str]) -> Path | None:
"""Atomically reserve the per-target PID slot for ``cmd``.
Returns the slot path on success, or ``None`` if the target is
already being mined by a live process. The reservation is done via
``O_CREAT | O_EXCL`` so two simultaneous hook fires can never both
pass the check; one wins, the other returns None.
A stale slot (file exists but the recorded PID is dead) is reclaimed
transparently — orphan miners that crashed without cleanup do not
block future hook fires forever.
"""
pid_file = _pid_file_for_cmd(cmd)
pid_file.parent.mkdir(parents=True, exist_ok=True)
try:
fd = os.open(str(pid_file), os.O_CREAT | os.O_EXCL | os.O_WRONLY, 0o600)
os.close(fd)
return pid_file
except FileExistsError:
pass
# Slot exists. If the holder is alive, defer.
if _mine_already_running(cmd):
return None
# Stale entry; reclaim. The unlink+create is racy against another hook
# firing right now, but the second create's O_EXCL will fail and that
# caller will see the live PID via the next round.
try:
pid_file.unlink()
except FileNotFoundError:
pass
except OSError:
return None
try:
fd = os.open(str(pid_file), os.O_CREAT | os.O_EXCL | os.O_WRONLY, 0o600)
os.close(fd)
return pid_file
except FileExistsError:
return None
def _spawn_mine(cmd: list) -> None: def _spawn_mine(cmd: list) -> None:
"""Spawn a mine subprocess, write its PID to the lock file, log to hook.log.""" """Spawn a mine subprocess if no live mine is already targeting it.
The PID slot is claimed atomically *before* the spawn, so two near-
simultaneous hook fires can't both proceed — the second sees the
claimed slot and silently skips. The spawned process inherits a
``MEMPALACE_MINE_PID_FILE`` env var so its cleanup hook can remove
the slot on exit without scanning the directory.
"""
STATE_DIR.mkdir(parents=True, exist_ok=True) STATE_DIR.mkdir(parents=True, exist_ok=True)
log_path = STATE_DIR / "hook.log" log_path = STATE_DIR / "hook.log"
pid_file = _claim_mine_slot(cmd)
if pid_file is None:
_log(f"Skipping mine: target already running ({' '.join(cmd[-3:])})")
return
child_env = os.environ.copy()
child_env[_MINE_PID_FILE_ENV] = str(pid_file)
with open(log_path, "a") as log_f: with open(log_path, "a") as log_f:
proc = subprocess.Popen(cmd, stdout=log_f, stderr=log_f, **_detached_popen_kwargs()) try:
_MINE_PID_FILE.write_text(str(proc.pid)) proc = subprocess.Popen(
cmd,
stdout=log_f,
stderr=log_f,
env=child_env,
**_detached_popen_kwargs(),
)
except OSError:
# Spawn failed; release the slot we just claimed so the next
# hook fire can try again rather than skipping forever.
try:
pid_file.unlink()
except OSError:
pass
raise
try:
pid_file.write_text(str(proc.pid))
except OSError:
pass
def _maybe_auto_ingest(): def _maybe_auto_ingest():
@@ -317,13 +430,15 @@ def _maybe_auto_ingest():
in the hook handlers — this function does not handle them, to avoid in the hook handlers — this function does not handle them, to avoid
asymmetric interpreter handling and PID-file overwrite when both asymmetric interpreter handling and PID-file overwrite when both
targets fire from a single hook call (#1231 review). targets fire from a single hook call (#1231 review).
Per-target dedup is done by ``_spawn_mine`` itself: each (dir, mode)
target gets its own PID slot, so distinct targets never block each
other but a re-fire of the same target while the previous one is
still running is silently skipped.
""" """
targets = _get_mine_targets() targets = _get_mine_targets()
if not targets: if not targets:
return return
if _mine_already_running():
_log("Skipping auto-ingest: mine already running")
return
for mine_dir, mode in targets: for mine_dir, mode in targets:
try: try:
_spawn_mine([_mempalace_python(), "-m", "mempalace", "mine", mine_dir, "--mode", mode]) _spawn_mine([_mempalace_python(), "-m", "mempalace", "mine", mine_dir, "--mode", mode])
@@ -518,25 +633,22 @@ def _ingest_transcript(transcript_path: str):
return return
try: try:
log_path = STATE_DIR / "hook.log" # Route through ``_spawn_mine`` so the per-target PID guard kicks
STATE_DIR.mkdir(parents=True, exist_ok=True) # in here too — repeated Stop/PreCompact fires for the same
with open(log_path, "a") as log_f: # transcript should not stack up parallel ingest mines.
subprocess.Popen( _spawn_mine(
[ [
_mempalace_python(), _mempalace_python(),
"-m", "-m",
"mempalace", "mempalace",
"mine", "mine",
str(path.parent), str(path.parent),
"--mode", "--mode",
"convos", "convos",
"--wing", "--wing",
"sessions", "sessions",
], ]
stdout=log_f, )
stderr=log_f,
**_detached_popen_kwargs(),
)
_log(f"Transcript ingest started: {path.name}") _log(f"Transcript ingest started: {path.name}")
except OSError: except OSError:
pass pass
+16 -17
View File
@@ -1206,30 +1206,29 @@ def _mine_impl(
def _cleanup_mine_pid_file() -> None: def _cleanup_mine_pid_file() -> None:
"""Remove the global mine PID file if it currently points at us. """Remove this process's per-target PID slot on exit.
The PID file (``~/.mempalace/hook_state/mine.pid``, written by the Hook-spawned mines receive ``MEMPALACE_MINE_PID_FILE`` in their env
hook in :func:`mempalace.hooks_cli._spawn_mine`) tracks the PID of pointing at the slot the hook claimed for them
the most recently spawned mine subprocess so the hook can dedup (``~/.mempalace/hook_state/mine_pids/mine_<sha>.pid``). When the
concurrent auto-ingest fires. When that subprocess exits — cleanly, subprocess exits — cleanly, on error, or via Ctrl-C — it removes its
on error, or via Ctrl-C — it should remove its own entry so the own slot so the next hook fire isn't briefly fooled by a stale PID
next hook fire isn't briefly fooled by a stale PID before before ``_pid_alive`` returns False.
``_pid_alive`` returns False.
We only delete the file if it claims our own PID; any other PID is Only delete the slot if it claims our own PID; any other PID is left
left alone (could be an unrelated mine running concurrently from alone (it could belong to an unrelated mine that just claimed the
a different worktree / session). same slot via a stale-reclaim race).
""" """
try: pid_file_env = os.environ.get("MEMPALACE_MINE_PID_FILE", "")
from .hooks_cli import _MINE_PID_FILE if not pid_file_env:
except Exception:
return return
try: try:
if not _MINE_PID_FILE.exists(): pid_file = Path(pid_file_env)
if not pid_file.exists():
return return
recorded = _MINE_PID_FILE.read_text().strip() recorded = pid_file.read_text().strip()
if recorded and recorded.isdigit() and int(recorded) == os.getpid(): if recorded and recorded.isdigit() and int(recorded) == os.getpid():
_MINE_PID_FILE.unlink() pid_file.unlink()
except OSError: except OSError:
# Best-effort cleanup; never fail the mine over PID bookkeeping. # Best-effort cleanup; never fail the mine over PID bookkeeping.
pass pass
+196 -35
View File
@@ -3,6 +3,7 @@ import io
import json import json
import os import os
import subprocess import subprocess
import sys
from pathlib import Path from pathlib import Path
from unittest.mock import MagicMock, patch from unittest.mock import MagicMock, patch
@@ -441,7 +442,7 @@ def test_maybe_auto_ingest_with_env(tmp_path):
mempal_dir.mkdir() mempal_dir.mkdir()
with patch.dict("os.environ", {"MEMPAL_DIR": str(mempal_dir)}): with patch.dict("os.environ", {"MEMPAL_DIR": str(mempal_dir)}):
with patch("mempalace.hooks_cli.STATE_DIR", tmp_path): with patch("mempalace.hooks_cli.STATE_DIR", tmp_path):
with patch("mempalace.hooks_cli._MINE_PID_FILE", tmp_path / "mine.pid"): with patch("mempalace.hooks_cli._MINE_PID_DIR", tmp_path / "mine_pids"):
with patch("mempalace.hooks_cli.subprocess.Popen") as mock_popen: with patch("mempalace.hooks_cli.subprocess.Popen") as mock_popen:
_maybe_auto_ingest() _maybe_auto_ingest()
mock_popen.assert_called_once() mock_popen.assert_called_once()
@@ -463,7 +464,7 @@ def test_maybe_auto_ingest_uses_mempalace_python(tmp_path):
mempal_dir.mkdir() mempal_dir.mkdir()
with patch.dict("os.environ", {"MEMPAL_DIR": str(mempal_dir)}): with patch.dict("os.environ", {"MEMPAL_DIR": str(mempal_dir)}):
with patch("mempalace.hooks_cli.STATE_DIR", tmp_path): with patch("mempalace.hooks_cli.STATE_DIR", tmp_path):
with patch("mempalace.hooks_cli._MINE_PID_FILE", tmp_path / "mine.pid"): with patch("mempalace.hooks_cli._MINE_PID_DIR", tmp_path / "mine_pids"):
with patch( with patch(
"mempalace.hooks_cli._mempalace_python", return_value="/fake/venv/python" "mempalace.hooks_cli._mempalace_python", return_value="/fake/venv/python"
): ):
@@ -513,7 +514,7 @@ def test_maybe_auto_ingest_ignores_transcript_arg_path(tmp_path):
transcript.write_text("") transcript.write_text("")
with patch.dict("os.environ", {}, clear=True): with patch.dict("os.environ", {}, clear=True):
with patch("mempalace.hooks_cli.STATE_DIR", tmp_path): with patch("mempalace.hooks_cli.STATE_DIR", tmp_path):
with patch("mempalace.hooks_cli._MINE_PID_FILE", tmp_path / "mine.pid"): with patch("mempalace.hooks_cli._MINE_PID_DIR", tmp_path / "mine_pids"):
with patch("mempalace.hooks_cli.subprocess.Popen") as mock_popen: with patch("mempalace.hooks_cli.subprocess.Popen") as mock_popen:
_maybe_auto_ingest() _maybe_auto_ingest()
mock_popen.assert_not_called() mock_popen.assert_not_called()
@@ -543,21 +544,38 @@ def test_maybe_auto_ingest_oserror(tmp_path):
mempal_dir.mkdir() mempal_dir.mkdir()
with patch.dict("os.environ", {"MEMPAL_DIR": str(mempal_dir)}): with patch.dict("os.environ", {"MEMPAL_DIR": str(mempal_dir)}):
with patch("mempalace.hooks_cli.STATE_DIR", tmp_path): with patch("mempalace.hooks_cli.STATE_DIR", tmp_path):
with patch("mempalace.hooks_cli._MINE_PID_FILE", tmp_path / "mine.pid"): with patch("mempalace.hooks_cli._MINE_PID_DIR", tmp_path / "mine_pids"):
with patch("mempalace.hooks_cli.subprocess.Popen", side_effect=OSError("fail")): with patch("mempalace.hooks_cli.subprocess.Popen", side_effect=OSError("fail")):
_maybe_auto_ingest() # should not raise _maybe_auto_ingest() # should not raise
def test_maybe_auto_ingest_skips_when_mine_running(tmp_path): def test_maybe_auto_ingest_skips_when_mine_running(tmp_path):
"""Does not spawn a new mine process if one is already running.""" """Does not spawn a new mine process if a mine for the same target is alive."""
mempal_dir = tmp_path / "project" mempal_dir = tmp_path / "project"
mempal_dir.mkdir() mempal_dir.mkdir()
pid_dir = tmp_path / "mine_pids"
with patch.dict("os.environ", {"MEMPAL_DIR": str(mempal_dir)}): with patch.dict("os.environ", {"MEMPAL_DIR": str(mempal_dir)}):
with patch("mempalace.hooks_cli.STATE_DIR", tmp_path): with patch("mempalace.hooks_cli.STATE_DIR", tmp_path):
with patch("mempalace.hooks_cli._mine_already_running", return_value=True): with patch("mempalace.hooks_cli._MINE_PID_DIR", pid_dir):
with patch("mempalace.hooks_cli.subprocess.Popen") as mock_popen: # Pre-populate the per-target slot with a live PID (our own).
_maybe_auto_ingest() from mempalace.hooks_cli import _pid_file_for_cmd
mock_popen.assert_not_called()
cmd = [
sys.executable,
"-m",
"mempalace",
"mine",
str(mempal_dir.resolve()),
"--mode",
"projects",
]
pid_file = _pid_file_for_cmd(cmd)
pid_file.parent.mkdir(parents=True, exist_ok=True)
pid_file.write_text(str(os.getpid()))
with patch("mempalace.hooks_cli._mempalace_python", return_value=sys.executable):
with patch("mempalace.hooks_cli.subprocess.Popen") as mock_popen:
_maybe_auto_ingest()
mock_popen.assert_not_called()
# --- _detached_popen_kwargs --- # --- _detached_popen_kwargs ---
@@ -604,7 +622,7 @@ def test_detached_popen_kwargs_windows(monkeypatch):
def test_spawn_mine_uses_detached_kwargs(tmp_path): def test_spawn_mine_uses_detached_kwargs(tmp_path):
"""_spawn_mine forwards detached kwargs so the hook can exit cleanly.""" """_spawn_mine forwards detached kwargs so the hook can exit cleanly."""
with patch("mempalace.hooks_cli.STATE_DIR", tmp_path): with patch("mempalace.hooks_cli.STATE_DIR", tmp_path):
with patch("mempalace.hooks_cli._MINE_PID_FILE", tmp_path / "mine.pid"): with patch("mempalace.hooks_cli._MINE_PID_DIR", tmp_path / "mine_pids"):
with patch("mempalace.hooks_cli.subprocess.Popen") as mock_popen: with patch("mempalace.hooks_cli.subprocess.Popen") as mock_popen:
mock_popen.return_value.pid = 9999 mock_popen.return_value.pid = 9999
from mempalace.hooks_cli import _spawn_mine from mempalace.hooks_cli import _spawn_mine
@@ -617,52 +635,195 @@ def test_spawn_mine_uses_detached_kwargs(tmp_path):
assert kwargs.get("close_fds") is True assert kwargs.get("close_fds") is True
def test_spawn_mine_skips_when_target_running(tmp_path):
"""A second spawn for the same cmd target while the first is alive must skip."""
pid_dir = tmp_path / "mine_pids"
with patch("mempalace.hooks_cli.STATE_DIR", tmp_path):
with patch("mempalace.hooks_cli._MINE_PID_DIR", pid_dir):
from mempalace.hooks_cli import _pid_file_for_cmd, _spawn_mine
cmd = ["mempalace", "mine", "/tmp/proj", "--mode", "projects"]
pid_file = _pid_file_for_cmd(cmd)
pid_file.parent.mkdir(parents=True, exist_ok=True)
pid_file.write_text(str(os.getpid())) # live PID
with patch("mempalace.hooks_cli.subprocess.Popen") as mock_popen:
_spawn_mine(cmd)
mock_popen.assert_not_called()
def test_spawn_mine_distinct_targets_dont_block_each_other(tmp_path):
"""Two spawn calls for *different* targets both proceed."""
pid_dir = tmp_path / "mine_pids"
with patch("mempalace.hooks_cli.STATE_DIR", tmp_path):
with patch("mempalace.hooks_cli._MINE_PID_DIR", pid_dir):
with patch("mempalace.hooks_cli.subprocess.Popen") as mock_popen:
from mempalace.hooks_cli import _spawn_mine
mock_popen.return_value.pid = 1111
_spawn_mine(["mempalace", "mine", "/tmp/a", "--mode", "projects"])
mock_popen.return_value.pid = 2222
_spawn_mine(["mempalace", "mine", "/tmp/b", "--mode", "projects"])
assert mock_popen.call_count == 2
def test_spawn_mine_reclaims_stale_slot(tmp_path):
"""A slot pointing at a dead PID is reclaimed silently."""
pid_dir = tmp_path / "mine_pids"
with patch("mempalace.hooks_cli.STATE_DIR", tmp_path):
with patch("mempalace.hooks_cli._MINE_PID_DIR", pid_dir):
from mempalace.hooks_cli import _pid_file_for_cmd, _spawn_mine
cmd = ["mempalace", "mine", "/tmp/proj", "--mode", "projects"]
pid_file = _pid_file_for_cmd(cmd)
pid_file.parent.mkdir(parents=True, exist_ok=True)
pid_file.write_text("999999999") # dead PID
with patch("mempalace.hooks_cli.subprocess.Popen") as mock_popen:
mock_popen.return_value.pid = 4242
_spawn_mine(cmd)
mock_popen.assert_called_once()
# New PID is recorded in the reclaimed slot.
assert pid_file.read_text().strip() == "4242"
def test_spawn_mine_releases_slot_on_oserror(tmp_path):
"""If Popen raises OSError, the claimed slot must be released."""
pid_dir = tmp_path / "mine_pids"
with patch("mempalace.hooks_cli.STATE_DIR", tmp_path):
with patch("mempalace.hooks_cli._MINE_PID_DIR", pid_dir):
from mempalace.hooks_cli import _pid_file_for_cmd, _spawn_mine
cmd = ["mempalace", "mine", "/tmp/proj", "--mode", "projects"]
pid_file = _pid_file_for_cmd(cmd)
with patch("mempalace.hooks_cli.subprocess.Popen", side_effect=OSError("spawn fail")):
with pytest.raises(OSError):
_spawn_mine(cmd)
assert (
not pid_file.exists()
), "slot must be released so the next hook fire isn't permanently blocked"
def test_spawn_mine_passes_pid_file_env_var(tmp_path):
"""The child inherits MEMPALACE_MINE_PID_FILE so its cleanup hook can find the slot."""
pid_dir = tmp_path / "mine_pids"
with patch("mempalace.hooks_cli.STATE_DIR", tmp_path):
with patch("mempalace.hooks_cli._MINE_PID_DIR", pid_dir):
with patch("mempalace.hooks_cli.subprocess.Popen") as mock_popen:
mock_popen.return_value.pid = 5555
from mempalace.hooks_cli import _pid_file_for_cmd, _spawn_mine
cmd = ["mempalace", "mine", "/tmp/x", "--mode", "projects"]
_spawn_mine(cmd)
child_env = mock_popen.call_args.kwargs.get("env", {})
expected = str(_pid_file_for_cmd(cmd))
assert child_env.get("MEMPALACE_MINE_PID_FILE") == expected
def test_ingest_transcript_uses_detached_kwargs(tmp_path): def test_ingest_transcript_uses_detached_kwargs(tmp_path):
"""_ingest_transcript spawns the convos mine with detach kwargs.""" """_ingest_transcript spawns the convos mine with detach kwargs."""
transcript = tmp_path / "session.jsonl" transcript = tmp_path / "session.jsonl"
transcript.write_text("x" * 200) # > 100 byte gate transcript.write_text("x" * 200) # > 100 byte gate
with patch("mempalace.hooks_cli.STATE_DIR", tmp_path): with patch("mempalace.hooks_cli.STATE_DIR", tmp_path):
with patch("mempalace.hooks_cli.subprocess.Popen") as mock_popen: with patch("mempalace.hooks_cli._MINE_PID_DIR", tmp_path / "mine_pids"):
from mempalace.hooks_cli import _ingest_transcript with patch("mempalace.hooks_cli.subprocess.Popen") as mock_popen:
from mempalace.hooks_cli import _ingest_transcript
_ingest_transcript(str(transcript)) _ingest_transcript(str(transcript))
assert mock_popen.called assert mock_popen.called
kwargs = mock_popen.call_args.kwargs kwargs = mock_popen.call_args.kwargs
assert kwargs.get("stdin") is subprocess.DEVNULL assert kwargs.get("stdin") is subprocess.DEVNULL
assert kwargs.get("close_fds") is True assert kwargs.get("close_fds") is True
def test_ingest_transcript_skips_when_target_running(tmp_path):
"""Repeated transcript ingests for the same transcript should dedup."""
transcript = tmp_path / "session.jsonl"
transcript.write_text("x" * 200)
pid_dir = tmp_path / "mine_pids"
with patch("mempalace.hooks_cli.STATE_DIR", tmp_path):
with patch("mempalace.hooks_cli._MINE_PID_DIR", pid_dir):
with patch("mempalace.hooks_cli._mempalace_python", return_value=sys.executable):
from mempalace.hooks_cli import _ingest_transcript, _pid_file_for_cmd
expected_cmd = [
sys.executable,
"-m",
"mempalace",
"mine",
str(transcript.parent),
"--mode",
"convos",
"--wing",
"sessions",
]
pid_file = _pid_file_for_cmd(expected_cmd)
pid_file.parent.mkdir(parents=True, exist_ok=True)
pid_file.write_text(str(os.getpid())) # live target
with patch("mempalace.hooks_cli.subprocess.Popen") as mock_popen:
_ingest_transcript(str(transcript))
mock_popen.assert_not_called()
# --- _mine_already_running --- # --- _mine_already_running ---
def _seed_slot(pid_dir, cmd, body: str):
"""Write ``body`` into the per-target slot for ``cmd`` under ``pid_dir``."""
from mempalace.hooks_cli import _pid_file_for_cmd
with patch("mempalace.hooks_cli._MINE_PID_DIR", pid_dir):
slot = _pid_file_for_cmd(cmd)
slot.parent.mkdir(parents=True, exist_ok=True)
slot.write_text(body)
return slot
def test_mine_already_running_no_file(tmp_path): def test_mine_already_running_no_file(tmp_path):
"""Returns False when no PID file exists.""" """Returns False when no per-target slot exists."""
with patch("mempalace.hooks_cli._MINE_PID_FILE", tmp_path / "mine.pid"): cmd = ["mempalace", "mine", "/tmp/x", "--mode", "projects"]
assert _mine_already_running() is False with patch("mempalace.hooks_cli._MINE_PID_DIR", tmp_path / "mine_pids"):
assert _mine_already_running(cmd) is False
def test_mine_already_running_dead_pid(tmp_path): def test_mine_already_running_dead_pid(tmp_path):
"""Returns False when PID file contains a PID that no longer exists.""" """Returns False when the slot's recorded PID is no longer alive."""
pid_file = tmp_path / "mine.pid" pid_dir = tmp_path / "mine_pids"
pid_file.write_text("999999999") # almost certainly not a real PID cmd = ["mempalace", "mine", "/tmp/x", "--mode", "projects"]
with patch("mempalace.hooks_cli._MINE_PID_FILE", pid_file): _seed_slot(pid_dir, cmd, "999999999") # almost certainly not a real PID
assert _mine_already_running() is False with patch("mempalace.hooks_cli._MINE_PID_DIR", pid_dir):
assert _mine_already_running(cmd) is False
def test_mine_already_running_live_pid(tmp_path): def test_mine_already_running_live_pid(tmp_path):
"""Returns True when PID file contains the current process's own PID.""" """Returns True when the slot's recorded PID is alive."""
pid_file = tmp_path / "mine.pid" pid_dir = tmp_path / "mine_pids"
pid_file.write_text(str(os.getpid())) # current process is definitely alive cmd = ["mempalace", "mine", "/tmp/x", "--mode", "projects"]
with patch("mempalace.hooks_cli._MINE_PID_FILE", pid_file): _seed_slot(pid_dir, cmd, str(os.getpid())) # current process is alive
assert _mine_already_running() is True with patch("mempalace.hooks_cli._MINE_PID_DIR", pid_dir):
assert _mine_already_running(cmd) is True
def test_mine_already_running_corrupt_file(tmp_path): def test_mine_already_running_corrupt_file(tmp_path):
"""Returns False when PID file contains non-integer content.""" """Returns False when the slot contains non-integer content."""
pid_file = tmp_path / "mine.pid" pid_dir = tmp_path / "mine_pids"
pid_file.write_text("not-a-pid") cmd = ["mempalace", "mine", "/tmp/x", "--mode", "projects"]
with patch("mempalace.hooks_cli._MINE_PID_FILE", pid_file): _seed_slot(pid_dir, cmd, "not-a-pid")
assert _mine_already_running() is False with patch("mempalace.hooks_cli._MINE_PID_DIR", pid_dir):
assert _mine_already_running(cmd) is False
def test_mine_already_running_distinct_cmds_independent(tmp_path):
"""Slots are keyed per cmd; an alive entry for cmd A doesn't shadow cmd B."""
pid_dir = tmp_path / "mine_pids"
cmd_a = ["mempalace", "mine", "/tmp/a", "--mode", "projects"]
cmd_b = ["mempalace", "mine", "/tmp/b", "--mode", "projects"]
_seed_slot(pid_dir, cmd_a, str(os.getpid()))
with patch("mempalace.hooks_cli._MINE_PID_DIR", pid_dir):
assert _mine_already_running(cmd_a) is True
assert _mine_already_running(cmd_b) is False
# --- _get_mine_targets --- # --- _get_mine_targets ---
+11 -9
View File
@@ -777,7 +777,7 @@ def test_mine_arbitrary_exception_prints_summary_and_reraises(tmp_path, capsys):
def test_mine_cleans_up_pid_file_on_interrupt(tmp_path): def test_mine_cleans_up_pid_file_on_interrupt(tmp_path):
"""Our own PID entry in mine.pid is removed in the finally clause.""" """Our own per-target PID slot is removed in the finally clause."""
import pytest import pytest
from unittest.mock import patch from unittest.mock import patch
@@ -786,14 +786,16 @@ def test_mine_cleans_up_pid_file_on_interrupt(tmp_path):
_make_minable_project(project_root, n_files=2) _make_minable_project(project_root, n_files=2)
palace_path = project_root / "palace" palace_path = project_root / "palace"
pid_file = tmp_path / "mine.pid" pid_file = tmp_path / "mine_abc.pid"
pid_file.write_text(str(os.getpid())) pid_file.write_text(str(os.getpid()))
def fake_process_file(*args, **kwargs): def fake_process_file(*args, **kwargs):
raise KeyboardInterrupt raise KeyboardInterrupt
# The mine subprocess receives its slot path via env var; the cleanup
# hook in miner.py reads that var and removes the slot if it matches.
with ( with (
patch("mempalace.hooks_cli._MINE_PID_FILE", pid_file), patch.dict(os.environ, {"MEMPALACE_MINE_PID_FILE": str(pid_file)}),
patch("mempalace.miner.process_file", side_effect=fake_process_file), patch("mempalace.miner.process_file", side_effect=fake_process_file),
): ):
with pytest.raises(SystemExit): with pytest.raises(SystemExit):
@@ -803,7 +805,7 @@ def test_mine_cleans_up_pid_file_on_interrupt(tmp_path):
def test_mine_cleans_up_pid_file_on_clean_exit(tmp_path): def test_mine_cleans_up_pid_file_on_clean_exit(tmp_path):
"""Successful mine also removes its own PID entry in the finally clause.""" """Successful mine also removes its own per-target PID slot."""
from unittest.mock import patch from unittest.mock import patch
project_root = tmp_path / "proj" project_root = tmp_path / "proj"
@@ -811,17 +813,17 @@ def test_mine_cleans_up_pid_file_on_clean_exit(tmp_path):
_make_minable_project(project_root, n_files=1) _make_minable_project(project_root, n_files=1)
palace_path = project_root / "palace" palace_path = project_root / "palace"
pid_file = tmp_path / "mine.pid" pid_file = tmp_path / "mine_abc.pid"
pid_file.write_text(str(os.getpid())) pid_file.write_text(str(os.getpid()))
with patch("mempalace.hooks_cli._MINE_PID_FILE", pid_file): with patch.dict(os.environ, {"MEMPALACE_MINE_PID_FILE": str(pid_file)}):
mine(str(project_root), str(palace_path)) mine(str(project_root), str(palace_path))
assert not pid_file.exists() assert not pid_file.exists()
def test_mine_does_not_remove_other_processes_pid_file(tmp_path): def test_mine_does_not_remove_other_processes_pid_file(tmp_path):
"""A PID file pointing at someone else's PID is left untouched.""" """A PID slot pointing at someone else's PID is left untouched."""
from unittest.mock import patch from unittest.mock import patch
project_root = tmp_path / "proj" project_root = tmp_path / "proj"
@@ -830,10 +832,10 @@ def test_mine_does_not_remove_other_processes_pid_file(tmp_path):
palace_path = project_root / "palace" palace_path = project_root / "palace"
other_pid = os.getpid() + 999_999 # a PID that isn't us other_pid = os.getpid() + 999_999 # a PID that isn't us
pid_file = tmp_path / "mine.pid" pid_file = tmp_path / "mine_abc.pid"
pid_file.write_text(str(other_pid)) pid_file.write_text(str(other_pid))
with patch("mempalace.hooks_cli._MINE_PID_FILE", pid_file): with patch.dict(os.environ, {"MEMPALACE_MINE_PID_FILE": str(pid_file)}):
mine(str(project_root), str(palace_path)) mine(str(project_root), str(palace_path))
assert pid_file.exists(), "Foreign PID entries must not be removed" assert pid_file.exists(), "Foreign PID entries must not be removed"