fix: address PR review — per-palace lock, MCP server path, hook timeout, tests
Addresses the six Copilot review comments on the initial commit. 1) #6 (critical) — mcp_server.py `_get_collection` bypassed ChromaBackend The MCP server creates its palace collection directly via `chromadb.PersistentClient.get_or_create_collection` in `_get_collection`, not through `ChromaBackend.get_collection`. That path was missing the `hnsw:num_threads=1` metadata, so the primary crash surface for #974 and #965 was untouched by the original patch. Fixed by passing `hnsw:num_threads=1` at the mcp_server create site too. Documented in a code comment that the setting is only honored at creation time — existing palaces created before this fix still need a `mempalace nuke` + re-mine to gain the protection. 2) #3 — mine_global_lock over-serialized mines across unrelated palaces Replaced the single global lock file `mine_global.lock` with a per-palace lock keyed by `sha256(os.path.abspath(palace_path))` (`mine_palace_<hash>.lock`). Mines against the same palace still collapse to a single runner (the correctness boundary), but mines against *different* palaces are now free to run in parallel. `mine_global_lock` is kept as a backward-compatible alias for `mine_palace_lock` so any external callers that imported the previous name keep working. 3) #1 — hook_precompact swallowed OSError but not subprocess.TimeoutExpired `subprocess.run(..., timeout=60)` raises `TimeoutExpired` on slow palaces. The previous `except OSError` clause didn't catch it, so the hook could raise and fail to emit any JSON decision — leaving the harness without a block/passthrough signal. Fixed by catching `(OSError, subprocess.TimeoutExpired)` together and always falling through to the block decision so the hook reliably emits a response. 4) #2 + #4 — tests - tests/test_hooks_cli.py: added `test_precompact_first_two_attempts_block`, `test_precompact_passes_through_after_cap`, and `test_precompact_counter_is_per_session` to lock in the #955 deadlock fix. - tests/test_palace_locks.py (new): covers `mine_palace_lock` single-acquire, reuse-after-release, cross-process serialization on the same palace, non-interference across different palaces, path normalization, and the `mine_global_lock` back-compat alias. 5) #5 — known limitation, documented but not auto-fixed Copilot suggested detecting collections missing `hnsw:num_threads=1` and calling `collection.modify(metadata=...)` to retrofit existing palaces. Verified against chromadb 1.5.7: `modify(metadata=...)` replaces metadata rather than merging, and re-passing `hnsw:space="cosine"` then raises `ValueError: Changing the distance function of a collection once it is created is not supported currently.` The HNSW runtime configuration (`configuration_json`) also does not expose `num_threads` in chromadb 1.5.x, so the flag appears to be read only at creation time. Rather than paper over the limitation with a best-effort `modify` that silently drops `hnsw:space`, documented in the mcp_server comment that pre-existing palaces need a `mempalace nuke` + re-mine to gain the protection. Fresh palaces are always protected. Testing - pytest tests/test_palace_locks.py tests/test_hooks_cli.py tests/test_backends.py tests/test_cli.py → **98 passed, 0 failed**. - Runtime validation with two concurrent `mempalace mine` calls: - Different palaces → both complete in parallel ✓ - Same palace → one completes, the other exits with "another `mine` is already running against <palace> — exiting cleanly." ✓
This commit is contained in:
committed by
Igor Lins e Silva
parent
7e18a70796
commit
99b820cb42
+130
-43
@@ -1,59 +1,146 @@
|
||||
"""Tests for mine_global_lock: non-blocking cross-process lock semantics."""
|
||||
import threading
|
||||
"""Tests for mine_palace_lock — the per-palace non-blocking mine guard.
|
||||
|
||||
Covers the fix for the runaway mine fan-out described alongside issues
|
||||
#974 and #965: if N copies of `mempalace mine` are spawned concurrently
|
||||
against the same palace, they must collapse to a single runner rather
|
||||
than queue as waiters that will drive parallel HNSW inserts. Mines
|
||||
against *different* palaces must still be free to run in parallel.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import multiprocessing
|
||||
import os
|
||||
import time
|
||||
|
||||
import pytest
|
||||
|
||||
from mempalace.palace import MineAlreadyRunning, mine_global_lock
|
||||
from mempalace.palace import (
|
||||
MineAlreadyRunning,
|
||||
mine_global_lock,
|
||||
mine_palace_lock,
|
||||
)
|
||||
|
||||
|
||||
def test_mine_global_lock_acquired(tmp_path, monkeypatch):
|
||||
"""Lock is acquired and released without error."""
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _hold_lock(palace_path: str, ready_flag: str, release_flag: str) -> int:
|
||||
"""Acquire mine_palace_lock, signal readiness, wait for release flag.
|
||||
|
||||
Returns 0 if we acquired the lock, 1 if MineAlreadyRunning was raised.
|
||||
Runs in a child process for true cross-process locking semantics.
|
||||
"""
|
||||
try:
|
||||
with mine_palace_lock(palace_path):
|
||||
# Tell the parent we hold the lock
|
||||
open(ready_flag, "w").close()
|
||||
# Wait until parent tells us to release
|
||||
for _ in range(500):
|
||||
if os.path.exists(release_flag):
|
||||
return 0
|
||||
time.sleep(0.01)
|
||||
return 0
|
||||
except MineAlreadyRunning:
|
||||
return 1
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tests
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_single_acquire_succeeds(tmp_path, monkeypatch):
|
||||
monkeypatch.setenv("HOME", str(tmp_path))
|
||||
with mine_global_lock():
|
||||
with mine_palace_lock(str(tmp_path / "palace")):
|
||||
pass # should not raise
|
||||
|
||||
|
||||
def test_mine_global_lock_second_acquire_raises(tmp_path, monkeypatch):
|
||||
"""Concurrent second acquire raises MineAlreadyRunning."""
|
||||
def test_lock_reusable_after_release(tmp_path, monkeypatch):
|
||||
monkeypatch.setenv("HOME", str(tmp_path))
|
||||
results: list[str] = []
|
||||
|
||||
with mine_global_lock():
|
||||
# While this lock is held, spawn a thread that tries to acquire.
|
||||
def try_acquire():
|
||||
try:
|
||||
with mine_global_lock():
|
||||
results.append("acquired")
|
||||
except MineAlreadyRunning:
|
||||
results.append("blocked")
|
||||
|
||||
t = threading.Thread(target=try_acquire)
|
||||
t.start()
|
||||
t.join(timeout=5)
|
||||
|
||||
assert results == ["blocked"]
|
||||
|
||||
|
||||
def test_mine_global_lock_reusable_after_release(tmp_path, monkeypatch):
|
||||
"""Lock can be re-acquired after the context manager exits."""
|
||||
monkeypatch.setenv("HOME", str(tmp_path))
|
||||
|
||||
with mine_global_lock():
|
||||
pass # first acquire + release
|
||||
|
||||
# Second acquire must succeed; MineAlreadyRunning would propagate as failure.
|
||||
with mine_global_lock():
|
||||
palace = str(tmp_path / "palace")
|
||||
with mine_palace_lock(palace):
|
||||
pass
|
||||
# Re-acquire must succeed now that the previous holder released
|
||||
with mine_palace_lock(palace):
|
||||
pass
|
||||
|
||||
|
||||
def test_mine_global_lock_exception_still_releases(tmp_path, monkeypatch):
|
||||
"""Lock is released even when the body raises."""
|
||||
def test_same_palace_serializes_across_processes(tmp_path, monkeypatch):
|
||||
"""Two processes contending for the same palace: second must be rejected."""
|
||||
monkeypatch.setenv("HOME", str(tmp_path))
|
||||
palace = str(tmp_path / "palace")
|
||||
ready = str(tmp_path / "ready")
|
||||
release = str(tmp_path / "release")
|
||||
|
||||
with pytest.raises(ValueError):
|
||||
with mine_global_lock():
|
||||
raise ValueError("boom")
|
||||
ctx = multiprocessing.get_context("fork")
|
||||
holder = ctx.Process(target=_hold_lock, args=(palace, ready, release))
|
||||
holder.start()
|
||||
try:
|
||||
# Wait for the holder to acquire
|
||||
for _ in range(500):
|
||||
if os.path.exists(ready):
|
||||
break
|
||||
time.sleep(0.01)
|
||||
assert os.path.exists(ready), "holder failed to acquire lock in time"
|
||||
|
||||
# Must be acquirable again after the exception.
|
||||
with mine_global_lock():
|
||||
pass
|
||||
# From the parent, we must not be able to acquire the same palace lock
|
||||
with pytest.raises(MineAlreadyRunning):
|
||||
with mine_palace_lock(palace):
|
||||
pytest.fail("second acquire of same palace should have raised")
|
||||
finally:
|
||||
open(release, "w").close()
|
||||
holder.join(timeout=5)
|
||||
assert holder.exitcode == 0
|
||||
|
||||
|
||||
def test_different_palaces_dont_conflict(tmp_path, monkeypatch):
|
||||
"""Mines against different palaces must NOT block each other."""
|
||||
monkeypatch.setenv("HOME", str(tmp_path))
|
||||
palace_a = str(tmp_path / "palace_a")
|
||||
palace_b = str(tmp_path / "palace_b")
|
||||
ready = str(tmp_path / "ready_a")
|
||||
release = str(tmp_path / "release_a")
|
||||
|
||||
ctx = multiprocessing.get_context("fork")
|
||||
holder = ctx.Process(target=_hold_lock, args=(palace_a, ready, release))
|
||||
holder.start()
|
||||
try:
|
||||
for _ in range(500):
|
||||
if os.path.exists(ready):
|
||||
break
|
||||
time.sleep(0.01)
|
||||
assert os.path.exists(ready), "holder failed to acquire lock in time"
|
||||
|
||||
# Different palace — must succeed even while palace_a is held
|
||||
with mine_palace_lock(palace_b):
|
||||
pass # no exception expected
|
||||
finally:
|
||||
open(release, "w").close()
|
||||
holder.join(timeout=5)
|
||||
|
||||
|
||||
def test_palace_path_is_normalized(tmp_path, monkeypatch):
|
||||
"""Relative and absolute forms of the same path must use the same lock."""
|
||||
monkeypatch.setenv("HOME", str(tmp_path))
|
||||
monkeypatch.chdir(tmp_path)
|
||||
os.makedirs(tmp_path / "palace", exist_ok=True)
|
||||
absolute = str(tmp_path / "palace")
|
||||
relative = "palace"
|
||||
|
||||
# Hold the lock with the absolute form; attempting to re-acquire with
|
||||
# the relative form (which resolves to the same absolute path) must fail.
|
||||
with mine_palace_lock(absolute):
|
||||
with pytest.raises(MineAlreadyRunning):
|
||||
with mine_palace_lock(relative):
|
||||
pytest.fail("normalized path collision should have raised")
|
||||
|
||||
|
||||
def test_mine_global_lock_is_alias_for_back_compat(tmp_path, monkeypatch):
|
||||
"""Old callers of `mine_global_lock` should still work."""
|
||||
monkeypatch.setenv("HOME", str(tmp_path))
|
||||
assert mine_global_lock is mine_palace_lock
|
||||
with mine_global_lock(str(tmp_path / "palace")):
|
||||
pass # the alias accepts the same palace_path argument
|
||||
|
||||
Reference in New Issue
Block a user