merge: develop into hnsw-repair (resolve chroma.py + test_backends.py conflicts)
Develop (post-#1162 lock-plumbing era) refactored the per-open quarantine pass into ChromaBackend._prepare_palace_for_open. This branch's inline-expansion form added quarantine_invalid_hnsw_metadata as a third check, plus a "discard from _quarantined_paths on inode swap" guard so re-opens against a different physical DB re-run quarantine.

Resolution merges both:
- _prepare_palace_for_open now also calls quarantine_invalid_hnsw_metadata, gated by the same _quarantined_paths set.
- _client keeps the inode_changed -> _quarantined_paths.discard() guard before calling the helper, so a fresh inode triggers a fresh pass.
- make_client collapses to a single _prepare_palace_for_open() call.
- test_backends.py keeps both the pickle (#1285) and shutil (develop) imports — both are used.
This commit is contained in:
+158
-1
@@ -175,6 +175,61 @@ def test_cmd_init_normalizes_wing_name_for_topics_registry(mock_config_cls, tmp_
|
||||
assert mock_register.call_args.kwargs["wing"] == "my_cool_app"
|
||||
|
||||
|
||||
def test_cmd_init_honors_palace_flag(tmp_path, monkeypatch):
    """``cmd_init`` must route to ``--palace``, not ``~/.mempalace`` (#1313).

    The flag should behave like the env-var pattern already used by
    ``cmd_mine`` / ``cmd_status`` / ``mcp_server``, so that every later read
    of ``cfg.palace_path`` (Pass 0, ``cfg.init()``, post-init mine) lands on
    the location the user asked for.
    """
    import os

    project_root = tmp_path / "project"
    project_root.mkdir()
    expected = str(tmp_path / "custom_palace")

    # Start from a clean environment so that --palace ALONE decides the
    # outcome. Each variable is set before it is deleted: ``setenv`` puts an
    # entry on monkeypatch's undo list, whereas ``delenv(raising=False)`` on
    # a missing key would register nothing and the value ``cmd_init`` writes
    # below would leak into the next test.
    env_names = ("MEMPALACE_PALACE_PATH", "MEMPAL_PALACE_PATH")
    for env_name in env_names:
        monkeypatch.setenv(env_name, "")
    for env_name in env_names:
        monkeypatch.delenv(env_name)

    seen = {}

    def fake_pass_zero(project_dir, palace_dir, llm_provider):
        # The palace_dir handed to Pass 0 is the smoking gun for the bug:
        # before the fix it was always ~/.mempalace.
        seen["pass_zero_palace_dir"] = palace_dir

    cli_args = argparse.Namespace(
        dir=str(project_root),
        palace=expected,
        yes=True,
        auto_mine=False,
    )

    with (
        patch("mempalace.entity_detector.scan_for_detection", return_value=[]),
        patch("mempalace.room_detector_local.detect_rooms_local"),
        patch("mempalace.cli._run_pass_zero", side_effect=fake_pass_zero),
        patch("mempalace.cli._maybe_run_mine_after_init"),
    ):
        cmd_init(cli_args)

    # Pass 0 received the --palace location rather than ~/.mempalace.
    assert seen["pass_zero_palace_dir"] == expected

    # The env var also points at the custom palace, so any later
    # ``cfg.palace_path`` read in this process resolves the same way.
    exported = os.environ.get("MEMPALACE_PALACE_PATH")
    assert exported == os.path.abspath(expected)
|
||||
|
||||
|
||||
@patch("mempalace.cli.MempalaceConfig")
|
||||
def test_cmd_init_with_entities_zero_total(mock_config_cls, tmp_path, capsys):
|
||||
"""When entities detected but total is 0, prints 'No entities' message."""
|
||||
@@ -934,7 +989,7 @@ def test_cmd_compress_with_config(mock_config_cls, tmp_path, capsys):
|
||||
|
||||
@patch("mempalace.cli.MempalaceConfig")
|
||||
def test_cmd_compress_stores_results(mock_config_cls, capsys):
|
||||
"""Non-dry-run compress stores to mempalace_compressed collection."""
|
||||
"""Non-dry-run compress stores to mempalace_closets collection (#1244)."""
|
||||
mock_config_cls.return_value.palace_path = "/fake/palace"
|
||||
args = argparse.Namespace(palace=None, wing=None, dry_run=False, config=None)
|
||||
mock_col = MagicMock()
|
||||
@@ -972,6 +1027,53 @@ def test_cmd_compress_stores_results(mock_config_cls, capsys):
|
||||
assert "Stored" in out
|
||||
assert "Total:" in out
|
||||
mock_comp_col.upsert.assert_called_once()
|
||||
# Verify the compress output goes to the closets collection so that
|
||||
# palace.get_closets_collection() / searcher can read it back (#1244).
|
||||
(call_args, _kwargs) = mock_backend.get_or_create_collection.call_args
|
||||
assert (
|
||||
call_args[1] == "mempalace_closets"
|
||||
), f"compress should write to mempalace_closets, got {call_args[1]!r}"
|
||||
assert "mempalace_closets" in out
|
||||
|
||||
|
||||
def test_cmd_compress_output_readable_via_get_closets_collection(tmp_path, capsys):
    """End-to-end regression for #1244.

    Whatever ``cmd_compress`` stores must be retrievable through
    ``get_closets_collection`` — the same read path palace.py uses.
    """
    from mempalace.backends.chroma import ChromaBackend
    from mempalace.palace import get_closets_collection, get_collection

    palace_path = str(tmp_path / "palace")
    drawer_id = "drawer-1"

    # Give cmd_compress one drawer to work on.
    seed_collection = get_collection(palace_path, "mempalace_drawers", create=True)
    seed_collection.upsert(
        ids=[drawer_id],
        documents=["The quick brown fox jumps over the lazy dog."],
        metadatas=[{"wing": "test", "room": "demo", "source_file": "fox.txt"}],
    )

    cli_args = argparse.Namespace(
        palace=palace_path,
        wing=None,
        dry_run=False,
        config=None,
    )
    with patch("mempalace.cli.MempalaceConfig") as mock_config_cls:
        mock_config_cls.return_value.palace_path = palace_path
        # side_effect forwards to the real ChromaBackend, so the write
        # actually reaches disk and the read-side helper can find it.
        with patch("mempalace.backends.chroma.ChromaBackend", side_effect=ChromaBackend):
            cmd_compress(cli_args)

    printed = capsys.readouterr().out
    assert "Stored" in printed

    # Read back through the *same* helper palace.py / searcher relies on.
    closets = get_closets_collection(palace_path, create=False)
    fetched = closets.get(ids=[drawer_id], include=["documents", "metadatas"])
    assert fetched["ids"] == [drawer_id], (
        "compressed drawer not found in mempalace_closets — "
        "cmd_compress wrote to the wrong collection (#1244)"
    )
    assert fetched["documents"] and fetched["documents"][0], "empty compressed doc"

    stored_meta = fetched["metadatas"][0]
    assert stored_meta.get("wing") == "test"
    assert "compression_ratio" in stored_meta
|
||||
|
||||
|
||||
def test_cmd_repair_trailing_slash_does_not_recurse():
|
||||
@@ -985,3 +1087,58 @@ def test_cmd_repair_trailing_slash_does_not_recurse():
|
||||
palace_path = os.path.expanduser(args.palace).rstrip(os.sep)
|
||||
backup_path = palace_path + ".backup"
|
||||
assert not backup_path.startswith(palace_path + os.sep)
|
||||
|
||||
|
||||
# ── stdio reconfigure on Windows ─────────────────────────────────────
|
||||
|
||||
|
||||
class _ReconfigurableStringIO:
    """Stand-in stream that records the keyword arguments of each ``reconfigure`` call."""

    def __init__(self):
        # One dict of keyword arguments per ``reconfigure`` call, in call order.
        self.reconfigure_calls = []

    def reconfigure(self, **options):
        """Record ``options`` instead of reconfiguring anything."""
        self.reconfigure_calls += [options]
|
||||
|
||||
|
||||
def test_reconfigures_stdio_to_utf8_on_windows():
    """On Windows the `mempalace` CLI must read and write stdio as UTF-8.

    Otherwise piped non-ASCII input (`mempalace search ... < q.txt`) or
    piped non-ASCII output (`mempalace search "..." > out.txt`) gets
    mojibaked through the system ANSI codepage on non-Latin Windows
    locales (cp1252/cp1251/cp950).
    """
    from mempalace.cli import _reconfigure_stdio_utf8_on_windows

    fake_stdin = _ReconfigurableStringIO()
    fake_stdout = _ReconfigurableStringIO()
    fake_stderr = _ReconfigurableStringIO()

    with (
        patch.object(sys, "platform", "win32"),
        patch.object(sys, "stdin", fake_stdin),
        patch.object(sys, "stdout", fake_stdout),
        patch.object(sys, "stderr", fake_stderr),
    ):
        _reconfigure_stdio_utf8_on_windows()

    # The errors policy differs per stream. stdin uses surrogateescape so a
    # redirected non-UTF-8 file does not crash the read; stdout/stderr use
    # replace so a drawer carrying a round-tripped surrogate half does not
    # crash mid-print.
    lenient_input = {"encoding": "utf-8", "errors": "surrogateescape"}
    lossy_output = {"encoding": "utf-8", "errors": "replace"}
    assert fake_stdin.reconfigure_calls == [lenient_input]
    assert fake_stdout.reconfigure_calls == [lossy_output]
    assert fake_stderr.reconfigure_calls == [lossy_output]
|
||||
|
||||
|
||||
def test_reconfigure_stdio_is_noop_off_windows():
    """Off Windows the helper must leave the streams alone.

    Linux/macOS already default to UTF-8 stdio, so no ``reconfigure`` call
    is expected there.
    """
    from mempalace.cli import _reconfigure_stdio_utf8_on_windows

    fake_stdin = _ReconfigurableStringIO()

    with patch.object(sys, "platform", "linux"), patch.object(sys, "stdin", fake_stdin):
        _reconfigure_stdio_utf8_on_windows()

    # No reconfigure call was recorded on the substituted stdin.
    assert fake_stdin.reconfigure_calls == []
|
||||
|
||||
Reference in New Issue
Block a user