From cbd6e5d65d15edb6026a238aea06e2736af7942a Mon Sep 17 00:00:00 2001 From: Igor Lins e Silva <4753812+igorls@users.noreply.github.com> Date: Sat, 2 May 2026 22:54:14 -0300 Subject: [PATCH] fix(cli): write compress output to mempalace_closets so palace can read them (#1244) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `cmd_compress` was writing AAAK-compressed drawers to a `mempalace_compressed` collection, but every read path (`palace.get_closets_collection`, `searcher.py`, `repair.py`) reads from `mempalace_closets`. Result: for non-mined palaces (or any palace where the user ran `mempalace compress` expecting to backfill the closet/index layer), the compressed output was silently invisible — written to a collection nothing else opens. Fix the writer rather than renaming the readers: "closets" is the user-visible feature name baked into the public API (`get_closets_collection`), the searcher hybrid path, repair/HNSW diagnostics, and docs. Renaming the readers would churn 15+ call sites and the README for no benefit. The compressed AAAK strings are exactly what closets are conceptually — compact pointers scanned by an LLM to locate the right drawer — so they belong in `mempalace_closets`. Tests: - Update `test_cmd_compress_stores_results` to assert the collection name passed to `get_or_create_collection` is `mempalace_closets`. - Add `test_cmd_compress_output_readable_via_get_closets_collection`: end-to-end with a real ChromaBackend, seed a drawer, run cmd_compress, then read back via the same `get_closets_collection` helper that palace.py / searcher use. Regression test for the wrong-collection bug. Co-Authored-By: Claude Opus 4.7 (1M context) --- mempalace/cli.py | 4 ++-- tests/test_cli.py | 49 ++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 50 insertions(+), 3 deletions(-) diff --git a/mempalace/cli.py b/mempalace/cli.py index ca9798b..d47f38e 100644 --- a/mempalace/cli.py +++ b/mempalace/cli.py @@ -902,7 +902,7 @@ def cmd_compress(args): # Store compressed versions (unless dry-run) if not args.dry_run: try: - comp_col = backend.get_or_create_collection(palace_path, "mempalace_compressed") + comp_col = backend.get_or_create_collection(palace_path, "mempalace_closets") for doc_id, compressed, meta, stats in compressed_entries: comp_meta = dict(meta) comp_meta["compression_ratio"] = round(stats["size_ratio"], 1) @@ -913,7 +913,7 @@ def cmd_compress(args): metadatas=[comp_meta], ) print( - f" Stored {len(compressed_entries)} compressed drawers in 'mempalace_compressed' collection." + f" Stored {len(compressed_entries)} compressed drawers in 'mempalace_closets' collection." ) except Exception as e: print(f" Error storing compressed drawers: {e}") diff --git a/tests/test_cli.py b/tests/test_cli.py index af7b39d..74521e6 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -889,7 +889,7 @@ def test_cmd_compress_with_config(mock_config_cls, tmp_path, capsys): @patch("mempalace.cli.MempalaceConfig") def test_cmd_compress_stores_results(mock_config_cls, capsys): - """Non-dry-run compress stores to mempalace_compressed collection.""" + """Non-dry-run compress stores to mempalace_closets collection (#1244).""" mock_config_cls.return_value.palace_path = "/fake/palace" args = argparse.Namespace(palace=None, wing=None, dry_run=False, config=None) mock_col = MagicMock() @@ -927,6 +927,53 @@ def test_cmd_compress_stores_results(mock_config_cls, capsys): assert "Stored" in out assert "Total:" in out mock_comp_col.upsert.assert_called_once() + # Verify the compress output goes to the closets collection so that + # palace.get_closets_collection() / searcher can read it back (#1244). + (call_args, _kwargs) = mock_backend.get_or_create_collection.call_args + assert call_args[1] == "mempalace_closets", ( + f"compress should write to mempalace_closets, got {call_args[1]!r}" + ) + assert "mempalace_closets" in out + + +def test_cmd_compress_output_readable_via_get_closets_collection(tmp_path, capsys): + """End-to-end: cmd_compress output must be readable via the same code + path palace.py uses (`get_closets_collection`). Regression for #1244.""" + from mempalace.backends.chroma import ChromaBackend + from mempalace.palace import get_closets_collection, get_collection + + palace_path = str(tmp_path / "palace") + + # Seed a drawer in the palace so cmd_compress has something to compress. + drawers = get_collection(palace_path, "mempalace_drawers", create=True) + drawers.upsert( + ids=["drawer-1"], + documents=["The quick brown fox jumps over the lazy dog."], + metadatas=[{"wing": "test", "room": "demo", "source_file": "fox.txt"}], + ) + + args = argparse.Namespace(palace=palace_path, wing=None, dry_run=False, config=None) + with patch("mempalace.cli.MempalaceConfig") as mock_config_cls: + mock_config_cls.return_value.palace_path = palace_path + # Use a real ChromaBackend so the write actually lands on disk and + # the read-side helper can find it. + with patch("mempalace.backends.chroma.ChromaBackend", side_effect=ChromaBackend): + cmd_compress(args) + + out = capsys.readouterr().out + assert "Stored" in out + + # Now read via the *same* code path palace.py / searcher uses. + closets = get_closets_collection(palace_path, create=False) + got = closets.get(ids=["drawer-1"], include=["documents", "metadatas"]) + assert got["ids"] == ["drawer-1"], ( + "compressed drawer not found in mempalace_closets — " + "cmd_compress wrote to the wrong collection (#1244)" + ) + assert got["documents"] and got["documents"][0], "empty compressed doc" + meta = got["metadatas"][0] + assert meta.get("wing") == "test" + assert "compression_ratio" in meta def test_cmd_repair_trailing_slash_does_not_recurse():