From 2f509b4789d7ae44f8fbf1281df812e098207aa3 Mon Sep 17 00:00:00 2001 From: Mika Cohen Date: Thu, 30 Apr 2026 09:31:32 -0600 Subject: [PATCH] fix(cli): restore backup on repair failure --- mempalace/cli.py | 62 +++++++++++++++++++++++++++-------------------- tests/test_cli.py | 50 +++++++++++++++++++++++++++++++++++++- 2 files changed, 85 insertions(+), 27 deletions(-) diff --git a/mempalace/cli.py b/mempalace/cli.py index ca9798b..27c81d3 100644 --- a/mempalace/cli.py +++ b/mempalace/cli.py @@ -648,7 +648,14 @@ def cmd_repair(args): import shutil from .backends.chroma import ChromaBackend from .migrate import confirm_destructive_action, contains_palace_database - from .repair import TruncationDetected, check_extraction_safety + from .repair import ( + RebuildCollectionError, + TruncationDetected, + _close_chroma_handles, + _extract_drawers, + _rebuild_collection_via_temp, + check_extraction_safety, + ) palace_path = os.path.abspath( os.path.expanduser(args.palace) if args.palace else MempalaceConfig().palace_path @@ -705,18 +712,7 @@ def cmd_repair(args): # Extract all drawers in batches print("\n Extracting drawers...") batch_size = 5000 - all_ids = [] - all_docs = [] - all_metas = [] - offset = 0 - while offset < total: - batch = col.get(limit=batch_size, offset=offset, include=["documents", "metadatas"]) - if not batch["ids"]: - break - all_ids.extend(batch["ids"]) - all_docs.extend(batch["documents"]) - all_metas.extend(batch["metadatas"]) - offset += len(batch["ids"]) + all_ids, all_docs, all_metas = _extract_drawers(col, total, batch_size) print(f" Extracted {len(all_ids)} drawers") # ── #1208 guard ────────────────────────────────────────────────── @@ -736,7 +732,6 @@ def cmd_repair(args): print(e.message) return - # Backup and rebuild palace_path = os.path.normpath(palace_path) backup_path = palace_path + ".backup" if os.path.exists(backup_path): @@ -750,18 +745,33 @@ def cmd_repair(args): print(f" Backing up to {backup_path}...") shutil.copytree(palace_path, backup_path) - print(" Rebuilding collection...") - backend.delete_collection(palace_path, "mempalace_drawers") - new_col = backend.create_collection(palace_path, "mempalace_drawers") - - filed = 0 - for i in range(0, len(all_ids), batch_size): - batch_ids = all_ids[i : i + batch_size] - batch_docs = all_docs[i : i + batch_size] - batch_metas = all_metas[i : i + batch_size] - new_col.add(documents=batch_docs, ids=batch_ids, metadatas=batch_metas) - filed += len(batch_ids) - print(f" Re-filed {filed}/{len(all_ids)} drawers...") + try: + filed = _rebuild_collection_via_temp( + backend, + palace_path, + all_ids, + all_docs, + all_metas, + batch_size, + progress=print, + ) + except RebuildCollectionError as e: + print(f" Repair failed: {e}") + if getattr(e, "live_replaced", False): + print(" Live collection was already replaced; restoring from backup...") + try: + _close_chroma_handles(palace_path) + if os.path.exists(palace_path): + shutil.rmtree(palace_path) + shutil.copytree(backup_path, palace_path) + print(f" Restore complete from backup: {backup_path}") + except Exception as restore_error: + print(f" Automatic restore failed: {restore_error}") + print(" Manual recovery required:") + print(f" 1. Remove or rename the broken directory: {palace_path}") + print(f" 2. Restore the backup directory to: {palace_path}") + print(f" Backup location: {backup_path}") + sys.exit(1) print(f"\n Repair complete. {filed} drawers rebuilt.") print(f" Backup saved at {backup_path}") diff --git a/tests/test_cli.py b/tests/test_cli.py index af7b39d..11845fe 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -4,7 +4,7 @@ import argparse import shlex import sys from pathlib import Path -from unittest.mock import MagicMock, patch +from unittest.mock import MagicMock, call, patch import pytest @@ -760,13 +760,61 @@ def test_cmd_repair_success(mock_config_cls, tmp_path, capsys): "documents": ["doc1", "doc2"], "metadatas": [{"wing": "a"}, {"wing": "b"}], } + mock_temp_col = MagicMock() + mock_temp_col.count.return_value = 2 mock_new_col = MagicMock() + mock_new_col.count.return_value = 2 mock_backend = _mock_backend_for(col=mock_col, new_col=mock_new_col) + mock_backend.create_collection.side_effect = [mock_temp_col, mock_new_col] with patch("mempalace.backends.chroma.ChromaBackend", return_value=mock_backend): cmd_repair(args) out = capsys.readouterr().out assert "Repair complete" in out assert "2 drawers rebuilt" in out + assert mock_backend.delete_collection.call_args_list == [ + call(str(palace_dir), "mempalace_drawers__repair_tmp"), + call(str(palace_dir), "mempalace_drawers"), + call(str(palace_dir), "mempalace_drawers__repair_tmp"), + ] + mock_temp_col.upsert.assert_called_once() + mock_new_col.upsert.assert_called_once() + mock_new_col.add.assert_not_called() + + +@patch("mempalace.cli.MempalaceConfig") +def test_cmd_repair_restores_backup_on_live_rebuild_failure(mock_config_cls, tmp_path, capsys): + palace_dir = tmp_path / "palace" + palace_dir.mkdir() + (palace_dir / "chroma.sqlite3").write_text("db") + mock_config_cls.return_value.palace_path = str(palace_dir) + args = argparse.Namespace(palace=None, yes=True) + mock_col = MagicMock() + mock_col.count.return_value = 2 + mock_col.get.return_value = { + "ids": ["id1", "id2"], + "documents": ["doc1", "doc2"], + "metadatas": [{"wing": "a"}, {"wing": "b"}], + } + mock_temp_col = MagicMock() + mock_temp_col.count.return_value = 2 + mock_backend = _mock_backend_for(col=mock_col) + mock_backend.create_collection.side_effect = [mock_temp_col, RuntimeError("live build failed")] + with ( + patch("mempalace.backends.chroma.ChromaBackend", return_value=mock_backend), + patch("mempalace.repair._close_chroma_handles") as mock_close_handles, + ): + with pytest.raises(SystemExit) as excinfo: + cmd_repair(args) + out = capsys.readouterr().out + assert excinfo.value.code == 1 + assert "Repair failed" in out + assert "restoring from backup" in out + mock_close_handles.assert_called_once_with(str(palace_dir)) + assert mock_backend.delete_collection.call_args_list == [ + call(str(palace_dir), "mempalace_drawers__repair_tmp"), + call(str(palace_dir), "mempalace_drawers"), + call(str(palace_dir), "mempalace_drawers__repair_tmp"), + ] @patch("mempalace.cli.MempalaceConfig")