fix: guard against data loss in repair, migrate, and CLI rebuild

- repair.py: wrap upsert loop in try/except; restore from backup on
  failure instead of leaving a partially rebuilt collection
- migrate.py: replace non-atomic rmtree+move with rename-aside swap
  so a crash between the two calls does not destroy both copies
- cli.py: use offset += len(batch["ids"]) with empty-batch guard
  instead of fixed offset += batch_size to prevent skipping drawers
This commit is contained in:
JunghwanNA
2026-04-16 12:11:18 +09:00
parent d4c942417a
commit 5dfe853154
3 changed files with 34 additions and 11 deletions
+19 -7
View File
@@ -266,13 +266,25 @@ def rebuild_index(palace_path=None):
new_col = backend.create_collection(palace_path, COLLECTION_NAME)
filed = 0
for i in range(0, len(all_ids), batch_size):
batch_ids = all_ids[i : i + batch_size]
batch_docs = all_docs[i : i + batch_size]
batch_metas = all_metas[i : i + batch_size]
new_col.upsert(documents=batch_docs, ids=batch_ids, metadatas=batch_metas)
filed += len(batch_ids)
print(f" Re-filed {filed}/{len(all_ids)} drawers...")
try:
for i in range(0, len(all_ids), batch_size):
batch_ids = all_ids[i : i + batch_size]
batch_docs = all_docs[i : i + batch_size]
batch_metas = all_metas[i : i + batch_size]
new_col.upsert(documents=batch_docs, ids=batch_ids, metadatas=batch_metas)
filed += len(batch_ids)
print(f" Re-filed {filed}/{len(all_ids)} drawers...")
except Exception as e:
print(f"\n ERROR during rebuild: {e}")
print(f" Only {filed}/{len(all_ids)} drawers were re-filed.")
if os.path.exists(backup_path):
print(f" Restoring from backup: {backup_path}")
backend.delete_collection(palace_path, COLLECTION_NAME)
shutil.copy2(backup_path, sqlite_path)
print(" Backup restored. Palace is back to pre-repair state.")
else:
print(" No backup available. Re-mine from source files to recover.")
raise
print(f"\n Repair complete. {filed} drawers rebuilt.")
print(" HNSW index is now clean with cosine distance metric.")