From 60bea83e76359dacb060100c090160f256f99e70 Mon Sep 17 00:00:00 2001 From: bensig Date: Fri, 10 Apr 2026 00:08:28 -0700 Subject: [PATCH] =?UTF-8?q?feat:=20mempalace=20migrate=20=E2=80=94=20recov?= =?UTF-8?q?er=20palaces=20from=20different=20ChromaDB=20versions?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reads documents and metadata directly from ChromaDB's SQLite (bypassing the API that fails on version-mismatched databases), then reimports into a fresh palace using the currently installed ChromaDB. Fixes the 3.0.0 → 3.1.0 upgrade path where chromadb was downgraded from 1.5.x to 0.6.x, breaking the on-disk storage format. - Detects chromadb version from SQLite schema (0.6.x vs 1.x) - Extracts all drawers with full metadata via raw SQL - Builds fresh palace in temp dir, swaps atomically - Backs up original palace before any changes - Supports --dry-run to preview without modifying Fixes #457 --- mempalace/cli.py | 20 ++++ mempalace/migrate.py | 214 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 234 insertions(+) create mode 100644 mempalace/migrate.py diff --git a/mempalace/cli.py b/mempalace/cli.py index d8dc697..1d106ca 100644 --- a/mempalace/cli.py +++ b/mempalace/cli.py @@ -150,6 +150,14 @@ def cmd_split(args): sys.argv = old_argv +def cmd_migrate(args): + """Migrate palace from a different ChromaDB version.""" + from .migrate import migrate + + palace_path = os.path.expanduser(args.palace) if args.palace else MempalaceConfig().palace_path + migrate(palace_path=palace_path, dry_run=args.dry_run) + + def cmd_status(args): from .miner import status @@ -531,6 +539,17 @@ def main(): ) # status + # migrate + p_migrate = sub.add_parser( + "migrate", + help="Migrate palace from a different ChromaDB version (fixes 3.0.0 → 3.1.0 upgrade)", + ) + p_migrate.add_argument( + "--dry-run", + action="store_true", + help="Show what would be migrated without changing anything", + ) + sub.add_parser("status", help="Show what's been filed") args = parser.parse_args() @@ -565,6 +584,7 @@ def main(): "compress": cmd_compress, "wake-up": cmd_wakeup, "repair": cmd_repair, + "migrate": cmd_migrate, "status": cmd_status, } dispatch[args.command](args) diff --git a/mempalace/migrate.py b/mempalace/migrate.py new file mode 100644 index 0000000..848ab67 --- /dev/null +++ b/mempalace/migrate.py @@ -0,0 +1,214 @@ +#!/usr/bin/env python3 +""" +mempalace migrate — Recover a palace created with a different ChromaDB version. + +Reads documents and metadata directly from the palace's SQLite database +(bypassing ChromaDB's API, which fails on version-mismatched palaces), +then re-imports everything into a fresh palace using the currently installed +ChromaDB version. + +This fixes the 3.0.0 → 3.1.0 upgrade path where chromadb was downgraded +from 1.5.x to 0.6.x, breaking the on-disk storage format. + +Usage: + mempalace migrate # migrate default palace + mempalace migrate --palace /path/to/palace # migrate specific palace + mempalace migrate --dry-run # show what would be migrated +""" + +import os +import shutil +import sqlite3 +from collections import defaultdict +from datetime import datetime + + +def extract_drawers_from_sqlite(db_path: str) -> list: + """Read all drawers directly from ChromaDB's SQLite, bypassing the API. + + Works regardless of which ChromaDB version created the database. + Returns list of dicts with 'id', 'document', and 'metadata' keys. + """ + conn = sqlite3.connect(db_path) + conn.row_factory = sqlite3.Row + + # Get all embedding IDs and their documents + rows = conn.execute(""" + SELECT e.embedding_id, + MAX(CASE WHEN em.key = 'chroma:document' THEN em.string_value END) as document + FROM embeddings e + JOIN embedding_metadata em ON em.id = e.id + GROUP BY e.embedding_id + """).fetchall() + + drawers = [] + for row in rows: + embedding_id = row["embedding_id"] + document = row["document"] + if not document: + continue + + # Get metadata for this embedding + meta_rows = conn.execute( + """ + SELECT em.key, em.string_value, em.int_value, em.float_value, em.bool_value + FROM embedding_metadata em + JOIN embeddings e ON e.id = em.id + WHERE e.embedding_id = ? + AND em.key NOT LIKE 'chroma:%' + """, + (embedding_id,), + ).fetchall() + + metadata = {} + for mr in meta_rows: + key = mr["key"] + if mr["string_value"] is not None: + metadata[key] = mr["string_value"] + elif mr["int_value"] is not None: + metadata[key] = mr["int_value"] + elif mr["float_value"] is not None: + metadata[key] = mr["float_value"] + elif mr["bool_value"] is not None: + metadata[key] = bool(mr["bool_value"]) + + drawers.append( + { + "id": embedding_id, + "document": document, + "metadata": metadata, + } + ) + + conn.close() + return drawers + + +def detect_chromadb_version(db_path: str) -> str: + """Detect which ChromaDB version created the database by checking schema.""" + conn = sqlite3.connect(db_path) + try: + # 1.x has schema_str column in collections table + cols = [r[1] for r in conn.execute("PRAGMA table_info(collections)").fetchall()] + if "schema_str" in cols: + return "1.x" + # 0.6.x has embeddings_queue but no schema_str + tables = [ + r[0] + for r in conn.execute("SELECT name FROM sqlite_master WHERE type='table'").fetchall() + ] + if "embeddings_queue" in tables: + return "0.6.x" + return "unknown" + finally: + conn.close() + + +def migrate(palace_path: str, dry_run: bool = False): + """Migrate a palace to the currently installed ChromaDB version.""" + import chromadb + + palace_path = os.path.expanduser(palace_path) + db_path = os.path.join(palace_path, "chroma.sqlite3") + + if not os.path.isfile(db_path): + print(f"\n No palace database found at {db_path}") + return False + + print(f"\n{'=' * 60}") + print(" MemPalace Migrate") + print(f"{'=' * 60}\n") + print(f" Palace: {palace_path}") + print(f" Database: {db_path}") + print(f" DB size: {os.path.getsize(db_path) / 1024 / 1024:.1f} MB") + + # Detect version + source_version = detect_chromadb_version(db_path) + print(f" Source: ChromaDB {source_version}") + print(f" Target: ChromaDB {chromadb.__version__}") + + # Try reading with current chromadb first + try: + client = chromadb.PersistentClient(path=palace_path) + col = client.get_collection("mempalace_drawers") + count = col.count() + print(f"\n Palace is already readable by chromadb {chromadb.__version__}.") + print(f" {count} drawers found. No migration needed.") + return True + except Exception: + print(f"\n Palace is NOT readable by chromadb {chromadb.__version__}.") + print(" Extracting from SQLite directly...") + + # Extract all drawers via raw SQL + drawers = extract_drawers_from_sqlite(db_path) + print(f" Extracted {len(drawers)} drawers from SQLite") + + if not drawers: + print(" Nothing to migrate.") + return True + + # Show summary + wings = defaultdict(lambda: defaultdict(int)) + for d in drawers: + w = d["metadata"].get("wing", "?") + r = d["metadata"].get("room", "?") + wings[w][r] += 1 + + print("\n Summary:") + for wing, rooms in sorted(wings.items()): + total = sum(rooms.values()) + print(f" WING: {wing} ({total} drawers)") + for room, count in sorted(rooms.items(), key=lambda x: -x[1]): + print(f" ROOM: {room:30} {count:5}") + + if dry_run: + print("\n DRY RUN — no changes made.") + print(f" Would migrate {len(drawers)} drawers.") + return True + + # Backup the old palace + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + backup_path = f"{palace_path}.pre-migrate.{timestamp}" + print(f"\n Backing up to {backup_path}...") + shutil.copytree(palace_path, backup_path) + + # Build fresh palace in a temp directory (avoids chromadb reading old state) + import tempfile + + temp_palace = tempfile.mkdtemp(prefix="mempalace_migrate_") + print(f" Creating fresh palace in {temp_palace}...") + client = chromadb.PersistentClient(path=temp_palace) + col = client.get_or_create_collection("mempalace_drawers") + + # Re-import in batches + batch_size = 500 + imported = 0 + for i in range(0, len(drawers), batch_size): + batch = drawers[i : i + batch_size] + col.add( + ids=[d["id"] for d in batch], + documents=[d["document"] for d in batch], + metadatas=[d["metadata"] for d in batch], + ) + imported += len(batch) + print(f" Imported {imported}/{len(drawers)} drawers...") + + # Verify before swapping + final_count = col.count() + del col + del client + + # Swap: remove old palace, move new one into place + print(" Swapping old palace for migrated version...") + shutil.rmtree(palace_path) + shutil.move(temp_palace, palace_path) + + print("\n Migration complete.") + print(f" Drawers migrated: {final_count}") + print(f" Backup at: {backup_path}") + + if final_count != len(drawers): + print(f" WARNING: Expected {len(drawers)}, got {final_count}") + + print(f"\n{'=' * 60}\n") + return True