fix: batch ChromaDB reads to avoid SQLite variable limit

Calling `col.get()` without a limit generates a `SELECT ... WHERE id IN (...)` query containing all document IDs, which exceeds SQLite's ~999 variable limit when a palace has more than ~1000 drawers. This breaks both `mempalace compress` and `mempalace wake-up` on large palaces. Reproduced on a 13880-file codebase (242K+ drawers). Fix: paginate reads in batches of 500 using ChromaDB's `limit`/`offset` parameters in both `Layer1.generate()` and `cmd_compress()`.
2026-04-07 21:40:12 +08:00
parent 1782628b8a
commit 0e77981dec
2 changed files with 45 additions and 25 deletions
@@ -96,18 +96,27 @@ class Layer1:
        except Exception:
            return "## L1 — No palace found. Run: mempalace mine <dir>"

-        # Fetch all drawers (with optional wing filter)
-        kwargs = {"include": ["documents", "metadatas"]}
-        if self.wing:
-            kwargs["where"] = {"wing": self.wing}
-
-        try:
-            results = col.get(**kwargs)
-        except Exception:
-            return "## L1 — No drawers found."
-
-        docs = results.get("documents", [])
-        metas = results.get("metadatas", [])
+        # Fetch all drawers in batches to avoid SQLite variable limit (~999)
+        _BATCH = 500
+        docs, metas = [], []
+        offset = 0
+        while True:
+            kwargs = {"include": ["documents", "metadatas"], "limit": _BATCH, "offset": offset}
+            if self.wing:
+                kwargs["where"] = {"wing": self.wing}
+            try:
+                batch = col.get(**kwargs)
+            except Exception:
+                break
+            batch_docs = batch.get("documents", [])
+            batch_metas = batch.get("metadatas", [])
+            if not batch_docs:
+                break
+            docs.extend(batch_docs)
+            metas.extend(batch_metas)
+            offset += len(batch_docs)
+            if len(batch_docs) < _BATCH:
+                break

        if not docs:
            return "## L1 — No memories yet."