Merge pull request #66 from MARUCIE/fix/sqlite-batch-reads

fix: batch ChromaDB reads to avoid SQLite variable limit
This commit is contained in:
Ben Sigman
2026-04-07 13:58:52 -07:00
committed by GitHub
2 changed files with 45 additions and 25 deletions
+24 -13
View File
@@ -253,20 +253,31 @@ def cmd_compress(args):
         print(" Run: mempalace init <dir> then mempalace mine <dir>")
         sys.exit(1)
 
-    # Query drawers in the wing
+    # Query drawers in batches to avoid SQLite variable limit (~999)
     where = {"wing": args.wing} if args.wing else None
-    try:
-        kwargs = {"include": ["documents", "metadatas"]}
-        if where:
-            kwargs["where"] = where
-        results = col.get(**kwargs)
-    except Exception as e:
-        print(f"\n Error reading drawers: {e}")
-        sys.exit(1)
-
-    docs = results["documents"]
-    metas = results["metadatas"]
-    ids = results["ids"]
+    _BATCH = 500
+    docs, metas, ids = [], [], []
+    offset = 0
+    while True:
+        try:
+            kwargs = {"include": ["documents", "metadatas"], "limit": _BATCH, "offset": offset}
+            if where:
+                kwargs["where"] = where
+            batch = col.get(**kwargs)
+        except Exception as e:
+            if not docs:
+                print(f"\n Error reading drawers: {e}")
+                sys.exit(1)
+            break
+        batch_docs = batch.get("documents", [])
+        if not batch_docs:
+            break
+        docs.extend(batch_docs)
+        metas.extend(batch.get("metadatas", []))
+        ids.extend(batch.get("ids", []))
+        offset += len(batch_docs)
+        if len(batch_docs) < _BATCH:
+            break
 
     if not docs:
         wing_label = f" in wing '{args.wing}'" if args.wing else ""
+21 -12
View File
@@ -96,18 +96,27 @@ class Layer1:
         except Exception:
             return "## L1 — No palace found. Run: mempalace mine <dir>"
 
-        # Fetch all drawers (with optional wing filter)
-        kwargs = {"include": ["documents", "metadatas"]}
-        if self.wing:
-            kwargs["where"] = {"wing": self.wing}
-
-        try:
-            results = col.get(**kwargs)
-        except Exception:
-            return "## L1 — No drawers found."
-
-        docs = results.get("documents", [])
-        metas = results.get("metadatas", [])
+        # Fetch all drawers in batches to avoid SQLite variable limit (~999)
+        _BATCH = 500
+        docs, metas = [], []
+        offset = 0
+        while True:
+            kwargs = {"include": ["documents", "metadatas"], "limit": _BATCH, "offset": offset}
+            if self.wing:
+                kwargs["where"] = {"wing": self.wing}
+            try:
+                batch = col.get(**kwargs)
+            except Exception:
+                break
+            batch_docs = batch.get("documents", [])
+            batch_metas = batch.get("metadatas", [])
+            if not batch_docs:
+                break
+            docs.extend(batch_docs)
+            metas.extend(batch_metas)
+            offset += len(batch_docs)
+            if len(batch_docs) < _BATCH:
+                break
 
         if not docs:
             return "## L1 — No memories yet."