Merge pull request #66 from MARUCIE/fix/sqlite-batch-reads
fix: batch ChromaDB reads to avoid SQLite variable limit
This commit is contained in:
+18
-7
@@ -253,20 +253,31 @@ def cmd_compress(args):
|
||||
print(" Run: mempalace init <dir> then mempalace mine <dir>")
|
||||
sys.exit(1)
|
||||
|
||||
# Query drawers in the wing
|
||||
# Query drawers in batches to avoid SQLite variable limit (~999)
|
||||
where = {"wing": args.wing} if args.wing else None
|
||||
_BATCH = 500
|
||||
docs, metas, ids = [], [], []
|
||||
offset = 0
|
||||
while True:
|
||||
try:
|
||||
kwargs = {"include": ["documents", "metadatas"]}
|
||||
kwargs = {"include": ["documents", "metadatas"], "limit": _BATCH, "offset": offset}
|
||||
if where:
|
||||
kwargs["where"] = where
|
||||
results = col.get(**kwargs)
|
||||
batch = col.get(**kwargs)
|
||||
except Exception as e:
|
||||
if not docs:
|
||||
print(f"\n Error reading drawers: {e}")
|
||||
sys.exit(1)
|
||||
|
||||
docs = results["documents"]
|
||||
metas = results["metadatas"]
|
||||
ids = results["ids"]
|
||||
break
|
||||
batch_docs = batch.get("documents", [])
|
||||
if not batch_docs:
|
||||
break
|
||||
docs.extend(batch_docs)
|
||||
metas.extend(batch.get("metadatas", []))
|
||||
ids.extend(batch.get("ids", []))
|
||||
offset += len(batch_docs)
|
||||
if len(batch_docs) < _BATCH:
|
||||
break
|
||||
|
||||
if not docs:
|
||||
wing_label = f" in wing '{args.wing}'" if args.wing else ""
|
||||
|
||||
+17
-8
@@ -96,18 +96,27 @@ class Layer1:
|
||||
except Exception:
|
||||
return "## L1 — No palace found. Run: mempalace mine <dir>"
|
||||
|
||||
# Fetch all drawers (with optional wing filter)
|
||||
kwargs = {"include": ["documents", "metadatas"]}
|
||||
# Fetch all drawers in batches to avoid SQLite variable limit (~999)
|
||||
_BATCH = 500
|
||||
docs, metas = [], []
|
||||
offset = 0
|
||||
while True:
|
||||
kwargs = {"include": ["documents", "metadatas"], "limit": _BATCH, "offset": offset}
|
||||
if self.wing:
|
||||
kwargs["where"] = {"wing": self.wing}
|
||||
|
||||
try:
|
||||
results = col.get(**kwargs)
|
||||
batch = col.get(**kwargs)
|
||||
except Exception:
|
||||
return "## L1 — No drawers found."
|
||||
|
||||
docs = results.get("documents", [])
|
||||
metas = results.get("metadatas", [])
|
||||
break
|
||||
batch_docs = batch.get("documents", [])
|
||||
batch_metas = batch.get("metadatas", [])
|
||||
if not batch_docs:
|
||||
break
|
||||
docs.extend(batch_docs)
|
||||
metas.extend(batch_metas)
|
||||
offset += len(batch_docs)
|
||||
if len(batch_docs) < _BATCH:
|
||||
break
|
||||
|
||||
if not docs:
|
||||
return "## L1 — No memories yet."
|
||||
|
||||
Reference in New Issue
Block a user