perf(mining): batch per-chunk upserts and add optional GPU acceleration

The miner upserted one drawer per ChromaDB call, paying tokenizer + ONNX session setup per chunk. The embedding device was CPU-only because no EmbeddingFunction was ever wired through the backend. Two changes, each a speedup in its own right; stacked they give ~10x end-to-end on a medium corpus (20 files, 568 drawers): 1. Batched upsert. `process_file` and `_file_chunks_locked` now collect all chunks of a file into a single `collection.upsert(...)` so the embedding model runs one forward pass per file instead of N. 2. Hardware-accelerated embedding function. New `mempalace/embedding.py` wraps `ONNXMiniLM_L6_V2` with configurable `preferred_providers`. `MEMPALACE_EMBEDDING_DEVICE` (or `embedding_device` in config.json) selects auto / cpu / cuda / coreml / dml. Unavailable accelerators log a warning and fall back to CPU. The factory subclasses `ONNXMiniLM_L6_V2` and spoofs its `name()` to `"default"` so the persisted EF identity matches existing palaces created with ChromaDB's bare `DefaultEmbeddingFunction` -- same model, same 384-dim vectors, no rebuild needed when turning GPU on. `ChromaBackend.get_collection` / `create_collection` now pass the resolved EF on every call so miner writes and searcher reads agree. Benchmarks (i9-12900KF + RTX 3090, medium scenario, 568 drawers): per-chunk + CPU 19.77s · 29 drw/s (baseline) batched + CPU 8.07s · 70 drw/s (2.4x) batched + CUDA 2.15s · 264 drw/s (9.2x) Reproducible via `benchmarks/mine_bench.py`. Install paths: pip install mempalace[gpu] # NVIDIA CUDA pip install mempalace[dml] # DirectML (Windows) pip install mempalace[coreml] # macOS Neural Engine Mine header now prints `Device: cpu|cuda|...` so users can confirm the accelerator engaged.
2026-04-24 19:42:35 -03:00
parent 7a757916b3
commit a4868a3589
8 changed files with 784 additions and 61 deletions
@@ -0,0 +1,301 @@
 """Mining throughput benchmark: per-chunk vs batched upsert, CPU vs GPU.
 Compares the legacy per-chunk ``add_drawer`` loop against the batched
 ``collection.upsert`` path introduced in the "batched upsert + GPU" PR.
 Runs both paths on an identical seeded synthetic corpus, reports
 wall-clock time + drawers/sec, and prints a markdown table suitable
 for pasting into a PR description.
 Usage
 -----
    # CPU (whatever onnxruntime is installed — CPU if you don't have
    # onnxruntime-gpu):
    uv run python benchmarks/mine_bench.py
    # GPU (NVIDIA):
    uv venv /tmp/gpu && source /tmp/gpu/bin/activate
    uv pip install -e '.[gpu]' 'nvidia-cudnn-cu12>=9,<10' \\
        'nvidia-cuda-runtime-cu12' 'nvidia-cublas-cu12'
    export LD_LIBRARY_PATH=$(python -c "import nvidia.cudnn, os; \\
        print(os.path.dirname(nvidia.cudnn.__file__)+'/lib')"):$LD_LIBRARY_PATH
    MEMPALACE_EMBEDDING_DEVICE=cuda python benchmarks/mine_bench.py
 Flags
 -----
    --device cpu|cuda|coreml|dml|auto   Override MEMPALACE_EMBEDDING_DEVICE
    --scenarios small,medium,large      Which scenarios to run
    --seed 42                           RNG seed for reproducibility
 """
 from __future__ import annotations
 import argparse
 import hashlib
 import os
 import random
 import shutil
 import string
 import sys
 import tempfile
 import time
 from datetime import datetime
 from pathlib import Path
 def build_corpus(dest: Path, n_files: int, paragraphs_per_file: int, seed: int) -> None:
    """Write a seeded synthetic corpus and its ``mempalace.yaml`` into ``dest``.

    Creates ``dest`` (including parents) if needed, then writes ``n_files``
    files named ``doc_000.md``, ``doc_001.md``, ... Each file holds
    ``paragraphs_per_file`` paragraphs separated by a blank line, and each
    paragraph is 12 random lowercase words of 3-10 letters. The same
    ``seed`` always produces the same file contents.
    """
    generator = random.Random(seed)
    alphabet = string.ascii_lowercase

    def make_word() -> str:
        # Draw the length first, then the letters, so the RNG stream is
        # consumed in a fixed order.
        length = generator.randint(3, 10)
        return "".join(generator.choices(alphabet, k=length))

    def make_paragraph() -> str:
        return " ".join(make_word() for _ in range(12))

    dest.mkdir(parents=True, exist_ok=True)
    for file_no in range(n_files):
        body = "\n\n".join(make_paragraph() for _ in range(paragraphs_per_file))
        (dest / f"doc_{file_no:03d}.md").write_text(body)

    # Minimal project config: one wing with a single catch-all room.
    project_yaml = (
        "wing: bench\n"
        "rooms:\n"
        "  - name: general\n"
        "    description: all\n"
        "    keywords: [general]\n"
    )
    (dest / "mempalace.yaml").write_text(project_yaml)
 def _process_file_unbatched(filepath, project_path, collection, wing, rooms, agent, closets_col):
    """Legacy per-chunk upsert path (pre-batching).
    Reproduces the exact loop shape the miner used before this PR so the
    comparison is apples-to-apples; only the upsert granularity differs.

    Returns ``(drawers_added, room)``. Files that are already mined,
    unreadable, or shorter than ``miner.MIN_CHUNK_SIZE`` after stripping
    return ``(0, "general")`` before any room detection happens.
    """
    # Project imports are local so the module can be imported without
    # mempalace being loaded yet.
    from mempalace import miner
    from mempalace.palace import (
        build_closet_lines,
        file_already_mined,
        mine_lock,
        purge_file_closets,
        upsert_closet_lines,
    )
    source_file = str(filepath)
    # Cheap pre-check outside the lock.
    if file_already_mined(collection, source_file, check_mtime=True):
        return 0, "general"
    try:
        content = filepath.read_text(encoding="utf-8", errors="replace")
    except OSError:
        return 0, "general"
    content = content.strip()
    if len(content) < miner.MIN_CHUNK_SIZE:
        return 0, "general"
    room = miner.detect_room(filepath, content, rooms, project_path)
    chunks = miner.chunk_text(content, source_file)
    with mine_lock(source_file):
        # Re-check under the lock; presumably guards against another miner
        # having filed this file in the meantime.
        if file_already_mined(collection, source_file, check_mtime=True):
            return 0, room
        # Best-effort removal of any existing drawers for this file; a
        # failing delete is deliberately ignored.
        try:
            collection.delete(where={"source_file": source_file})
        except Exception:
            pass
        drawers_added = 0
        # The part under test: one add_drawer call (one upsert) per chunk.
        for chunk in chunks:
            miner.add_drawer(
                collection=collection,
                wing=wing,
                room=room,
                content=chunk["content"],
                source_file=source_file,
                chunk_index=chunk["chunk_index"],
                agent=agent,
            )
            drawers_added += 1
        if closets_col and drawers_added > 0:
            # Recompute the drawer ids with the same wing/room/sha256 formula
            # that add_drawer uses, so the closet lines point at them.
            drawer_ids = [
                f"drawer_{wing}_{room}_"
                f"{hashlib.sha256((source_file + str(c['chunk_index'])).encode()).hexdigest()[:24]}"
                for c in chunks
            ]
            closet_lines = build_closet_lines(source_file, drawer_ids, content, wing, room)
            closet_id_base = (
                f"closet_{wing}_{room}_"
                f"{hashlib.sha256(source_file.encode()).hexdigest()[:24]}"
            )
            closet_meta = {
                "wing": wing,
                "room": room,
                "source_file": source_file,
                "drawer_count": drawers_added,
                "filed_at": datetime.now().isoformat(),
                "normalize_version": miner.NORMALIZE_VERSION,
            }
            # Drop this file's old closets before writing the new lines.
            purge_file_closets(closets_col, source_file)
            upsert_closet_lines(closets_col, closet_id_base, closet_lines, closet_meta)
    return drawers_added, room
 def mine_once(project_dir: str, palace_path: str, batched: bool) -> tuple[int, float]:
    """Mine ``project_dir`` into ``palace_path`` and time the per-file loop.

    ``batched=True`` sends every file through ``miner.process_file``;
    ``batched=False`` uses this benchmark's ``_process_file_unbatched``.
    Config loading, project scanning and opening both collections happen
    before the clock starts, so only per-file processing is timed.

    Returns ``(total_drawers, elapsed_seconds)``.
    """
    from mempalace import miner
    from mempalace.miner import load_config, scan_project
    from mempalace.palace import get_closets_collection, get_collection

    resolved_project = Path(project_dir).resolve()
    cfg = load_config(project_dir)
    wing_name = cfg["wing"]
    room_defs = cfg.get("rooms", [])
    candidates = scan_project(project_dir)
    drawers_col = get_collection(palace_path)
    closets_col = get_closets_collection(palace_path)

    def mine_batched(path):
        count, _room = miner.process_file(
            filepath=path,
            project_path=resolved_project,
            collection=drawers_col,
            wing=wing_name,
            rooms=room_defs,
            agent="bench",
            dry_run=False,
            closets_col=closets_col,
        )
        return count

    def mine_per_chunk(path):
        count, _room = _process_file_unbatched(
            path, resolved_project, drawers_col, wing_name, room_defs, "bench", closets_col
        )
        return count

    mine_file = mine_batched if batched else mine_per_chunk
    drawer_total = 0
    started = time.perf_counter()
    for path in candidates:
        drawer_total += mine_file(path)
    return drawer_total, time.perf_counter() - started
 def _reset_backend_caches() -> None:
    """Empty the default backend's in-process caches.

    Clears ``_clients`` and then ``_freshness`` on
    ``mempalace.palace._DEFAULT_BACKEND`` so every benchmark run starts
    from the same cold-open state.
    """
    from mempalace.palace import _DEFAULT_BACKEND

    for cache_attr in ("_clients", "_freshness"):
        getattr(_DEFAULT_BACKEND, cache_attr).clear()
 def run_scenario(label: str, n_files: int, paragraphs_per_file: int, seed: int) -> dict:
    """Run one scenario under both code paths and return a result dict.

    For each of the ``unbatched`` and ``batched`` modes a fresh temporary
    directory is created, the same seeded corpus is generated into it, the
    backend caches are reset, and the corpus is mined into an empty palace.
    The temporary directory is removed afterwards even if mining raises.

    The returned dict has the keys ``label``, ``n_files``, ``paragraphs``,
    ``drawers`` (batched count), ``unbatched_time``, ``unbatched_rate``,
    ``batched_time``, ``batched_rate`` and ``speedup``
    (``unbatched_time / batched_time``, or ``0.0`` if the batched time is 0).

    A warning is printed to stderr when the two modes file a different
    number of drawers, because the timings are then not comparable.
    """
    print(f"\n=== {label}: {n_files} files × {paragraphs_per_file} paragraphs ===")
    results = {}
    for mode in ("unbatched", "batched"):
        tmp = Path(tempfile.mkdtemp(prefix=f"mp_{mode}_"))
        try:
            proj = tmp / "proj"
            palace = tmp / "palace"
            build_corpus(proj, n_files, paragraphs_per_file, seed=seed)
            _reset_backend_caches()
            drawers, dt = mine_once(str(proj), str(palace), batched=(mode == "batched"))
            rate = drawers / dt if dt > 0 else 0.0
            results[mode] = (drawers, dt, rate)
            print(f"  {mode:10} {drawers:5} drawers in {dt:6.2f}s  →  {rate:7.1f} drawers/sec")
        finally:
            # ignore_errors: cleanup must never mask a benchmark failure.
            shutil.rmtree(tmp, ignore_errors=True)
    d_u, t_u, r_u = results["unbatched"]
    d_b, t_b, r_b = results["batched"]
    if d_u != d_b:
        # Both paths mine an identical corpus, so differing counts mean the
        # two code paths diverged and the speedup figure is meaningless.
        print(
            f"  WARNING: drawer counts differ (unbatched={d_u}, batched={d_b}); "
            "timings are not comparable",
            file=sys.stderr,
        )
    speedup = t_u / t_b if t_b > 0 else 0.0
    print(f"  speedup:   {speedup:.2f}× ({t_u:.2f}s → {t_b:.2f}s)")
    return {
        "label": label,
        "n_files": n_files,
        "paragraphs": paragraphs_per_file,
        "drawers": d_b,
        "unbatched_time": t_u,
        "unbatched_rate": r_u,
        "batched_time": t_b,
        "batched_rate": r_b,
        "speedup": speedup,
    }
 # Benchmark scenarios selectable via --scenarios.
 # Each value is (human-readable label, number of files, paragraphs per file).
 SCENARIOS = {
    "small":  ("Small files (~50 paragraphs)",  10, 50),
    "medium": ("Medium files (~200 paragraphs)", 20, 200),
    "large":  ("Large files (~500 paragraphs)",  10, 500),
 }
 def _env_summary(device_label: str) -> list[str]:
    """Short hardware + version lines included with the printed table."""
    import platform
    try:
        import chromadb
        chromadb_v = chromadb.__version__
    except Exception:
        chromadb_v = "?"
    try:
        import onnxruntime as ort
        ort_v = ort.__version__
        providers = ",".join(p.replace("ExecutionProvider", "") for p in ort.get_available_providers())
    except Exception:
        ort_v = "?"
        providers = "?"
    return [
        f"device: **{device_label}** (onnxruntime {ort_v}, providers={providers})",
        f"chromadb {chromadb_v} · python {sys.version.split()[0]} · {platform.platform()}",
    ]
 def main() -> None:
    """Parse CLI flags, run the selected scenarios and print a markdown table.

    ``--device`` (when given) is exported as ``MEMPALACE_EMBEDDING_DEVICE``
    before the embedding module is imported. Scenario names are validated
    before any expensive work, so a typo exits with status 2 immediately
    instead of after the model warm-up and earlier scenarios have run.
    The embedding function is called once on two sentences before timing
    so model loading is not charged to the first scenario.
    """
    # __doc__ is None under ``python -OO``; fall back to an empty description.
    parser = argparse.ArgumentParser(description=(__doc__ or "").split("\n\n", 1)[0])
    parser.add_argument(
        "--device",
        default=None,
        help="Override MEMPALACE_EMBEDDING_DEVICE (cpu|cuda|coreml|dml|auto)",
    )
    parser.add_argument(
        "--scenarios",
        default="small,medium,large",
        help="Comma-separated scenario names (default: all)",
    )
    parser.add_argument("--seed", type=int, default=42)
    args = parser.parse_args()
    # Reject unknown scenario names up front, before any warm-up or mining.
    picked = [s.strip() for s in args.scenarios.split(",") if s.strip()]
    for key in picked:
        if key not in SCENARIOS:
            print(f"Unknown scenario {key!r}; choices: {sorted(SCENARIOS)}", file=sys.stderr)
            sys.exit(2)
    if args.device:
        os.environ["MEMPALACE_EMBEDDING_DEVICE"] = args.device
    # Imported after the env override so the device setting is picked up.
    from mempalace.embedding import describe_device, get_embedding_function
    device_label = describe_device()
    print(f"Warming up ONNX model on device={device_label}...")
    ef = get_embedding_function()
    ef(["warmup sentence one", "warmup sentence two"])
    results = []
    for key in picked:
        label, n_files, paras = SCENARIOS[key]
        results.append(run_scenario(label, n_files, paras, args.seed))
    print("\n\n## Mining benchmark\n")
    for line in _env_summary(device_label):
        # Two trailing spaces force a markdown line break.
        print(line + "  ")
    print()
    print("| Scenario | Files | Drawers | Per-chunk (old) | Batched (new) | Speedup |")
    print("| --- | ---: | ---: | ---: | ---: | ---: |")
    for r in results:
        print(
            f"| {r['label']} | {r['n_files']} | {r['drawers']} | "
            f"{r['unbatched_time']:.2f}s · {r['unbatched_rate']:.0f} drw/s | "
            f"{r['batched_time']:.2f}s · {r['batched_rate']:.0f} drw/s | "
            f"**{r['speedup']:.2f}×** |"
        )
 # Script entry point: run the benchmark only when executed directly.
 if __name__ == "__main__":
    main()
@@ -405,6 +405,23 @@ class ChromaBackend(BaseBackend):
        self._freshness: dict[str, tuple[int, float]] = {}
        self._closed = False
    @staticmethod
    def _resolve_embedding_function():
        """Build the embedding function for the configured ``embedding_device``.

        Callers pass the result explicitly to ``get_collection`` and
        ``get_or_create_collection``: ChromaDB 1.x does not persist the EF
        with the collection, so a reader that omits it silently gets the
        library default and its queries won't match the writer's vectors.

        Returns ``None`` when the function cannot be built. The failure is
        logged with its traceback and callers then use chromadb's default.
        """
        embedding_fn = None
        try:
            from ..embedding import get_embedding_function as build_embedding_fn
            embedding_fn = build_embedding_fn()
        except Exception:
            logger.exception("Failed to build embedding function; using chromadb default")
        return embedding_fn
    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------
@@ -532,12 +549,15 @@ class ChromaBackend(BaseBackend):
        if options and isinstance(options, dict):
            hnsw_space = options.get("hnsw_space", hnsw_space)
        ef = self._resolve_embedding_function()
        ef_kwargs = {"embedding_function": ef} if ef is not None else {}
        if create:
            collection = client.get_or_create_collection(
-                collection_name, metadata={"hnsw:space": hnsw_space}
+                collection_name, metadata={"hnsw:space": hnsw_space}, **ef_kwargs
            )
        else:
-            collection = client.get_collection(collection_name)
+            collection = client.get_collection(collection_name, **ef_kwargs)
        return ChromaCollection(collection)
    def close_palace(self, palace) -> None:
@@ -578,8 +598,10 @@ class ChromaBackend(BaseBackend):
        self, palace_path: str, collection_name: str, hnsw_space: str = "cosine"
    ) -> ChromaCollection:
        """Create (not get-or-create) ``collection_name`` with the given HNSW space."""
        ef = self._resolve_embedding_function()
        ef_kwargs = {"embedding_function": ef} if ef is not None else {}
        collection = self._client(palace_path).create_collection(
-            collection_name, metadata={"hnsw:space": hnsw_space}
+            collection_name, metadata={"hnsw:space": hnsw_space}, **ef_kwargs
        )
        return ChromaCollection(collection)
@@ -236,6 +236,23 @@ class MempalaceConfig:
            pass
        return normalized
    @property
    def embedding_device(self):
        """Hardware device for the ONNX embedding model.

        Values: ``"auto"`` (default), ``"cpu"``, ``"cuda"``, ``"coreml"``,
        ``"dml"``. Read from env ``MEMPALACE_EMBEDDING_DEVICE`` first, then
        ``embedding_device`` in ``config.json``, then ``"auto"``.
        The result is always stripped and lower-cased.

        A blank or whitespace-only env value counts as unset and falls
        through to the config file; a missing, ``null`` or blank config
        value resolves to ``"auto"``.

        ``auto`` resolves to the first available accelerator at runtime via
        :mod:`mempalace.embedding`; requesting an unavailable accelerator
        logs a warning and falls back to CPU.
        """
        # Normalize before the truthiness test so "   " is not taken as a value.
        env_val = (os.environ.get("MEMPALACE_EMBEDDING_DEVICE") or "").strip().lower()
        if env_val:
            return env_val
        file_val = self._file_config.get("embedding_device")
        if file_val is None:
            # JSON null would otherwise stringify to the bogus device "none".
            return "auto"
        return str(file_val).strip().lower() or "auto"
    @property
    def hook_silent_save(self):
        """Whether the stop hook saves directly (True) or blocks for MCP calls (False)."""
@@ -332,31 +332,44 @@ def _file_chunks_locked(collection, source_file, chunks, wing, room, agent, extr
        except Exception:
            pass
        # Batch the whole file into one upsert so the embedding model runs
        # a single forward pass for all chunks — dramatically faster than
        # one call per chunk, especially on GPU where per-call overhead
        # dominates over the actual matmul.
        batch_docs: list = []
        batch_ids: list = []
        batch_metas: list = []
        filed_at = datetime.now().isoformat()
        for chunk in chunks:
            chunk_room = chunk.get("memory_type", room) if extract_mode == "general" else room
            if extract_mode == "general":
                room_counts_delta[chunk_room] += 1
            drawer_id = f"drawer_{wing}_{chunk_room}_{hashlib.sha256((source_file + str(chunk['chunk_index'])).encode()).hexdigest()[:24]}"
            batch_docs.append(chunk["content"])
            batch_ids.append(drawer_id)
            batch_metas.append(
                {
                    "wing": wing,
                    "room": chunk_room,
                    "hall": _detect_hall_cached(chunk["content"]),
                    "source_file": source_file,
                    "chunk_index": chunk["chunk_index"],
                    "added_by": agent,
                    "filed_at": filed_at,
                    "ingest_mode": "convos",
                    "extract_mode": extract_mode,
                    "normalize_version": NORMALIZE_VERSION,
                }
            )
        if batch_docs:
            try:
                collection.upsert(
-                    documents=[chunk["content"]],
+                    documents=batch_docs,
-                    ids=[drawer_id],
+                    ids=batch_ids,
-                    metadatas=[
+                    metadatas=batch_metas,
                        {
                            "wing": wing,
                            "room": chunk_room,
                            "hall": _detect_hall_cached(chunk["content"]),
                            "source_file": source_file,
                            "chunk_index": chunk["chunk_index"],
                            "added_by": agent,
                            "filed_at": datetime.now().isoformat(),
                            "ingest_mode": "convos",
                            "extract_mode": extract_mode,
                            "normalize_version": NORMALIZE_VERSION,
                        }
                    ],
                )
-                drawers_added += 1
+                drawers_added = len(batch_docs)
            except Exception as e:
                if "already exists" not in str(e).lower():
                    raise
@@ -0,0 +1,147 @@
 """Embedding function factory with hardware acceleration.
 Returns a ChromaDB-compatible embedding function bound to a user-selected
 ONNX Runtime execution provider. The same ``all-MiniLM-L6-v2`` model and
 384-dim vectors ChromaDB ships by default are reused, so switching device
 does not invalidate existing palaces.
 Supported devices (env ``MEMPALACE_EMBEDDING_DEVICE`` or ``embedding_device``
 in ``~/.mempalace/config.json``):
 * ``auto`` — prefer CUDA ▸ CoreML ▸ DirectML, fall back to CPU
 * ``cpu`` — force CPU (the historical default)
 * ``cuda`` — NVIDIA GPU via ``onnxruntime-gpu`` (``pip install mempalace[gpu]``)
 * ``coreml`` — Apple Neural Engine (macOS)
 * ``dml`` — DirectML (Windows / AMD / Intel GPUs)
 Requesting an unavailable accelerator emits a warning and falls back to CPU
 rather than hard-failing — mining must still work on a laptop without CUDA.
 """
 from __future__ import annotations
 import logging
 from typing import Optional
 logger = logging.getLogger(__name__)
 # Explicit device name -> ordered ONNX Runtime provider list. Every
 # accelerator entry lists CPUExecutionProvider after the accelerator.
 _PROVIDER_MAP = {
    "cpu": ["CPUExecutionProvider"],
    "cuda": ["CUDAExecutionProvider", "CPUExecutionProvider"],
    "coreml": ["CoreMLExecutionProvider", "CPUExecutionProvider"],
    "dml": ["DmlExecutionProvider", "CPUExecutionProvider"],
 }
 # (provider, device name) pairs probed in this order when device == "auto";
 # the first provider that onnxruntime reports as available wins.
 _AUTO_ORDER = [
    ("CUDAExecutionProvider", "cuda"),
    ("CoreMLExecutionProvider", "coreml"),
    ("DmlExecutionProvider", "dml"),
 ]
 # Embedding-function instances keyed by the tuple of resolved providers.
 _EF_CACHE: dict = {}
 # Device names that have already produced a fallback warning (warn once).
 _WARNED: set = set()
 def _resolve_providers(device: str) -> tuple[list, str]:
    """Return ``(provider_list, effective_device)`` for ``device``.
    Falls back to CPU (with a one-shot warning) when the requested
    accelerator is not compiled into the installed ``onnxruntime``.
    """
    device = (device or "auto").strip().lower()
    try:
        import onnxruntime as ort
        available = set(ort.get_available_providers())
    except ImportError:
        return (["CPUExecutionProvider"], "cpu")
    if device == "auto":
        for provider, name in _AUTO_ORDER:
            if provider in available:
                return ([provider, "CPUExecutionProvider"], name)
        return (["CPUExecutionProvider"], "cpu")
    requested = _PROVIDER_MAP.get(device)
    if requested is None:
        if device not in _WARNED:
            logger.warning("Unknown embedding_device %r — falling back to cpu", device)
            _WARNED.add(device)
        return (["CPUExecutionProvider"], "cpu")
    preferred = requested[0]
    if preferred == "CPUExecutionProvider":
        return (requested, "cpu")
    if preferred not in available:
        if device not in _WARNED:
            logger.warning(
                "embedding_device=%r requested but %s is not installed — "
                "falling back to CPU. Install mempalace[gpu] for CUDA.",
                device,
                preferred,
            )
            _WARNED.add(device)
        return (["CPUExecutionProvider"], "cpu")
    return (requested, device)
 def _build_ef_class():
    """Subclass ``ONNXMiniLM_L6_V2`` with name ``"default"``.
    Why the rename: ChromaDB 1.5 persists the EF identity on the collection
    and rejects reads that pass a differently-named EF (``onnx_mini_lm_l6_v2``
    vs ``default``). The vectors and model are identical — only the
    ``name()`` tag differs — so spoofing the name lets one EF class serve
    palaces created with ``DefaultEmbeddingFunction`` *and* palaces we
    create ourselves, with the same GPU-capable ``preferred_providers``.

    Returns the subclass itself, not an instance; the caller instantiates
    it with ``preferred_providers``. A new class object is created on
    every call.
    """
    # Imported inside the function, so chromadb is only loaded when an EF
    # class is actually built.
    from chromadb.utils.embedding_functions import ONNXMiniLM_L6_V2
    class _MempalaceONNX(ONNXMiniLM_L6_V2):
        @staticmethod
        def name() -> str:
            # Report the identity "default" instead of the parent's own name.
            return "default"
    return _MempalaceONNX
 def get_embedding_function(device: Optional[str] = None):
    """Return the process-wide embedding function for ``device``.

    When ``device`` is ``None`` the value comes from
    :class:`MempalaceConfig.embedding_device`. Instances are cached by
    their resolved provider tuple, so two device names that resolve to the
    same providers share one instance and the model loads once per process.
    """
    requested = device
    if requested is None:
        from .config import MempalaceConfig

        requested = MempalaceConfig().embedding_device
    provider_list, resolved_device = _resolve_providers(requested)
    key = tuple(provider_list)
    instance = _EF_CACHE.get(key)
    if instance is None:
        # Cache miss: build the EF once and remember it for later callers.
        instance = _build_ef_class()(preferred_providers=provider_list)
        _EF_CACHE[key] = instance
        logger.info(
            "Embedding function initialized (device=%s providers=%s)",
            resolved_device,
            provider_list,
        )
    return instance
 def describe_device(device: Optional[str] = None) -> str:
    """Return the short device label that ``device`` resolves to.

    When ``device`` is ``None`` the value comes from
    :class:`MempalaceConfig.embedding_device`. The label is the effective
    device reported by ``_resolve_providers`` (for example ``"cpu"`` after
    a fallback). It reflects which providers onnxruntime lists as
    available, not a confirmed inference session.
    """
    chosen = device
    if chosen is None:
        from .config import MempalaceConfig

        chosen = MempalaceConfig().embedding_device
    resolution = _resolve_providers(chosen)
    return resolution[1]
@@ -14,6 +14,7 @@ import fnmatch
 from pathlib import Path
 from datetime import datetime
 from collections import defaultdict
 from typing import Optional
 from .palace import (
    NORMALIZE_VERSION,
@@ -633,40 +634,62 @@ def _extract_entities_for_metadata(content: str) -> str:
    return ";".join(capped)
 def _build_drawer_metadata(
    wing: str,
    room: str,
    source_file: str,
    chunk_index: int,
    agent: str,
    content: str,
    source_mtime: Optional[float],
 ) -> dict:
    """Assemble the metadata dict for a single drawer; performs no upsert.

    Exists separately from ``add_drawer`` so ``process_file`` can gather
    the metadata for every chunk of a file and send them in one
    ``collection.upsert``.

    ``source_mtime`` is stored only when it is not ``None``, and
    ``entities`` only when entity extraction returns a non-empty value.
    ``filed_at`` is the current local time at the moment of the call.
    """
    record = dict(
        wing=wing,
        room=room,
        source_file=source_file,
        chunk_index=chunk_index,
        added_by=agent,
        filed_at=datetime.now().isoformat(),
        normalize_version=NORMALIZE_VERSION,
    )
    if source_mtime is not None:
        record["source_mtime"] = source_mtime
    record["hall"] = detect_hall(content)
    entity_tags = _extract_entities_for_metadata(content)
    if entity_tags:
        record["entities"] = entity_tags
    return record
 def add_drawer(
    collection, wing: str, room: str, content: str, source_file: str, chunk_index: int, agent: str
 ):
-    """Add one drawer to the palace."""
+    """Add one drawer to the palace.
    Kept for backward compatibility with external callers. In-tree the
    miner uses ``_build_drawer_metadata`` + a batched ``collection.upsert``
    to amortize the embedding model's forward-pass cost across chunks.
    """
    drawer_id = f"drawer_{wing}_{room}_{hashlib.sha256((source_file + str(chunk_index)).encode()).hexdigest()[:24]}"
    try:
-        metadata = {
+        source_mtime = os.path.getmtime(source_file)
-            "wing": wing,
+    except OSError:
-            "room": room,
+        source_mtime = None
-            "source_file": source_file,
+    metadata = _build_drawer_metadata(
-            "chunk_index": chunk_index,
+        wing, room, source_file, chunk_index, agent, content, source_mtime
-            "added_by": agent,
+    )
-            "filed_at": datetime.now().isoformat(),
+    collection.upsert(
-            "normalize_version": NORMALIZE_VERSION,
+        documents=[content],
-        }
+        ids=[drawer_id],
-        # Store file mtime so we can detect modifications later.
+        metadatas=[metadata],
-        try:
+    )
-            metadata["source_mtime"] = os.path.getmtime(source_file)
+    return True
        except OSError:
            pass
        # Tag with hall for graph connectivity within wings
        metadata["hall"] = detect_hall(content)
        # Tag with entity names for filterable search
        entities = _extract_entities_for_metadata(content)
        if entities:
            metadata["entities"] = entities
        collection.upsert(
            documents=[content],
            ids=[drawer_id],
            metadatas=[metadata],
        )
        return True
    except Exception:
        raise
 # =============================================================================
@@ -725,19 +748,42 @@ def process_file(
        except Exception:
            pass
-        drawers_added = 0
+        # Batch all chunks of this file into a single upsert so the embedding
        # model runs one forward pass over the whole file instead of N passes
        # of one chunk each. On the medium benchmark scenario batching alone
        # measured ~2.4x faster on CPU; combined with CUDA it reached ~9x over
        # the per-chunk CPU baseline (see benchmarks/mine_bench.py).
        try:
            source_mtime = os.path.getmtime(source_file)
        except OSError:
            source_mtime = None
        batch_docs: list = []
        batch_ids: list = []
        batch_metas: list = []
        for chunk in chunks:
-            added = add_drawer(
+            drawer_id = f"drawer_{wing}_{room}_{hashlib.sha256((source_file + str(chunk['chunk_index'])).encode()).hexdigest()[:24]}"
-                collection=collection,
+            batch_docs.append(chunk["content"])
-                wing=wing,
+            batch_ids.append(drawer_id)
-                room=room,
+            batch_metas.append(
-                content=chunk["content"],
+                _build_drawer_metadata(
-                source_file=source_file,
+                    wing,
-                chunk_index=chunk["chunk_index"],
+                    room,
-                agent=agent,
+                    source_file,
                    chunk["chunk_index"],
                    agent,
                    chunk["content"],
                    source_mtime,
                )
            )
-            if added:
+
-                drawers_added += 1
+        drawers_added = 0
        if batch_docs:
            collection.upsert(
                documents=batch_docs,
                ids=batch_ids,
                metadatas=batch_metas,
            )
            drawers_added = len(batch_docs)
        # Build closet — the searchable index pointing to these drawers.
        # Purge first: a re-mine (mtime change or normalize_version bump) must
@@ -868,6 +914,8 @@ def mine(
    if limit > 0:
        files = files[:limit]
    from .embedding import describe_device
    print(f"\n{'=' * 55}")
    print("  MemPalace Mine")
    print(f"{'=' * 55}")
@@ -875,6 +923,7 @@ def mine(
    print(f"  Rooms:   {', '.join(r['name'] for r in rooms)}")
    print(f"  Files:   {len(files)}")
    print(f"  Palace:  {palace_path}")
    print(f"  Device:  {describe_device()}")
    if dry_run:
        print("  DRY RUN — nothing will be filed")
    if not respect_gitignore:
@@ -53,6 +53,14 @@ chroma = "mempalace.backends.chroma:ChromaBackend"
 [project.optional-dependencies]
 dev = ["pytest>=7.0", "pytest-cov>=4.0", "ruff>=0.4.0", "psutil>=5.9"]
 spellcheck = ["autocorrect>=2.0"]
 # Hardware acceleration for the ONNX embedding model. Install exactly one:
 #   pip install mempalace[gpu]       — NVIDIA CUDA
 #   pip install mempalace[dml]       — DirectML (Windows AMD/Intel/NVIDIA)
 #   pip install mempalace[coreml]    — macOS Neural Engine
 # After install, set MEMPALACE_EMBEDDING_DEVICE=cuda|dml|coreml (or "auto").
 gpu = ["onnxruntime-gpu>=1.16"]
 dml = ["onnxruntime-directml>=1.16"]
 coreml = ["onnxruntime>=1.16"]
 [dependency-groups]
 dev = ["pytest>=7.0", "pytest-cov>=4.0", "ruff>=0.4.0", "psutil>=5.9"]
@@ -1178,6 +1178,11 @@ dependencies = [
 ]
 [package.optional-dependencies]
 coreml = [
    { name = "onnxruntime", version = "1.20.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" },
    { name = "onnxruntime", version = "1.24.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.10.*'" },
    { name = "onnxruntime", version = "1.24.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" },
 ]
 dev = [
    { name = "psutil" },
    { name = "pytest", version = "8.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" },
@@ -1185,6 +1190,16 @@ dev = [
    { name = "pytest-cov" },
    { name = "ruff" },
 ]
 dml = [
    { name = "onnxruntime-directml", version = "1.20.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" },
    { name = "onnxruntime-directml", version = "1.24.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.10.*'" },
    { name = "onnxruntime-directml", version = "1.24.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" },
 ]
 gpu = [
    { name = "onnxruntime-gpu", version = "1.20.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" },
    { name = "onnxruntime-gpu", version = "1.24.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.10.*'" },
    { name = "onnxruntime-gpu", version = "1.25.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" },
 ]
 spellcheck = [
    { name = "autocorrect" },
 ]
@@ -1202,6 +1217,9 @@ dev = [
 requires-dist = [
    { name = "autocorrect", marker = "extra == 'spellcheck'", specifier = ">=2.0" },
    { name = "chromadb", specifier = ">=1.5.4,<2" },
    { name = "onnxruntime", marker = "extra == 'coreml'", specifier = ">=1.16" },
    { name = "onnxruntime-directml", marker = "extra == 'dml'", specifier = ">=1.16" },
    { name = "onnxruntime-gpu", marker = "extra == 'gpu'", specifier = ">=1.16" },
    { name = "psutil", marker = "extra == 'dev'", specifier = ">=5.9" },
    { name = "pytest", marker = "extra == 'dev'", specifier = ">=7.0" },
    { name = "pytest-cov", marker = "extra == 'dev'", specifier = ">=4.0" },
@@ -1209,7 +1227,7 @@ requires-dist = [
    { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.4.0" },
    { name = "tomli", marker = "python_full_version < '3.11'", specifier = ">=2.0.0" },
 ]
-provides-extras = ["dev", "spellcheck"]
+provides-extras = ["dev", "spellcheck", "gpu", "dml", "coreml"]
 [package.metadata.requires-dev]
 dev = [
@@ -1815,6 +1833,154 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/6c/1d/1666dc64e78d8587d168fec4e3b7922b92eb286a2ddeebcf6acb55c7dc82/onnxruntime-1.24.4-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e1cc6a518255f012134bc791975a6294806be9a3b20c4a54cca25194c90cf731", size = 17247021, upload-time = "2026-03-17T22:04:52.377Z" },
 ]
 [[package]]
 name = "onnxruntime-directml"
 version = "1.20.1"
 source = { registry = "https://pypi.org/simple" }
 resolution-markers = [
    "python_full_version < '3.10'",
 ]
 dependencies = [
    { name = "coloredlogs", marker = "python_full_version < '3.10'" },
    { name = "flatbuffers", marker = "python_full_version < '3.10'" },
    { name = "numpy", version = "2.0.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" },
    { name = "packaging", marker = "python_full_version < '3.10'" },
    { name = "protobuf", marker = "python_full_version < '3.10'" },
    { name = "sympy", marker = "python_full_version < '3.10'" },
 ]
 wheels = [
    { url = "https://files.pythonhosted.org/packages/3c/4f/f433239b05304aa9af0217da20508abbbcec1dcd58ee821e3dab8939ecfe/onnxruntime_directml-1.20.1-cp310-cp310-win_amd64.whl", hash = "sha256:d4aa43694799559fb5570fdf0e96a154d4b4d0bb9b73c3e81744eb7fe0c0de8d", size = 22760521, upload-time = "2024-11-21T00:49:40.179Z" },
    { url = "https://files.pythonhosted.org/packages/df/5f/16337318bd99d2d837cbb2e91e8a12b0915cb80d7c1ae8f80ca2f5d47a09/onnxruntime_directml-1.20.1-cp311-cp311-win_amd64.whl", hash = "sha256:c7861057ad4caa64186c910efb3b54c1f575cd0e64732509c9bd927d2d20187b", size = 22762384, upload-time = "2024-11-21T00:49:44.01Z" },
    { url = "https://files.pythonhosted.org/packages/8f/50/4599c6573bd71cc0c80820c63dea599a0b489ce874f93a5e021ca20a9e1f/onnxruntime_directml-1.20.1-cp312-cp312-win_amd64.whl", hash = "sha256:4b9a9f8349d68eef947fc692b3572e7a6490cb95effb151ace1a6ffc15884940", size = 22764330, upload-time = "2024-11-21T00:49:47.264Z" },
    { url = "https://files.pythonhosted.org/packages/60/40/7d8489d9101b4aa7bae29227075ce31bc5764cbe87b78c995fdb296e3eff/onnxruntime_directml-1.20.1-cp313-cp313-win_amd64.whl", hash = "sha256:86a8c4b69e377bb18ed2a18aaf2337baa83a57ff87a97224d027e546dfa99fde", size = 22764517, upload-time = "2024-11-21T00:49:50.213Z" },
 ]
 [[package]]
 name = "onnxruntime-directml"
 version = "1.24.3"
 source = { registry = "https://pypi.org/simple" }
 resolution-markers = [
    "python_full_version == '3.10.*'",
 ]
 dependencies = [
    { name = "flatbuffers", marker = "python_full_version == '3.10.*'" },
    { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.10.*'" },
    { name = "packaging", marker = "python_full_version == '3.10.*'" },
    { name = "protobuf", marker = "python_full_version == '3.10.*'" },
    { name = "sympy", marker = "python_full_version == '3.10.*'" },
 ]
 wheels = [
    { url = "https://files.pythonhosted.org/packages/ed/65/36ce5a5e79fb5d7b4d7636bc6e6c4024f3ff0571789e8eedb7149bb7c538/onnxruntime_directml-1.24.3-cp311-cp311-win_amd64.whl", hash = "sha256:442fecea5d52df315b6cecfbcbb44aff6681880b6bbf23546a6c00125fec66f1", size = 25106769, upload-time = "2026-03-05T16:27:07.495Z" },
    { url = "https://files.pythonhosted.org/packages/05/40/c948c0ee42b7b6297dd45956092f5a53a6954610c3911a5847c7555b4930/onnxruntime_directml-1.24.3-cp312-cp312-win_amd64.whl", hash = "sha256:d889010e6ed2f30026522308173d295bcfdaf6f28d1df6054c748ffa750a7ad5", size = 25114531, upload-time = "2026-03-05T16:27:11.256Z" },
    { url = "https://files.pythonhosted.org/packages/56/f0/9de329f39a66142aab4c1d9a48edc0e432de27c6ba09e8039e0dc51885e7/onnxruntime_directml-1.24.3-cp313-cp313-win_amd64.whl", hash = "sha256:f684adcb29dd48ee172b52fcf1d19a1da1a67a051384ac3418b36d200d0d105c", size = 25114902, upload-time = "2026-03-05T16:27:13.925Z" },
    { url = "https://files.pythonhosted.org/packages/fe/7a/8b3014ca4065a32bd6672221bf4cb0b5b9a726d28a9caafdb86a076a5981/onnxruntime_directml-1.24.3-cp314-cp314-win_amd64.whl", hash = "sha256:42b17de7030445e75a7e83a4a317f9c655ed2dd7045fe79a7a21dce7b60103b6", size = 25570589, upload-time = "2026-03-05T16:27:17.278Z" },
 ]
 [[package]]
 name = "onnxruntime-directml"
 version = "1.24.4"
 source = { registry = "https://pypi.org/simple" }
 resolution-markers = [
    "python_full_version >= '3.14'",
    "python_full_version == '3.13.*'",
    "python_full_version >= '3.11' and python_full_version < '3.13'",
 ]
 dependencies = [
    { name = "flatbuffers", marker = "python_full_version >= '3.11'" },
    { name = "numpy", version = "2.4.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" },
    { name = "packaging", marker = "python_full_version >= '3.11'" },
    { name = "protobuf", marker = "python_full_version >= '3.11'" },
    { name = "sympy", marker = "python_full_version >= '3.11'" },
 ]
 wheels = [
    { url = "https://files.pythonhosted.org/packages/be/90/99566dc6398028e7691a5b12720fd85f757a0901818b84599d28abb3f085/onnxruntime_directml-1.24.4-cp311-cp311-win_amd64.whl", hash = "sha256:96642a787e5a6f33bf043521c0f06eb1eb663f6b830e5862a2026d03f9c90543", size = 25106000, upload-time = "2026-03-17T21:47:15.438Z" },
    { url = "https://files.pythonhosted.org/packages/88/ea/33814eb0ec96775eda4c1d30b0d86e91d7d2cd0d84c66d3915aef0e06fa3/onnxruntime_directml-1.24.4-cp312-cp312-win_amd64.whl", hash = "sha256:f2ecb68b7b7b259d2ef3112ae760149f9b5a1e7c0fbb73d539da6250a648a614", size = 25111930, upload-time = "2026-03-17T21:47:18.419Z" },
    { url = "https://files.pythonhosted.org/packages/60/53/2bd2696fac19cf8ca55496a0bcfe431f3aff9579eabbb0e231dc238acf6f/onnxruntime_directml-1.24.4-cp313-cp313-win_amd64.whl", hash = "sha256:2f1031cb2281e5b27cca9efe0b9399317c7286e4d226f7a79d4ab79bbd94d19e", size = 25112253, upload-time = "2026-03-17T21:47:22.043Z" },
    { url = "https://files.pythonhosted.org/packages/b7/04/816932a3ade867a687e406716ca76e0774c6b921545b45818e3ebfcc54ce/onnxruntime_directml-1.24.4-cp314-cp314-win_amd64.whl", hash = "sha256:51d86bb949488e572b00422f344990a4a81d982416d73b6c0e4ced2bcd423d19", size = 25571098, upload-time = "2026-03-17T21:47:25.461Z" },
 ]
 [[package]]
 name = "onnxruntime-gpu"
 version = "1.20.2"
 source = { registry = "https://pypi.org/simple" }
 resolution-markers = [
    "python_full_version < '3.10'",
 ]
 dependencies = [
    { name = "coloredlogs", marker = "python_full_version < '3.10'" },
    { name = "flatbuffers", marker = "python_full_version < '3.10'" },
    { name = "numpy", version = "2.0.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" },
    { name = "packaging", marker = "python_full_version < '3.10'" },
    { name = "protobuf", marker = "python_full_version < '3.10'" },
    { name = "sympy", marker = "python_full_version < '3.10'" },
 ]
 wheels = [
    { url = "https://files.pythonhosted.org/packages/04/ad/4e5534dcaafe36f596792ebd0049177f7f0b7afa0f696505974ed1d6f72c/onnxruntime_gpu-1.20.2-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:dfba508f110ec062dedfd3032e6eee8cde325026e9d7c5792884e8b9d4ebb9c3", size = 291522233, upload-time = "2025-03-07T05:46:08.901Z" },
    { url = "https://files.pythonhosted.org/packages/a5/2a/8afc5aee996fd33fb816bc3067fdbde96a2a7520d4c275fa502f3aef7e54/onnxruntime_gpu-1.20.2-cp310-cp310-win_amd64.whl", hash = "sha256:75a7557292b2741e63fb73236ee84faa08075cead52d9a8d302a67036fc64f16", size = 279696089, upload-time = "2025-03-07T05:39:24.924Z" },
    { url = "https://files.pythonhosted.org/packages/5e/53/9341b875b0ed29953485b43713e94b335a449c3770fed67dddb3c9b84af0/onnxruntime_gpu-1.20.2-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:85057c7006457bee14fc2a57417b7e4f396f10d9c1b08b11aae08ac2b825eeda", size = 291518407, upload-time = "2025-03-07T05:46:22.943Z" },
    { url = "https://files.pythonhosted.org/packages/0b/7a/0999993ceae7bf191d5d63a4e1b2208596763d8e586aa7dc5cc091f960c0/onnxruntime_gpu-1.20.2-cp311-cp311-win_amd64.whl", hash = "sha256:d0eafd873e4336949c89e6c7429a68e7e1d0233d9cb363e9780ca76c3c6f865c", size = 279697437, upload-time = "2025-03-07T05:39:38.418Z" },
    { url = "https://files.pythonhosted.org/packages/5b/db/c1fcdf45cad147d3b3609cf66a1c6083b54382f58a41d7fc526cd5909090/onnxruntime_gpu-1.20.2-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:aa66d2e6de13fe6f4d1554b1c219bd2e4778b540ed9d3dc62957c95a8af43d66", size = 291510804, upload-time = "2025-03-07T05:46:36.178Z" },
    { url = "https://files.pythonhosted.org/packages/27/67/4f979650557738a8b148dd7e0b82522d20ffcfb2c3964141c861a61e82c7/onnxruntime_gpu-1.20.2-cp312-cp312-win_amd64.whl", hash = "sha256:564a6a1187b208012f57c3bb3723ba65f6bc5cddff6e6b917ac96865768b39f5", size = 279699596, upload-time = "2025-03-07T05:39:50.858Z" },
    { url = "https://files.pythonhosted.org/packages/48/a4/60f0cf16b24f05d123f90525408a705741fa92e0c38ab122cdf1d239e3fe/onnxruntime_gpu-1.20.2-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6af5b30b9b0e729d3ca1dfff493a39771f143cfc22af1d77d487022033cae284", size = 291511859, upload-time = "2025-03-07T05:46:49.302Z" },
    { url = "https://files.pythonhosted.org/packages/ab/a2/0eb7a3fa417adc7af0be73b0ea35f1f0d6f92e3722eb6468e36dfe2e762d/onnxruntime_gpu-1.20.2-cp313-cp313-win_amd64.whl", hash = "sha256:6ffe5108d2dbd96a9a40bf76573219e04b67d0330aa93ca5114f1478185ade19", size = 279697061, upload-time = "2025-03-07T05:40:03.559Z" },
    { url = "https://files.pythonhosted.org/packages/4e/de/6c692ac8604a451011a2a01e35e94f84bea8775ef97f6830985bbe8de172/onnxruntime_gpu-1.20.2-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:407e5b7a21d656aac6f994d2e329f5577eb3d7f98b63aa1e49e71a702ffa1da1", size = 291502464, upload-time = "2025-03-07T05:47:03.191Z" },
 ]
 [[package]]
 name = "onnxruntime-gpu"
 version = "1.24.3"
 source = { registry = "https://pypi.org/simple" }
 resolution-markers = [
    "python_full_version == '3.10.*'",
 ]
 dependencies = [
    { name = "flatbuffers", marker = "python_full_version == '3.10.*'" },
    { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.10.*'" },
    { name = "packaging", marker = "python_full_version == '3.10.*'" },
    { name = "protobuf", marker = "python_full_version == '3.10.*'" },
    { name = "sympy", marker = "python_full_version == '3.10.*'" },
 ]
 wheels = [
    { url = "https://files.pythonhosted.org/packages/28/f4/c8050f3f4916ab6c75432724f0ba51c1548dc1c3d66d40c0f8a9611e370f/onnxruntime_gpu-1.24.3-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ac922633819e1cdc81c9b3a28b5e37d788805307bbaa708a01a3d7150e345625", size = 252750845, upload-time = "2026-03-05T16:35:33.604Z" },
    { url = "https://files.pythonhosted.org/packages/07/b7/81e8936354651915192a362a1718253c6d03da6b902a95237aa392b1d260/onnxruntime_gpu-1.24.3-cp311-cp311-win_amd64.whl", hash = "sha256:0fe6ece3042db149f36f4991cbebd19a690b7ffd82af89450a261b47f4704a37", size = 207192429, upload-time = "2026-03-05T16:39:57.015Z" },
    { url = "https://files.pythonhosted.org/packages/24/fa/58ceca812214c9c1a286407c376e42e0b7de3e2c6e14b61cdf3caf6d6d9c/onnxruntime_gpu-1.24.3-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:537bdd6d95006a9200ae81f2e73ba9e621e723fdf0deb5901e2e62fb2cccf876", size = 252756089, upload-time = "2026-03-05T16:35:46.004Z" },
    { url = "https://files.pythonhosted.org/packages/3c/07/2f36920b513bd8939e25591153e37d9cfda94115bd119f2874da0750fce2/onnxruntime_gpu-1.24.3-cp312-cp312-win_amd64.whl", hash = "sha256:d72065b3ab5fdaef74d8b6b8f39b7ce20d89731610e3e63cb40e997d3dce177e", size = 207197001, upload-time = "2026-03-05T16:40:05.691Z" },
    { url = "https://files.pythonhosted.org/packages/49/57/9e6206dac76e08f028d2ae95f2ab1b3a7c3317fb6c0374a530aad48dab5c/onnxruntime_gpu-1.24.3-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3242a70010934e5bb0aeaa9dde4c25c6c2da577b55c6308c0caa828ba3b7be23", size = 252753349, upload-time = "2026-03-05T16:35:58.09Z" },
    { url = "https://files.pythonhosted.org/packages/4e/ae/f0be395602c13a3a8d22fa6632133550a64536c58bc3623abbba5d0a575e/onnxruntime_gpu-1.24.3-cp313-cp313-win_amd64.whl", hash = "sha256:a423b164dbc26cb7f8736367b11698c2a7294748d3c144c39542ecac28d225c9", size = 207197331, upload-time = "2026-03-05T16:40:14.944Z" },
    { url = "https://files.pythonhosted.org/packages/b4/af/a64c9789769d8d7fabc6d35dcce2f2897b2d9e0fe113044efc2903f7cd07/onnxruntime_gpu-1.24.3-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9696d54974a1313ef0d87f4cbd04f9abfd13839194638d52bb5967a15615341d", size = 252762923, upload-time = "2026-03-05T16:36:10.043Z" },
    { url = "https://files.pythonhosted.org/packages/c1/bb/1cf7dffac2fb01e8de9f0882438165f7543f0aab57f86d1f587e6faa8528/onnxruntime_gpu-1.24.3-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b8ca744f40b33380bc9136988213e574c927d2b919ed42149977e006b138f74f", size = 252754914, upload-time = "2026-03-05T16:36:30.739Z" },
    { url = "https://files.pythonhosted.org/packages/cf/39/3949d56103bd9cd9381de59b060f9bce8dc2c7363f465bf207ebd0c7a5d0/onnxruntime_gpu-1.24.3-cp314-cp314-win_amd64.whl", hash = "sha256:c60c44e2b388720e6670a948b52626f3d089e960ef7da66e4fa6b2b33a11116f", size = 209599131, upload-time = "2026-03-05T16:40:24.074Z" },
    { url = "https://files.pythonhosted.org/packages/f3/60/51bfbcf2d0540dbfa426a73a9b80046b71a63de7303d16c0f2682c8edfd2/onnxruntime_gpu-1.24.3-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:29048407a2398361d93de5537c2d2079d79d720337a0743d4a2cc28db981e776", size = 252764115, upload-time = "2026-03-05T16:36:44.681Z" },
 ]
 [[package]]
 name = "onnxruntime-gpu"
 version = "1.25.0"
 source = { registry = "https://pypi.org/simple" }
 resolution-markers = [
    "python_full_version >= '3.14'",
    "python_full_version == '3.13.*'",
    "python_full_version >= '3.11' and python_full_version < '3.13'",
 ]
 dependencies = [
    { name = "flatbuffers", marker = "python_full_version >= '3.11'" },
    { name = "numpy", version = "2.4.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" },
    { name = "packaging", marker = "python_full_version >= '3.11'" },
    { name = "protobuf", marker = "python_full_version >= '3.11'" },
 ]
 wheels = [
    { url = "https://files.pythonhosted.org/packages/2d/7e/f58f8fc505a876b31fd2a34c1eb8f9863b75bf1589c3297c8efd48b93151/onnxruntime_gpu-1.25.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e8625bb31ee2d88524414e7458cc604f4f958f323ef8832cc00882f6cd42b9a1", size = 270337732, upload-time = "2026-04-22T17:27:59.993Z" },
    { url = "https://files.pythonhosted.org/packages/55/5d/2561b3aa667d87a4ae9cd01c5a565955aab5a3d44a6076f723beb9cdde0a/onnxruntime_gpu-1.25.0-cp311-cp311-win_amd64.whl", hash = "sha256:2e702159a025aa5c69f0b747adf9a451e0c9e4b20120163a918c8459d3171b87", size = 220845585, upload-time = "2026-04-22T17:20:38.939Z" },
    { url = "https://files.pythonhosted.org/packages/1d/6d/2c13d3eff74caa9e59820a044a75becd34e9cbeeaf7617ad7679cdb1fdb7/onnxruntime_gpu-1.25.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2f0c36c63c8b0eb4091f2567067f480f66f0aedc189eb009545c98ce7e919056", size = 270342429, upload-time = "2026-04-22T17:28:10.526Z" },
    { url = "https://files.pythonhosted.org/packages/8c/2e/9fc303ae59d4caeb85ec3cea6881b7de8ca1d2a07140fade39913cd7ff10/onnxruntime_gpu-1.25.0-cp312-cp312-win_amd64.whl", hash = "sha256:61178cc4d84f59861714554531e01cccbd33ddf13cc0e87a3adea13b24d297ce", size = 220847708, upload-time = "2026-04-22T17:20:47.993Z" },
    { url = "https://files.pythonhosted.org/packages/f5/15/e63fe7b1abad6884bed07e9bb333e9f0ea48fbb8cbc1ea4a67ee6019d5d0/onnxruntime_gpu-1.25.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e462eb13ee9955117baec4f518916c1e7cb1a96001114105632bc6d454c6aee6", size = 270342324, upload-time = "2026-04-22T17:28:21.142Z" },
    { url = "https://files.pythonhosted.org/packages/21/10/b3533243d062b589d4b1f3ae26584af332c5cde618e7f6f5ff6fabbfd5f2/onnxruntime_gpu-1.25.0-cp313-cp313-win_amd64.whl", hash = "sha256:9a3682158e5e911385252eb95d6332b6f525972746c582e10f8a78213b39e624", size = 220848188, upload-time = "2026-04-22T17:20:56.946Z" },
    { url = "https://files.pythonhosted.org/packages/35/6c/d7706dd1d0eaafdba44d5c89f8d952de41e425a1b0cbd3ecfa60f918c249/onnxruntime_gpu-1.25.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8514b92c5929c953850090d823d018770cba2a971efab5f8f69a3c4280cdc632", size = 270364210, upload-time = "2026-04-22T17:28:33.568Z" },
    { url = "https://files.pythonhosted.org/packages/37/01/9f1b16ea857e3a4b5e82a2d70b52ea46a0083569f737d840f74a1b86818f/onnxruntime_gpu-1.25.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ffe9df4016b061ec3a5565a4fc08cdb86808cd8b9c255c42301066c0c24a81b5", size = 270345126, upload-time = "2026-04-22T17:28:44.416Z" },
    { url = "https://files.pythonhosted.org/packages/56/c8/aae22f3c9cea9160d8d969734a1927720fcb4d4ad4abe269c407c1d2b63c/onnxruntime_gpu-1.25.0-cp314-cp314-win_amd64.whl", hash = "sha256:2173b71631208177fe704ce2d92eac3acbf758285327247ea40a31a9f0bcc073", size = 223385369, upload-time = "2026-04-22T17:21:06.026Z" },
    { url = "https://files.pythonhosted.org/packages/ed/0a/79fba6a1a32803a2bf8b99187e0ea5d5d69ffe0c5c0f469bde232ceb8327/onnxruntime_gpu-1.25.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8576c721c600cc669717a2ae49af30fdfff230480099653adc7b79d58a240852", size = 270364130, upload-time = "2026-04-22T17:28:54.708Z" },
 ]
 [[package]]
 name = "opentelemetry-api"
 version = "1.40.0"