a4868a3589
The miner upserted one drawer per ChromaDB call, paying tokenizer + ONNX session setup per chunk. The embedding device was CPU-only because no EmbeddingFunction was ever wired through the backend. Two changes, each a speedup in its own right; stacked they give ~10x end-to-end on a medium corpus (20 files, 568 drawers): 1. Batched upsert. `process_file` and `_file_chunks_locked` now collect all chunks of a file into a single `collection.upsert(...)` so the embedding model runs one forward pass per file instead of N. 2. Hardware-accelerated embedding function. New `mempalace/embedding.py` wraps `ONNXMiniLM_L6_V2` with configurable `preferred_providers`. `MEMPALACE_EMBEDDING_DEVICE` (or `embedding_device` in config.json) selects auto / cpu / cuda / coreml / dml. Unavailable accelerators log a warning and fall back to CPU. The factory subclasses `ONNXMiniLM_L6_V2` and spoofs its `name()` to `"default"` so the persisted EF identity matches existing palaces created with ChromaDB's bare `DefaultEmbeddingFunction` -- same model, same 384-dim vectors, no rebuild needed when turning GPU on. `ChromaBackend.get_collection` / `create_collection` now pass the resolved EF on every call so miner writes and searcher reads agree. Benchmarks (i9-12900KF + RTX 3090, medium scenario, 568 drawers): per-chunk + CPU 19.77s · 29 drw/s (baseline) batched + CPU 8.07s · 70 drw/s (2.4x) batched + CUDA 2.15s · 264 drw/s (9.2x) Reproducible via `benchmarks/mine_bench.py`. Install paths: pip install mempalace[gpu] # NVIDIA CUDA pip install mempalace[dml] # DirectML (Windows) pip install mempalace[coreml] # macOS Neural Engine Mine header now prints `Device: cpu|cuda|...` so users can confirm the accelerator engaged.
110 lines
3.2 KiB
TOML
110 lines
3.2 KiB
TOML
# Core package metadata: distribution name, version, supported Python
# versions, PyPI classifiers, and runtime dependencies.
[project]
|
|
name = "mempalace"
|
|
version = "3.3.3"
|
|
description = "Give your AI a memory — mine projects and conversations into a searchable palace. No API key required."
|
|
readme = "README.md"
|
|
# Lowest supported interpreter. Matches the oldest "Programming Language"
# classifier below and the ruff target-version ("py39").
requires-python = ">=3.9"
|
|
license = "MIT"
|
|
authors = [
|
|
{name = "milla-jovovich"},
|
|
]
|
|
keywords = [
|
|
"ai", "memory", "llm", "rag", "chromadb", "mcp",
|
|
"vector-database", "claude", "chatgpt", "embeddings",
|
|
]
|
|
classifiers = [
|
|
"Development Status :: 4 - Beta",
|
|
"Environment :: Console",
|
|
"Intended Audience :: Developers",
|
|
"Programming Language :: Python :: 3",
|
|
"Programming Language :: Python :: 3.9",
|
|
"Programming Language :: Python :: 3.10",
|
|
"Programming Language :: Python :: 3.11",
|
|
"Programming Language :: Python :: 3.12",
|
|
"Programming Language :: Python :: 3.13",
|
|
"Programming Language :: Python :: 3.14",
|
|
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
"Topic :: Utilities",
|
|
]
|
|
# Runtime dependencies. chromadb and pyyaml carry an upper bound that stops
# short of their next major version.
dependencies = [
|
|
"chromadb>=1.5.4,<2",
|
|
"pyyaml>=6.0,<7",
|
|
# Installed only on interpreters older than 3.11 (see the environment
# marker); presumably a fallback for the stdlib tomllib -- confirm in code.
"tomli>=2.0.0; python_version < '3.11'",
|
|
]
|
|
|
|
# Project links shown on the package index page.
[project.urls]
|
|
Homepage = "https://github.com/MemPalace/mempalace"
|
|
Repository = "https://github.com/MemPalace/mempalace"
|
|
# Quoted because the key contains a space, which a bare TOML key cannot.
"Bug Tracker" = "https://github.com/MemPalace/mempalace/issues"
|
|
|
|
# Console commands created at install time. Each value is a
# "module:callable" reference to the function the command runs.
[project.scripts]
|
|
mempalace = "mempalace.cli:main"
|
|
mempalace-mcp = "mempalace.mcp_server:main"
|
|
|
|
# Entry-point group for storage backends. Registers the built-in
# ChromaBackend class under the name "chroma".
# NOTE(review): presumably looked up at runtime via importlib.metadata --
# confirm against the backend loader.
[project.entry-points."mempalace.backends"]
|
|
chroma = "mempalace.backends.chroma:ChromaBackend"
|
|
|
|
# RFC 002 source-adapter entry-point group. Core publishes no first-party
|
|
# adapters under this group yet; ``miner.py`` and ``convo_miner.py`` migrate
|
|
# onto ``BaseSourceAdapter`` in a follow-up PR. Third-party adapter packages
|
|
# (``mempalace-source-cursor``, ``mempalace-source-git``, …) register here.
|
|
# The table is intentionally empty: it only declares the group name.
[project.entry-points."mempalace.sources"]
|
|
|
|
# Extras installable as `pip install mempalace[<name>]`.
[project.optional-dependencies]
|
|
# Same package list as `dev` under [dependency-groups]; update both together.
dev = ["pytest>=7.0", "pytest-cov>=4.0", "ruff>=0.4.0", "psutil>=5.9"]
|
|
spellcheck = ["autocorrect>=2.0"]
|
|
# Hardware acceleration for the ONNX embedding model. Install exactly one:
|
|
#   pip install mempalace[gpu]      — NVIDIA CUDA
|
|
#   pip install mempalace[dml]      — DirectML (Windows AMD/Intel/NVIDIA)
|
|
#   pip install mempalace[coreml]   — macOS Neural Engine
|
|
# After install, set MEMPALACE_EMBEDDING_DEVICE=cuda|dml|coreml (or "auto").
|
|
gpu = ["onnxruntime-gpu>=1.16"]
|
|
dml = ["onnxruntime-directml>=1.16"]
|
|
# NOTE(review): this extra requests the plain onnxruntime package; presumably
# the stock macOS wheel already ships the CoreML provider -- confirm.
coreml = ["onnxruntime>=1.16"]
|
|
|
|
# Development-only dependency groups (not published as package extras).
# `dev` lists the same packages as the `dev` extra under
# [project.optional-dependencies]; update both together.
[dependency-groups]
|
|
dev = ["pytest>=7.0", "pytest-cov>=4.0", "ruff>=0.4.0", "psutil>=5.9"]
|
|
|
|
# Build backend used to produce the sdist and wheel. hatchling is requested
# without a version constraint.
[build-system]
|
|
requires = ["hatchling"]
|
|
build-backend = "hatchling.build"
|
|
|
|
# Wheel contents: ship only the `mempalace` package directory.
[tool.hatch.build.targets.wheel]
|
|
packages = ["mempalace"]
|
|
|
|
# Ruff settings shared by the linter and the formatter.
[tool.ruff]
|
|
line-length = 100
|
|
# Matches the `requires-python = ">=3.9"` floor in [project].
target-version = "py39"
|
|
# The benchmarks directory is excluded from linting and formatting.
extend-exclude = ["benchmarks"]
|
|
|
|
# Enabled lint rules: pycodestyle errors (E) and warnings (W), Pyflakes (F),
# and the mccabe complexity check (C901).
[tool.ruff.lint]
|
|
select = ["E", "F", "W", "C901"]
|
|
# E501 (line too long) is not enforced by the linter; `line-length` in
# [tool.ruff] still applies to the formatter.
ignore = ["E501"]
|
|
|
|
# Threshold for the C901 rule selected in [tool.ruff.lint]: functions whose
# cyclomatic complexity exceeds this value are flagged.
[tool.ruff.lint.mccabe]
|
|
max-complexity = 25
|
|
|
|
# Formatter settings: the formatter prefers double-quoted string literals.
[tool.ruff.format]
|
|
quote-style = "double"
|
|
|
|
# pytest configuration.
[tool.pytest.ini_options]
|
|
testpaths = ["tests"]
|
|
# Adds the repository root to sys.path so tests can import the package
# without installing it.
pythonpath = ["."]
|
|
# Default run deselects tests marked benchmark, slow, or stress; pass an
# explicit -m expression on the command line to run them.
addopts = "-m 'not benchmark and not slow and not stress'"
|
|
# Declares the custom markers referenced by the addopts filter above.
markers = [
|
|
"benchmark: scale/performance benchmark tests",
|
|
"slow: tests that take more than 30 seconds",
|
|
"stress: destructive scale tests (100K+ drawers)",
|
|
]
|
|
|
|
# Coverage measurement is limited to the `mempalace` package.
[tool.coverage.run]
|
|
source = ["mempalace"]
|
|
|
|
# Coverage reporting thresholds and exclusions.
[tool.coverage.report]
|
|
# The report step fails when total coverage is below 85 percent.
fail_under = 85
|
|
show_missing = true
|
|
# Regexes for lines left out of the report. Setting this option replaces
# coverage.py's built-in default, so "pragma: no cover" is listed explicitly.
exclude_lines = [
|
|
"if __name__",
|
|
"pragma: no cover",
|
|
]
|