2026-04-18 12:45:16 -03:00
|
|
|
"""ChromaDB-backed MemPalace storage backend (RFC 001 reference implementation)."""
|
2026-04-11 19:16:49 -04:00
|
|
|
|
2026-04-11 23:06:01 -07:00
|
|
|
import logging
|
2026-04-11 19:16:49 -04:00
|
|
|
import os
|
2026-04-11 23:06:01 -07:00
|
|
|
import sqlite3
|
2026-04-18 12:45:16 -03:00
|
|
|
from typing import Any, Optional
|
2026-04-11 19:16:49 -04:00
|
|
|
|
|
|
|
|
import chromadb
|
|
|
|
|
|
2026-04-18 12:45:16 -03:00
|
|
|
from .base import (
|
|
|
|
|
BaseBackend,
|
|
|
|
|
BaseCollection,
|
|
|
|
|
GetResult,
|
|
|
|
|
HealthStatus,
|
|
|
|
|
PalaceNotFoundError,
|
|
|
|
|
PalaceRef,
|
|
|
|
|
QueryResult,
|
|
|
|
|
UnsupportedFilterError,
|
|
|
|
|
_IncludeSpec,
|
|
|
|
|
)
|
2026-04-11 19:16:49 -04:00
|
|
|
|
2026-04-11 23:06:01 -07:00
|
|
|
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Operator vocabulary accepted in where / where_document clauses.
# The "required" vs. "optional" split presumably mirrors RFC 001 — TODO confirm.
_REQUIRED_OPERATORS = frozenset(("$eq", "$ne", "$in", "$nin", "$and", "$or", "$contains"))
_OPTIONAL_OPERATORS = frozenset(("$gt", "$gte", "$lt", "$lte"))

# Everything ``_validate_where`` lets through: the union of the two sets above.
_SUPPORTED_OPERATORS = _REQUIRED_OPERATORS.union(_OPTIONAL_OPERATORS)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _validate_where(where: Optional[dict]) -> None:
    """Reject where-clauses that use an operator this backend does not know.

    Walks the clause iteratively (nested dicts, and dicts inside lists) and
    raises ``UnsupportedFilterError`` for the first ``$``-prefixed key that is
    not in ``_SUPPORTED_OPERATORS``. Keys without a ``$`` prefix are treated
    as field names and never rejected. A falsy ``where`` is accepted as-is.

    Spec (RFC 001 §1.4): silent dropping of unknown operators is forbidden.
    """
    pending = [where] if where else []
    while pending:
        clause = pending.pop()
        if not isinstance(clause, dict):
            # Scalars and other non-dict nodes carry no operators to check.
            continue
        for key, value in clause.items():
            is_unknown_operator = key.startswith("$") and key not in _SUPPORTED_OPERATORS
            if is_unknown_operator:
                raise UnsupportedFilterError(f"operator {key!r} not supported by chroma backend")
            if isinstance(value, dict):
                pending.append(value)
            elif isinstance(value, list):
                # Only dict members can contain further operators.
                pending += [item for item in value if isinstance(item, dict)]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _fix_blob_seq_ids(palace_path: str) -> None:
    """Fix ChromaDB 0.6.x -> 1.5.x migration bug: BLOB seq_ids -> INTEGER.

    ChromaDB 0.6.x stored seq_id as big-endian 8-byte BLOBs. ChromaDB 1.5.x
    expects INTEGER. The auto-migration doesn't convert existing rows, causing
    the Rust compactor to crash with "mismatched types; Rust type u64 (as SQL
    type INTEGER) is not compatible with SQL type BLOB".

    Must run BEFORE PersistentClient is created (the compactor fires on init).

    Best-effort: returns silently when ``chroma.sqlite3`` does not exist, and
    any failure is logged (with traceback) rather than raised.

    Args:
        palace_path: Directory expected to contain ``chroma.sqlite3``.
    """
    from contextlib import closing  # local import keeps this block self-contained

    db_path = os.path.join(palace_path, "chroma.sqlite3")
    if not os.path.isfile(db_path):
        return
    try:
        # A sqlite3 connection used as a context manager only commits or rolls
        # back; it never closes. ``closing`` guarantees the handle is released
        # before the caller opens the same file through PersistentClient, and
        # the inner ``conn`` context keeps the commit/rollback semantics.
        with closing(sqlite3.connect(db_path)) as conn, conn:
            for table in ("embeddings", "max_seq_id"):
                try:
                    rows = conn.execute(
                        f"SELECT rowid, seq_id FROM {table} WHERE typeof(seq_id) = 'blob'"
                    ).fetchall()
                except sqlite3.OperationalError:
                    # Presumably the table/column is absent in this schema; skip it.
                    continue
                if not rows:
                    continue
                # Decode each big-endian BLOB into the integer it represents.
                updates = [(int.from_bytes(blob, byteorder="big"), rowid) for rowid, blob in rows]
                conn.executemany(f"UPDATE {table} SET seq_id = ? WHERE rowid = ?", updates)
                logger.info("Fixed %d BLOB seq_ids in %s", len(updates), table)
            conn.commit()
    except Exception:
        logger.exception("Could not fix BLOB seq_ids in %s", db_path)
|
|
|
|
|
|
2026-04-11 19:16:49 -04:00
|
|
|
|
2026-04-18 12:45:16 -03:00
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
# Collection adapter
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _as_list(v: Any) -> list:
    """Normalize ``v`` to a list (defensive against chroma returning nulls).

    A list is returned unchanged (same object), ``None`` becomes an empty
    list, and any other value is wrapped in a single-element list.
    """
    if isinstance(v, list):
        return v
    return [] if v is None else [v]
|
|
|
|
|
|
|
|
|
|
|
2026-04-11 19:16:49 -04:00
|
|
|
class ChromaCollection(BaseCollection):
    """Thin adapter translating ChromaDB dict returns into typed results.

    Write calls are forwarded to the wrapped collection unchanged. ``query``
    and ``get`` validate their filters with ``_validate_where`` and repackage
    the raw dict response into ``QueryResult`` / ``GetResult``.
    """

    def __init__(self, collection):
        # The wrapped ChromaDB collection object every call delegates to.
        self._collection = collection

    # ------------------------------------------------------------------
    # Writes
    # ------------------------------------------------------------------

    def add(self, *, documents, ids, metadatas=None, embeddings=None):
        """Forward ``add`` to the wrapped collection.

        ``metadatas`` / ``embeddings`` are only passed through when not ``None``.
        """
        kwargs: dict[str, Any] = {"documents": documents, "ids": ids}
        if metadatas is not None:
            kwargs["metadatas"] = metadatas
        if embeddings is not None:
            kwargs["embeddings"] = embeddings
        self._collection.add(**kwargs)

    def upsert(self, *, documents, ids, metadatas=None, embeddings=None):
        """Forward ``upsert`` to the wrapped collection.

        ``metadatas`` / ``embeddings`` are only passed through when not ``None``.
        """
        kwargs: dict[str, Any] = {"documents": documents, "ids": ids}
        if metadatas is not None:
            kwargs["metadatas"] = metadatas
        if embeddings is not None:
            kwargs["embeddings"] = embeddings
        self._collection.upsert(**kwargs)

    def update(
        self,
        *,
        ids,
        documents=None,
        metadatas=None,
        embeddings=None,
    ):
        """Forward ``update`` to the wrapped collection.

        Raises:
            ValueError: if none of ``documents``, ``metadatas``, ``embeddings``
                is supplied (there would be nothing to update).
        """
        if documents is None and metadatas is None and embeddings is None:
            raise ValueError("update requires at least one of documents, metadatas, embeddings")
        kwargs: dict[str, Any] = {"ids": ids}
        if documents is not None:
            kwargs["documents"] = documents
        if metadatas is not None:
            kwargs["metadatas"] = metadatas
        if embeddings is not None:
            kwargs["embeddings"] = embeddings
        self._collection.update(**kwargs)

    # ------------------------------------------------------------------
    # Reads
    # ------------------------------------------------------------------

    def query(
        self,
        *,
        query_texts=None,
        query_embeddings=None,
        n_results=10,
        where=None,
        where_document=None,
        include=None,
    ) -> QueryResult:
        """Run a similarity query and return a typed ``QueryResult``.

        Both filters are validated first, so an unknown operator raises
        ``UnsupportedFilterError`` before the backend is called. Distances are
        included by default (``default_distances=True``). When the raw
        response carries no ids, an empty result sized by the number of
        queries is returned.
        """
        _validate_where(where)
        _validate_where(where_document)

        # Translate the resolved include spec into chroma's string list.
        spec = _IncludeSpec.resolve(include, default_distances=True)
        chroma_include: list[str] = []
        if spec.documents:
            chroma_include.append("documents")
        if spec.metadatas:
            chroma_include.append("metadatas")
        if spec.distances:
            chroma_include.append("distances")
        if spec.embeddings:
            chroma_include.append("embeddings")

        kwargs: dict[str, Any] = {
            "n_results": n_results,
            "include": chroma_include,
        }
        if query_texts is not None:
            kwargs["query_texts"] = query_texts
        if query_embeddings is not None:
            kwargs["query_embeddings"] = query_embeddings
        if where is not None:
            kwargs["where"] = where
        if where_document is not None:
            kwargs["where_document"] = where_document

        raw = self._collection.query(**kwargs)

        # Texts take precedence over embeddings; falls back to 1 when neither is given.
        num_queries = (
            len(query_texts)
            if query_texts is not None
            else (len(query_embeddings) if query_embeddings is not None else 1)
        )

        ids = raw.get("ids") or []
        if not ids:
            return QueryResult.empty(num_queries=num_queries)

        # Fields that were not requested (or came back null) get one empty
        # inner list per query so the outer shapes line up with ``ids``.
        documents = raw.get("documents") or [[] for _ in ids]
        metadatas = raw.get("metadatas") or [[] for _ in ids]
        distances = raw.get("distances") or [[] for _ in ids]
        embeddings_raw = raw.get("embeddings") if spec.embeddings else None

        def _none_list_to_empty(outer):
            # Replace null/empty inner lists with a fresh empty list.
            return [(inner or []) for inner in outer]

        return QueryResult(
            ids=_none_list_to_empty(ids),
            documents=_none_list_to_empty(documents),
            metadatas=_none_list_to_empty(metadatas),
            distances=_none_list_to_empty(distances),
            embeddings=(
                [list(inner) for inner in embeddings_raw]
                if spec.embeddings and embeddings_raw is not None
                else None
            ),
        )

    def get(
        self,
        *,
        ids=None,
        where=None,
        where_document=None,
        limit=None,
        offset=None,
        include=None,
    ) -> GetResult:
        """Fetch records by id and/or filter and return a typed ``GetResult``.

        Both filters are validated first, so an unknown operator raises
        ``UnsupportedFilterError`` before the backend is called. Requested
        document/metadata lists shorter than the id list are padded (``""`` /
        ``{}``) so callers can zip them against ``ids`` safely.
        """
        _validate_where(where)
        _validate_where(where_document)

        spec = _IncludeSpec.resolve(include, default_distances=False)
        chroma_include: list[str] = []
        if spec.documents:
            chroma_include.append("documents")
        if spec.metadatas:
            chroma_include.append("metadatas")
        if spec.embeddings:
            chroma_include.append("embeddings")

        kwargs: dict[str, Any] = {"include": chroma_include}
        if ids is not None:
            kwargs["ids"] = ids
        if where is not None:
            kwargs["where"] = where
        if where_document is not None:
            kwargs["where_document"] = where_document
        if limit is not None:
            kwargs["limit"] = limit
        if offset is not None:
            kwargs["offset"] = offset

        raw = self._collection.get(**kwargs)
        out_ids = list(raw.get("ids") or [])
        out_docs = list(raw.get("documents") or []) if spec.documents else []
        out_metas = list(raw.get("metadatas") or []) if spec.metadatas else []
        out_embeds = raw.get("embeddings") if spec.embeddings else None

        # Pad doc/meta lists to match ids so downstream zipping is safe.
        if spec.documents and len(out_docs) < len(out_ids):
            out_docs = out_docs + [""] * (len(out_ids) - len(out_docs))
        if spec.metadatas and len(out_metas) < len(out_ids):
            # One fresh dict per missing slot: ``[{}] * k`` would place the SAME
            # dict object in every padded position, so mutating one padded
            # entry downstream would silently change them all.
            out_metas = out_metas + [{} for _ in range(len(out_ids) - len(out_metas))]

        return GetResult(
            ids=out_ids,
            documents=out_docs,
            metadatas=out_metas,
            embeddings=[list(v) for v in out_embeds] if out_embeds is not None else None,
        )

    def delete(self, *, ids=None, where=None):
        """Forward ``delete`` to the wrapped collection after validating ``where``.

        Only arguments that are not ``None`` are passed through.
        """
        _validate_where(where)
        kwargs: dict[str, Any] = {}
        if ids is not None:
            kwargs["ids"] = ids
        if where is not None:
            kwargs["where"] = where
        self._collection.delete(**kwargs)

    def count(self):
        """Return whatever the wrapped collection's ``count()`` reports."""
        return self._collection.count()
|
|
|
|
|
|
|
|
|
|
|
2026-04-18 12:45:16 -03:00
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
# Backend
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class ChromaBackend(BaseBackend):
    """MemPalace's default ChromaDB backend.

    Maintains two caches:

    * ``self._clients`` — ``palace_path -> PersistentClient`` for callers
      using the ``PalaceRef`` / :meth:`get_collection` path.
    * An inode+mtime freshness check absorbed from ``mcp_server._get_client``
      (merged via #757) ensuring a palace rebuild on disk is detected on the
      next :meth:`get_collection` call.
    """

    name = "chroma"
    capabilities = frozenset(
        {
            "supports_embeddings_in",
            "supports_embeddings_passthrough",
            "supports_embeddings_out",
            "supports_metadata_filters",
            "supports_contains_fast",
            "local_mode",
        }
    )

    def __init__(self):
        # palace_path -> PersistentClient
        self._clients: dict[str, Any] = {}
        # palace_path -> (inode, mtime) of chroma.sqlite3 at cache time.
        self._freshness: dict[str, tuple[int, float]] = {}
        # Set by close(); once True, _client() refuses to hand out clients.
        self._closed = False

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _db_stat(palace_path: str) -> tuple[int, float]:
        """Return ``(inode, mtime)`` of ``chroma.sqlite3`` or ``(0, 0.0)`` if absent."""
        db_path = os.path.join(palace_path, "chroma.sqlite3")
        try:
            st = os.stat(db_path)
            return (st.st_ino, st.st_mtime)
        except OSError:
            return (0, 0.0)

    def _client(self, palace_path: str):
        """Return a cached ``PersistentClient``, rebuilding on inode/mtime change.

        Handles the palace-rebuild case (repair/nuke/purge) by invalidating the
        cache when ``chroma.sqlite3`` changes on disk. FAT/exFAT return inode 0,
        so inode comparisons only fire when non-zero (matches #757 semantics).

        Raises:
            BackendClosedError: if :meth:`close` has already been called.
        """
        if self._closed:
            from .base import BackendClosedError  # late import avoids cycles at module load

            raise BackendClosedError("ChromaBackend has been closed")

        cached = self._clients.get(palace_path)
        cached_inode, cached_mtime = self._freshness.get(palace_path, (0, 0.0))
        current_inode, current_mtime = self._db_stat(palace_path)

        # A zero on either side means "unknown", never "changed".
        inode_changed = current_inode != 0 and cached_inode != 0 and current_inode != cached_inode
        mtime_changed = (
            current_mtime != 0.0 and cached_mtime != 0.0 and current_mtime > cached_mtime
        )

        if cached is None or inode_changed or mtime_changed:
            _fix_blob_seq_ids(palace_path)
            cached = chromadb.PersistentClient(path=palace_path)
            self._clients[palace_path] = cached
            # Stat AFTER the client is open. The pre-open stat is stale: the
            # seq_id fix and client start-up may modify chroma.sqlite3, and for
            # a brand-new palace the file did not exist yet, which would record
            # (0, 0.0) and switch the freshness check off for this path.
            self._freshness[palace_path] = self._db_stat(palace_path)
        return cached

    # ------------------------------------------------------------------
    # Public static helpers (legacy; prefer :meth:`get_collection`)
    # ------------------------------------------------------------------

    @staticmethod
    def make_client(palace_path: str):
        """Create a fresh ``PersistentClient`` (fixes BLOB seq_ids first).

        Deprecated-ish: exposed for legacy long-lived callers that manage their
        own client cache. New code should obtain a collection through
        :meth:`get_collection` which manages caching internally.
        """
        _fix_blob_seq_ids(palace_path)
        return chromadb.PersistentClient(path=palace_path)

    @staticmethod
    def backend_version() -> str:
        """Return the installed chromadb package version string."""
        return chromadb.__version__

    # ------------------------------------------------------------------
    # BaseBackend surface
    # ------------------------------------------------------------------

    def get_collection(
        self,
        *args,
        **kwargs,
    ) -> ChromaCollection:
        """Obtain a collection for a palace.

        Supports two calling conventions during the RFC 001 transition:

        * New (preferred): ``get_collection(palace=PalaceRef, collection_name=...,
          create=False, options=None)``.
        * Legacy: ``get_collection(palace_path, collection_name, create=False)``
          — still used by callers not yet migrated.

        With ``create=True`` the palace directory is created if needed (and
        chmod'ed to ``0o700`` on a best-effort basis) and the collection is
        obtained via get-or-create using the ``hnsw_space`` option (default
        ``"cosine"``). With ``create=False`` the directory must already exist.

        Raises:
            PalaceNotFoundError: if the palace has no ``local_path``, or the
                directory is missing and ``create`` is false.
        """
        palace_ref, collection_name, create, options = _normalize_get_collection_args(args, kwargs)

        palace_path = palace_ref.local_path
        if palace_path is None:
            raise PalaceNotFoundError("ChromaBackend requires PalaceRef.local_path")

        if not create and not os.path.isdir(palace_path):
            raise PalaceNotFoundError(palace_path)

        if create:
            os.makedirs(palace_path, exist_ok=True)
            try:
                os.chmod(palace_path, 0o700)
            except (OSError, NotImplementedError):
                # Best-effort: tightening permissions must not block palace creation.
                pass

        client = self._client(palace_path)
        hnsw_space = "cosine"
        if options and isinstance(options, dict):
            hnsw_space = options.get("hnsw_space", hnsw_space)

        if create:
            collection = client.get_or_create_collection(
                collection_name, metadata={"hnsw:space": hnsw_space}
            )
        else:
            collection = client.get_collection(collection_name)
        return ChromaCollection(collection)

    def close_palace(self, palace) -> None:
        """Drop cached handles for ``palace``. Accepts ``PalaceRef`` or legacy path str."""
        path = palace.local_path if isinstance(palace, PalaceRef) else palace
        if path is None:
            return
        self._clients.pop(path, None)
        self._freshness.pop(path, None)

    def close(self) -> None:
        """Drop every cached client and mark the backend closed."""
        self._clients.clear()
        self._freshness.clear()
        self._closed = True

    def health(self, palace: Optional[PalaceRef] = None) -> HealthStatus:
        """Report unhealthy once closed, healthy otherwise (``palace`` is not inspected)."""
        if self._closed:
            return HealthStatus.unhealthy("backend closed")
        return HealthStatus.healthy()

    @classmethod
    def detect(cls, path: str) -> bool:
        """Return True when ``path`` contains a ``chroma.sqlite3`` file."""
        return os.path.isfile(os.path.join(path, "chroma.sqlite3"))

    # ------------------------------------------------------------------
    # Legacy (pre-RFC 001) surface — retained while callers migrate.
    # ------------------------------------------------------------------

    def get_or_create_collection(self, palace_path: str, collection_name: str) -> ChromaCollection:
        """Legacy shim for ``get_collection(..., create=True)`` by path string."""
        return self.get_collection(palace_path, collection_name, create=True)

    def delete_collection(self, palace_path: str, collection_name: str) -> None:
        """Delete ``collection_name`` from the palace at ``palace_path``."""
        self._client(palace_path).delete_collection(collection_name)

    def create_collection(
        self, palace_path: str, collection_name: str, hnsw_space: str = "cosine"
    ) -> ChromaCollection:
        """Create (not get-or-create) ``collection_name`` with the given HNSW space."""
        collection = self._client(palace_path).create_collection(
            collection_name, metadata={"hnsw:space": hnsw_space}
        )
        return ChromaCollection(collection)
|
2026-04-18 12:45:16 -03:00
|
|
|
|
|
|
|
|
|
|
|
|
|
def _normalize_get_collection_args(args, kwargs):
    """Unify legacy positional ``(palace_path, collection_name, create)`` calls
    with the new kwargs-only ``(palace=PalaceRef, collection_name=..., create=...)``.

    Three call shapes are recognised, checked in this order:

    1. ``palace=`` keyword holding a ``PalaceRef`` (no positional args allowed).
    2. Legacy positional: first positional is a path string.
    3. Legacy keyword-only: ``palace_path=...``.

    ``kwargs`` is consumed in place (recognised keys are popped).

    Returns ``(PalaceRef, collection_name, create, options)``. ``options`` is
    always ``None`` for the legacy shapes, and ``create`` is coerced to
    ``bool`` there.

    Raises:
        TypeError: for a non-``PalaceRef`` ``palace=``, a missing
            ``collection_name``, unexpected keyword arguments, surplus
            positional arguments, or when no palace is given at all.
    """
    # New-style: palace= kwarg with a PalaceRef (spec path).
    if "palace" in kwargs:
        palace_ref = kwargs.pop("palace")
        if not isinstance(palace_ref, PalaceRef):
            raise TypeError("palace= must be a PalaceRef instance")
        # Report a missing argument as TypeError, like the positional branch,
        # instead of leaking a bare KeyError from dict.pop().
        if "collection_name" not in kwargs:
            raise TypeError("collection_name is required")
        collection_name = kwargs.pop("collection_name")
        create = kwargs.pop("create", False)
        options = kwargs.pop("options", None)
        if kwargs:
            raise TypeError(f"unexpected kwargs: {sorted(kwargs)}")
        if args:
            raise TypeError("positional args not allowed with palace= kwarg")
        return palace_ref, collection_name, create, options

    # Legacy: first positional is a path string.
    if args:
        palace_path = args[0]
        rest = list(args[1:])
        collection_name = kwargs.pop("collection_name", None) or (rest.pop(0) if rest else None)
        if collection_name is None:
            raise TypeError("collection_name is required")
        create = kwargs.pop("create", False)
        if rest:
            # A positional ``create`` overrides the keyword one.
            create = rest.pop(0)
        if rest:
            # Anything still left was previously dropped without notice.
            raise TypeError(
                f"get_collection takes at most 3 positional arguments ({len(args)} given)"
            )
        if kwargs:
            raise TypeError(f"unexpected kwargs: {sorted(kwargs)}")
        return (
            PalaceRef(id=palace_path, local_path=palace_path),
            collection_name,
            bool(create),
            None,
        )

    # Legacy kwargs-only (palace_path=..., collection_name=..., create=...)
    if "palace_path" in kwargs:
        palace_path = kwargs.pop("palace_path")
        if "collection_name" not in kwargs:
            raise TypeError("collection_name is required")
        collection_name = kwargs.pop("collection_name")
        create = kwargs.pop("create", False)
        if kwargs:
            raise TypeError(f"unexpected kwargs: {sorted(kwargs)}")
        return (
            PalaceRef(id=palace_path, local_path=palace_path),
            collection_name,
            bool(create),
            None,
        )

    raise TypeError("get_collection requires palace= or a positional palace_path")
|