Merge pull request #1262 from Legion345/fix/stop-hook-crash
fix(storage): stop ChromaDB from crashing when reopening an existing …
This commit is contained in:
@@ -8,6 +8,7 @@ from pathlib import Path
|
|||||||
from typing import Any, Optional
|
from typing import Any, Optional
|
||||||
|
|
||||||
import chromadb
|
import chromadb
|
||||||
|
from chromadb.errors import NotFoundError as _ChromaNotFoundError
|
||||||
|
|
||||||
from .base import (
|
from .base import (
|
||||||
BaseBackend,
|
BaseBackend,
|
||||||
@@ -1093,15 +1094,18 @@ class ChromaBackend(BaseBackend):
|
|||||||
ef_kwargs = {"embedding_function": ef} if ef is not None else {}
|
ef_kwargs = {"embedding_function": ef} if ef is not None else {}
|
||||||
|
|
||||||
if create:
|
if create:
|
||||||
collection = client.get_or_create_collection(
|
try:
|
||||||
collection_name,
|
collection = client.get_collection(collection_name, **ef_kwargs)
|
||||||
metadata={
|
except _ChromaNotFoundError:
|
||||||
"hnsw:space": hnsw_space,
|
collection = client.create_collection(
|
||||||
"hnsw:num_threads": 1,
|
collection_name,
|
||||||
**_HNSW_BLOAT_GUARD,
|
metadata={
|
||||||
},
|
"hnsw:space": hnsw_space,
|
||||||
**ef_kwargs,
|
"hnsw:num_threads": 1,
|
||||||
)
|
**_HNSW_BLOAT_GUARD,
|
||||||
|
},
|
||||||
|
**ef_kwargs,
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
collection = client.get_collection(collection_name, **ef_kwargs)
|
collection = client.get_collection(collection_name, **ef_kwargs)
|
||||||
_pin_hnsw_threads(collection)
|
_pin_hnsw_threads(collection)
|
||||||
|
|||||||
@@ -372,6 +372,32 @@ def test_chroma_backend_create_collection_sets_hnsw_bloat_guard(tmp_path):
|
|||||||
assert col.metadata.get("hnsw:sync_threshold") == 50_000
|
assert col.metadata.get("hnsw:sync_threshold") == 50_000
|
||||||
|
|
||||||
|
|
||||||
|
def test_get_collection_create_true_is_idempotent(tmp_path):
    """Opening the same collection twice with create=True must not crash.

    Regression guard for issue #1089: ChromaDB 1.5.x's Rust bindings SIGSEGV
    when get_or_create_collection is handed metadata that differs from what
    is already stored for the collection. The backend therefore tries
    get_collection first and only falls back to create_collection, so the
    metadata-comparison codepath in chromadb_rust_bindings is never reached
    for a collection that already exists.
    """
    palace_dir = str(tmp_path / "palace")
    chroma = ChromaBackend()
    open_kwargs = {"collection_name": "mempalace_drawers", "create": True}

    # The first call creates the collection; the second reopens it.
    chroma.get_collection(palace_dir, **open_kwargs)
    reopened = chroma.get_collection(palace_dir, **open_kwargs)

    assert isinstance(reopened, ChromaCollection)
|
||||||
|
|
||||||
|
|
||||||
|
def test_get_collection_create_true_preserves_existing_metadata(tmp_path):
    """Reopening with create=True leaves the stored collection metadata intact."""
    palace_dir = str(tmp_path / "palace")
    chroma = ChromaBackend()
    open_kwargs = {"collection_name": "mempalace_drawers", "create": True}

    # Create the collection, then open it a second time through the same path.
    chroma.get_collection(palace_dir, **open_kwargs)
    reopened = chroma.get_collection(palace_dir, **open_kwargs)

    # Metadata written at creation time must still be present on the reopen.
    assert reopened._collection.metadata["hnsw:space"] == "cosine"
    assert reopened._collection.metadata.get("hnsw:batch_size") == 50_000
|
||||||
|
|
||||||
|
|
||||||
def test_fix_blob_seq_ids_converts_blobs_to_integers(tmp_path):
|
def test_fix_blob_seq_ids_converts_blobs_to_integers(tmp_path):
|
||||||
"""Simulate a ChromaDB 0.6.x database with BLOB seq_ids and verify repair."""
|
"""Simulate a ChromaDB 0.6.x database with BLOB seq_ids and verify repair."""
|
||||||
db_path = tmp_path / "chroma.sqlite3"
|
db_path = tmp_path / "chroma.sqlite3"
|
||||||
|
|||||||
Reference in New Issue
Block a user