Files
mempalace/tests/test_hnsw_payload_health.py
T

114 lines
3.3 KiB
Python

import os
from pathlib import Path
from mempalace.backends.chroma import (
_HNSW_LINK_TO_DATA_MAX_RATIO,
_hnsw_link_to_data_ratio,
_segment_appears_healthy,
quarantine_stale_hnsw,
)
def _write_segment(
seg_dir: Path,
*,
data_size: int = 100,
link_size: int = 100,
write_metadata: bool = True,
) -> None:
seg_dir.mkdir(parents=True, exist_ok=True)
(seg_dir / "data_level0.bin").write_bytes(b"\0" * data_size)
(seg_dir / "link_lists.bin").write_bytes(b"\0" * link_size)
if write_metadata:
# Enough bytes to pass the existing pickle envelope sniff-test:
# starts with pickle protocol marker 0x80 and ends with STOP 0x2e.
(seg_dir / "index_metadata.pickle").write_bytes(b"\x80" + b"x" * 16 + b"\x2e")
def test_hnsw_link_to_data_ratio_reports_payload_size_ratio(tmp_path):
seg_dir = tmp_path / "11111111-2222-3333-4444-555555555555"
_write_segment(seg_dir, data_size=100, link_size=250)
assert _hnsw_link_to_data_ratio(str(seg_dir)) == 2.5
def test_segment_health_rejects_exploded_link_lists_even_with_valid_pickle(tmp_path):
seg_dir = tmp_path / "11111111-2222-3333-4444-555555555555"
_write_segment(
seg_dir,
data_size=100,
link_size=int(100 * (_HNSW_LINK_TO_DATA_MAX_RATIO + 1)),
write_metadata=True,
)
assert not _segment_appears_healthy(str(seg_dir))
def test_segment_health_keeps_reasonable_payload_with_valid_pickle(tmp_path):
seg_dir = tmp_path / "11111111-2222-3333-4444-555555555555"
_write_segment(
seg_dir,
data_size=100,
link_size=int(100 * _HNSW_LINK_TO_DATA_MAX_RATIO),
write_metadata=True,
)
assert _segment_appears_healthy(str(seg_dir))
def test_quarantine_catches_link_bloat_without_mtime_drift(tmp_path):
palace = tmp_path / "palace"
palace.mkdir()
db_path = palace / "chroma.sqlite3"
db_path.write_text("sqlite placeholder")
seg_dir = palace / "11111111-2222-3333-4444-555555555555"
_write_segment(
seg_dir,
data_size=100,
link_size=int(100 * (_HNSW_LINK_TO_DATA_MAX_RATIO + 1)),
write_metadata=True,
)
# Make sqlite and HNSW mtimes identical. The old mtime-only gate would
# skip this segment even though the payload is structurally corrupt.
same_time = 1_700_000_000
os.utime(db_path, (same_time, same_time))
os.utime(seg_dir / "data_level0.bin", (same_time, same_time))
moved = quarantine_stale_hnsw(str(palace), stale_seconds=999_999)
assert len(moved) == 1
assert not seg_dir.exists()
moved_path = Path(moved[0])
assert moved_path.exists()
assert moved_path.name.startswith("11111111-2222-3333-4444-555555555555.drift-")
def test_quarantine_leaves_reasonable_payload_in_place(tmp_path):
palace = tmp_path / "palace"
palace.mkdir()
db_path = palace / "chroma.sqlite3"
db_path.write_text("sqlite placeholder")
seg_dir = palace / "11111111-2222-3333-4444-555555555555"
_write_segment(
seg_dir,
data_size=100,
link_size=100,
write_metadata=True,
)
same_time = 1_700_000_000
os.utime(db_path, (same_time, same_time))
os.utime(seg_dir / "data_level0.bin", (same_time, same_time))
moved = quarantine_stale_hnsw(str(palace), stale_seconds=999_999)
assert moved == []
assert seg_dir.exists()