Files
mempalace/tests/test_convo_miner_unit.py
T
Tal Muskal abd52534bb test: bring coverage to 85%, set threshold to 85, reset version to 3.0.11
- Add tests for config, convo_miner, spellcheck, knowledge_graph
- Fix Windows PermissionError in test cleanup (chromadb file locks)
- Add UTF-8 encoding to split_mega_files, entity_registry, hooks_cli
- Fix mcp_server parse_known_args logging for unknown args
- Set coverage threshold to 85 in pyproject.toml and CI
- Reset all version files to 3.0.11

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-08 21:38:12 +03:00

103 lines
3.9 KiB
Python

"""Unit tests for convo_miner pure functions (no chromadb needed)."""
from mempalace.convo_miner import (
chunk_exchanges,
detect_convo_room,
scan_convos,
)
class TestChunkExchanges:
    """Tests for ``chunk_exchanges`` on quoted exchanges, plain prose and tiny inputs."""

    def test_exchange_chunking(self):
        """Three '>'-prefixed prompts with replies yield at least two keyed chunks."""
        content = (
            "> What is memory?\n"
            "Memory is persistence of information over time.\n\n"
            "> Why does it matter?\n"
            "It enables continuity across sessions and conversations.\n\n"
            "> How do we build it?\n"
            "With structured storage and retrieval mechanisms.\n"
        )
        chunks = chunk_exchanges(content)
        assert len(chunks) >= 2
        # Every chunk must expose both keys that this test relies on.
        assert all("content" in c and "chunk_index" in c for c in chunks)

    def test_paragraph_fallback(self):
        """Content without '>' lines falls back to paragraph chunking."""
        # Build each paragraph on its own and join afterwards. Writing
        # `"\n\n" "sentence " * 10` lets Python merge the adjacent literals
        # *before* applying `*`, which repeats the blank-line separator with
        # every sentence and produces many one-sentence paragraphs instead of
        # the three long ones this test is meant to exercise.
        paragraphs = [
            "This is a long paragraph about memory systems. " * 10,
            "This is another paragraph about storage. " * 10,
            "And a third paragraph about retrieval. " * 10,
        ]
        content = "\n\n".join(paragraphs)
        chunks = chunk_exchanges(content)
        assert len(chunks) >= 2

    def test_paragraph_line_group_fallback(self):
        """Long content with no paragraph breaks chunks by line groups."""
        # 60 single-newline-separated lines: no '>' markers and no blank lines.
        lines = [f"Line {i}: some content that is meaningful" for i in range(60)]
        content = "\n".join(lines)
        chunks = chunk_exchanges(content)
        assert len(chunks) >= 1

    def test_empty_content(self):
        """An empty string produces an empty chunk list."""
        chunks = chunk_exchanges("")
        assert chunks == []

    def test_short_content_skipped(self):
        """A very short exchange is handled without error and returns a list."""
        chunks = chunk_exchanges("> hi\nbye")
        # Too short to produce chunks (below MIN_CHUNK_SIZE).
        # NOTE(review): only the return type is pinned here; presumably the
        # result is empty -- confirm against convo_miner before tightening.
        assert isinstance(chunks, list)
class TestDetectConvoRoom:
    """Tests for ``detect_convo_room``: one sample text per expected room label."""

    def test_technical_room(self):
        """Text about debugging code is labelled ``technical``."""
        room = detect_convo_room(
            "Let me debug this python function and fix the code error in the api"
        )
        assert room == "technical"

    def test_planning_room(self):
        """Text about roadmaps, sprints and deadlines is labelled ``planning``."""
        room = detect_convo_room(
            "We need to plan the roadmap for the next sprint and set milestone deadlines"
        )
        assert room == "planning"

    def test_architecture_room(self):
        """Text about layers, components and modules is labelled ``architecture``."""
        room = detect_convo_room(
            "The architecture uses a service layer with component interface and module design"
        )
        assert room == "architecture"

    def test_decisions_room(self):
        """Text about choices already made is labelled ``decisions``."""
        room = detect_convo_room(
            "We decided to switch and migrated to the new framework after we chose it"
        )
        assert room == "decisions"

    def test_general_fallback(self):
        """Small talk that fits no specific room is labelled ``general``."""
        room = detect_convo_room("Hello, how are you doing today? The weather is nice.")
        assert room == "general"
class TestScanConvos:
    """Tests for ``scan_convos`` file discovery, run against pytest's ``tmp_path``.

    The results are treated as path-like objects: the tests read ``.suffix``
    and ``.name`` from each returned entry.
    """

    def test_scan_finds_txt_and_md(self, tmp_path):
        """``.txt`` and ``.md`` files are returned while a ``.png`` file is not."""
        (tmp_path / "chat.txt").write_text("hello", encoding="utf-8")
        (tmp_path / "notes.md").write_text("world", encoding="utf-8")
        (tmp_path / "image.png").write_bytes(b"fake")
        files = scan_convos(str(tmp_path))
        extensions = {f.suffix for f in files}
        assert ".txt" in extensions
        assert ".md" in extensions
        assert ".png" not in extensions

    def test_scan_skips_git_dir(self, tmp_path):
        """Files under ``.git`` are ignored; only the top-level chat file is returned."""
        git_dir = tmp_path / ".git"
        git_dir.mkdir()
        (git_dir / "config.txt").write_text("git stuff", encoding="utf-8")
        (tmp_path / "chat.txt").write_text("hello", encoding="utf-8")
        files = scan_convos(str(tmp_path))
        # Check which file survived, not just how many: a bare length check
        # would also pass if the scanner returned .git/config.txt and dropped
        # chat.txt.
        assert [f.name for f in files] == ["chat.txt"]

    def test_scan_skips_meta_json(self, tmp_path):
        """``*.meta.json`` sidecar files are excluded while plain ``.json`` is kept."""
        (tmp_path / "chat.meta.json").write_text("{}", encoding="utf-8")
        (tmp_path / "chat.json").write_text("{}", encoding="utf-8")
        files = scan_convos(str(tmp_path))
        names = [f.name for f in files]
        assert "chat.json" in names
        assert "chat.meta.json" not in names

    def test_scan_empty_dir(self, tmp_path):
        """An empty directory yields an empty result list."""
        files = scan_convos(str(tmp_path))
        assert files == []