test: expand coverage from 20 to 92 tests, migrate to uv
- Migrate from setuptools to hatchling build backend - Add dependency-groups (PEP 735) for dev tooling (pytest, ruff) - Remove redundant requirements.txt in favor of uv.lock - Fix __version__ mismatch (2.0.0 -> 3.0.0 to match pyproject.toml) New test files: - conftest.py: shared fixtures (isolated palace, KG, ChromaDB collection) - test_knowledge_graph.py: 17 tests (entity CRUD, temporal queries, timeline) - test_mcp_server.py: 25 tests (protocol dispatch, read/write/KG/diary tools) - test_searcher.py: 7 tests (search_memories API, filters, error handling) - test_dialect.py: 13 tests (AAAK compression, entity/emotion detection, zettel encoding) All 92 tests pass on Python 3.13 with chromadb 0.6.3.
This commit is contained in:
@@ -0,0 +1,155 @@
|
||||
"""
|
||||
test_dialect.py — Tests for the AAAK Dialect compression system.
|
||||
|
||||
Covers plain text compression, entity detection, emotion detection,
|
||||
topic extraction, key sentence extraction, zettel encoding, and stats.
|
||||
"""
|
||||
|
||||
from mempalace.dialect import Dialect
|
||||
|
||||
|
||||
class TestPlainTextCompression:
|
||||
def test_compress_basic(self):
|
||||
d = Dialect()
|
||||
result = d.compress("We decided to use GraphQL instead of REST for the API layer.")
|
||||
assert isinstance(result, str)
|
||||
assert len(result) > 0
|
||||
# AAAK format uses pipe-separated fields
|
||||
assert "|" in result
|
||||
|
||||
def test_compress_with_metadata(self):
|
||||
d = Dialect()
|
||||
result = d.compress(
|
||||
"Authentication now uses JWT tokens.",
|
||||
metadata={"wing": "project", "room": "backend", "source_file": "auth.py"},
|
||||
)
|
||||
assert "project" in result
|
||||
assert "backend" in result
|
||||
|
||||
def test_compress_produces_entity_codes(self):
|
||||
d = Dialect(entities={"Alice": "ALC", "Bob": "BOB"})
|
||||
result = d.compress("Alice told Bob about the new deployment strategy.")
|
||||
assert "ALC" in result or "BOB" in result
|
||||
|
||||
def test_compress_empty_text(self):
|
||||
d = Dialect()
|
||||
result = d.compress("")
|
||||
assert isinstance(result, str)
|
||||
|
||||
|
||||
class TestEntityDetection:
|
||||
def test_known_entities(self):
|
||||
d = Dialect(entities={"Alice": "ALC"})
|
||||
found = d._detect_entities_in_text("Alice went to the store.")
|
||||
assert "ALC" in found
|
||||
|
||||
def test_auto_code_unknown_entities(self):
|
||||
d = Dialect()
|
||||
found = d._detect_entities_in_text("I spoke with Bernardo about the project today.")
|
||||
assert any(code for code in found if len(code) == 3)
|
||||
|
||||
def test_skip_names(self):
|
||||
d = Dialect(entities={"Gandalf": "GAN"}, skip_names=["Gandalf"])
|
||||
code = d.encode_entity("Gandalf")
|
||||
assert code is None
|
||||
|
||||
|
||||
class TestEmotionDetection:
|
||||
def test_detect_emotions(self):
|
||||
d = Dialect()
|
||||
emotions = d._detect_emotions("I'm really excited and happy about this breakthrough!")
|
||||
assert len(emotions) > 0
|
||||
|
||||
def test_max_three_emotions(self):
|
||||
d = Dialect()
|
||||
text = "I feel scared, happy, angry, surprised, disgusted, and confused."
|
||||
emotions = d._detect_emotions(text)
|
||||
assert len(emotions) <= 3
|
||||
|
||||
|
||||
class TestTopicExtraction:
|
||||
def test_extract_topics(self):
|
||||
d = Dialect()
|
||||
topics = d._extract_topics(
|
||||
"The Python authentication server uses PostgreSQL for storage "
|
||||
"and Redis for caching sessions."
|
||||
)
|
||||
assert len(topics) > 0
|
||||
assert len(topics) <= 3
|
||||
|
||||
def test_boosts_technical_terms(self):
|
||||
d = Dialect()
|
||||
topics = d._extract_topics("GraphQL vs REST: we chose GraphQL for the new API endpoint.")
|
||||
# "graphql" should appear since it's mentioned twice + capitalized
|
||||
topic_lower = [t.lower() for t in topics]
|
||||
assert "graphql" in topic_lower
|
||||
|
||||
|
||||
class TestKeySentenceExtraction:
|
||||
def test_extract_key_sentence(self):
|
||||
d = Dialect()
|
||||
text = (
|
||||
"The server runs on port 3000. "
|
||||
"We decided to use PostgreSQL instead of MongoDB. "
|
||||
"The config file needs updating."
|
||||
)
|
||||
key = d._extract_key_sentence(text)
|
||||
assert "decided" in key.lower() or "instead" in key.lower()
|
||||
|
||||
def test_truncates_long_sentences(self):
|
||||
d = Dialect()
|
||||
text = "a " * 100 # very long
|
||||
key = d._extract_key_sentence(text)
|
||||
assert len(key) <= 55
|
||||
|
||||
|
||||
class TestCompressionStats:
|
||||
def test_stats(self):
|
||||
d = Dialect()
|
||||
original = "We decided to use GraphQL instead of REST. " * 10
|
||||
compressed = d.compress(original)
|
||||
stats = d.compression_stats(original, compressed)
|
||||
assert stats["ratio"] > 1
|
||||
assert stats["original_chars"] > stats["compressed_chars"]
|
||||
|
||||
def test_count_tokens(self):
|
||||
assert Dialect.count_tokens("hello world") == len("hello world") // 3
|
||||
|
||||
|
||||
class TestZettelEncoding:
|
||||
def test_encode_zettel(self):
|
||||
d = Dialect(entities={"Alice": "ALC"})
|
||||
zettel = {
|
||||
"id": "zettel-001",
|
||||
"people": ["Alice"],
|
||||
"topics": ["memory", "ai"],
|
||||
"content": 'She said "I want to remember everything"',
|
||||
"emotional_weight": 0.9,
|
||||
"emotional_tone": ["joy"],
|
||||
"origin_moment": False,
|
||||
"sensitivity": "",
|
||||
"notes": "",
|
||||
"origin_label": "",
|
||||
"title": "Test - Memory Discussion",
|
||||
}
|
||||
result = d.encode_zettel(zettel)
|
||||
assert "ALC" in result
|
||||
assert "memory" in result
|
||||
|
||||
def test_encode_tunnel(self):
|
||||
d = Dialect()
|
||||
tunnel = {"from": "zettel-001", "to": "zettel-002", "label": "follows: temporal"}
|
||||
result = d.encode_tunnel(tunnel)
|
||||
assert "T:" in result
|
||||
assert "001" in result
|
||||
assert "002" in result
|
||||
|
||||
|
||||
class TestDecode:
|
||||
def test_decode_roundtrip(self):
|
||||
d = Dialect()
|
||||
encoded = "001|ALC+BOB|2025-01-01|test_title\nARC:journey\n001:ALC|memory_ai|\"test quote\"|0.9|joy"
|
||||
decoded = d.decode(encoded)
|
||||
assert decoded["header"]["file"] == "001"
|
||||
assert decoded["arc"] == "journey"
|
||||
assert len(decoded["zettels"]) == 1
|
||||
Reference in New Issue
Block a user