test: expand coverage from 20 to 92 tests, migrate to uv

- Migrate from setuptools to hatchling build backend
- Add dependency-groups (PEP 735) for dev tooling (pytest, ruff)
- Remove redundant requirements.txt in favor of uv.lock
- Fix __version__ mismatch (2.0.0 -> 3.0.0 to match pyproject.toml)

New test files:
- conftest.py: shared fixtures (isolated palace, KG, ChromaDB collection)
- test_knowledge_graph.py: 17 tests (entity CRUD, temporal queries, timeline)
- test_mcp_server.py: 25 tests (protocol dispatch, read/write/KG/diary tools)
- test_searcher.py: 7 tests (search_memories API, filters, error handling)
- test_dialect.py: 13 tests (AAAK compression, entity/emotion detection, zettel encoding)

All 92 tests pass on Python 3.13 with chromadb 0.6.3.
2026-04-07 17:07:02 -03:00
parent 0b0e123f42
commit 72c548b729
8 changed files with 3943 additions and 11 deletions
@@ -0,0 +1,155 @@
+"""
+test_dialect.py — Tests for the AAAK Dialect compression system.
+
+Covers plain text compression, entity detection, emotion detection,
+topic extraction, key sentence extraction, zettel encoding, stats, and decoding.
+"""
+
+from mempalace.dialect import Dialect
+
+
+class TestPlainTextCompression:
+    """Checks on the string returned by ``Dialect.compress`` for plain text."""
+
+    def test_compress_basic(self):
+        """A plain sentence compresses to a non-empty string containing a pipe."""
+        packed = Dialect().compress("We decided to use GraphQL instead of REST for the API layer.")
+        assert isinstance(packed, str)
+        assert packed != ""
+        # AAAK format uses pipe-separated fields
+        assert "|" in packed
+
+    def test_compress_with_metadata(self):
+        """The wing and room values passed as metadata appear in the output."""
+        meta = {"wing": "project", "room": "backend", "source_file": "auth.py"}
+        packed = Dialect().compress("Authentication now uses JWT tokens.", metadata=meta)
+        assert "project" in packed
+        assert "backend" in packed
+
+    def test_compress_produces_entity_codes(self):
+        """At least one of the configured entity codes appears in the output."""
+        dialect = Dialect(entities={"Alice": "ALC", "Bob": "BOB"})
+        packed = dialect.compress("Alice told Bob about the new deployment strategy.")
+        assert any(code in packed for code in ("ALC", "BOB"))
+
+    def test_compress_empty_text(self):
+        """Compressing an empty string still returns a string."""
+        assert isinstance(Dialect().compress(""), str)
+
+
+class TestEntityDetection:
+    """Entity detection and encoding with and without a configured entity map."""
+
+    def test_known_entities(self):
+        """A configured name found in the text is reported by its code."""
+        dialect = Dialect(entities={"Alice": "ALC"})
+        assert "ALC" in dialect._detect_entities_in_text("Alice went to the store.")
+
+    def test_auto_code_unknown_entities(self):
+        """With no entity map, at least one detected code has length three."""
+        text = "I spoke with Bernardo about the project today."
+        detected = Dialect()._detect_entities_in_text(text)
+        assert any(len(code) == 3 for code in detected)
+
+    def test_skip_names(self):
+        """A name listed in ``skip_names`` encodes to ``None`` despite being mapped."""
+        dialect = Dialect(entities={"Gandalf": "GAN"}, skip_names=["Gandalf"])
+        assert dialect.encode_entity("Gandalf") is None
+
+
+class TestEmotionDetection:
+    """Checks on the collection returned by ``Dialect._detect_emotions``."""
+
+    def test_detect_emotions(self):
+        """Text with emotional wording yields at least one detected emotion."""
+        detected = Dialect()._detect_emotions(
+            "I'm really excited and happy about this breakthrough!"
+        )
+        assert len(detected) > 0
+
+    def test_max_three_emotions(self):
+        """Text naming six feelings yields no more than three emotions."""
+        detected = Dialect()._detect_emotions(
+            "I feel scared, happy, angry, surprised, disgusted, and confused."
+        )
+        assert len(detected) <= 3
+
+
+class TestTopicExtraction:
+    """Checks on the topics returned by ``Dialect._extract_topics``."""
+
+    def test_extract_topics(self):
+        """A technical sentence yields between one and three topics."""
+        sentence = (
+            "The Python authentication server uses PostgreSQL for storage "
+            "and Redis for caching sessions."
+        )
+        assert 0 < len(Dialect()._extract_topics(sentence)) <= 3
+
+    def test_boosts_technical_terms(self):
+        """A capitalized term mentioned twice is among the extracted topics."""
+        topics = Dialect()._extract_topics(
+            "GraphQL vs REST: we chose GraphQL for the new API endpoint."
+        )
+        # "graphql" should appear since it's mentioned twice + capitalized
+        assert "graphql" in {topic.lower() for topic in topics}
+
+
+class TestKeySentenceExtraction:
+    """Checks on the sentence returned by ``Dialect._extract_key_sentence``."""
+
+    def test_extract_key_sentence(self):
+        """The chosen sentence contains "decided" or "instead" (case-insensitive)."""
+        sentences = [
+            "The server runs on port 3000.",
+            "We decided to use PostgreSQL instead of MongoDB.",
+            "The config file needs updating.",
+        ]
+        chosen = Dialect()._extract_key_sentence(" ".join(sentences)).lower()
+        assert any(word in chosen for word in ("decided", "instead"))
+
+    def test_truncates_long_sentences(self):
+        """For a 200-character input the returned sentence is at most 55 characters."""
+        repeated = "a " * 100  # very long
+        assert len(Dialect()._extract_key_sentence(repeated)) <= 55
+
+
+class TestCompressionStats:
+    """Checks on ``Dialect.compression_stats`` and ``Dialect.count_tokens``."""
+
+    def test_stats(self):
+        """Repetitive text compresses to fewer characters, with a ratio above one."""
+        dialect = Dialect()
+        source = "We decided to use GraphQL instead of REST. " * 10
+        squeezed = dialect.compress(source)
+        report = dialect.compression_stats(source, squeezed)
+        assert report["ratio"] > 1
+        assert report["original_chars"] > report["compressed_chars"]
+
+    def test_count_tokens(self):
+        """``count_tokens`` on "hello world" equals its length floor-divided by 3."""
+        sample = "hello world"
+        assert Dialect.count_tokens(sample) == len(sample) // 3
+
+
+class TestZettelEncoding:
+    """Checks on ``Dialect.encode_zettel`` and ``Dialect.encode_tunnel`` output."""
+
+    def test_encode_zettel(self):
+        """The encoded zettel contains the person's entity code and a topic."""
+        note = {
+            "id": "zettel-001",
+            "people": ["Alice"],
+            "topics": ["memory", "ai"],
+            "content": 'She said "I want to remember everything"',
+            "emotional_weight": 0.9,
+            "emotional_tone": ["joy"],
+            "origin_moment": False,
+            "sensitivity": "",
+            "notes": "",
+            "origin_label": "",
+            "title": "Test - Memory Discussion",
+        }
+        encoded = Dialect(entities={"Alice": "ALC"}).encode_zettel(note)
+        assert "ALC" in encoded
+        assert "memory" in encoded
+
+    def test_encode_tunnel(self):
+        """The encoded tunnel contains the ``T:`` marker and both numeric suffixes."""
+        link = {"from": "zettel-001", "to": "zettel-002", "label": "follows: temporal"}
+        encoded = Dialect().encode_tunnel(link)
+        for fragment in ("T:", "001", "002"):
+            assert fragment in encoded
+
+
+class TestDecode:
+    """Checks on the mapping returned by ``Dialect.decode``."""
+
+    def test_decode_roundtrip(self):
+        """A hand-written three-line encoded string decodes to header, arc and one zettel.
+
+        Despite the name, only ``decode`` is exercised here: the input is a
+        literal, not the output of an encode call.
+        """
+        lines = [
+            "001|ALC+BOB|2025-01-01|test_title",
+            "ARC:journey",
+            '001:ALC|memory_ai|"test quote"|0.9|joy',
+        ]
+        parsed = Dialect().decode("\n".join(lines))
+        assert parsed["header"]["file"] == "001"
+        assert parsed["arc"] == "journey"
+        assert len(parsed["zettels"]) == 1