test: add comprehensive test coverage (35% → 58%, threshold 50%)

Add 180+ new tests across 10 test files covering previously untested modules: - instructions_cli (0% → 100%), hooks_cli (73% → 96%), spellcheck (28% → 84%) - palace_graph (9% → 91%), general_extractor (0% → 92%), entity_detector (0% → 69%) - entity_registry (0% → 70%), room_detector_local (0% → 55%), layers (0% → 28%) - onboarding (0% → 36%) Also fixes Windows encoding bug in onboarding.py (write_text without encoding="utf-8"). Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-08 20:54:41 +03:00
parent fcc9ce84f2
commit 03e9b57108
12 changed files with 1901 additions and 3 deletions
@@ -0,0 +1,242 @@
+"""Tests for mempalace.general_extractor."""
+
+from mempalace.general_extractor import (
+    ALL_MARKERS,
+    NEGATIVE_WORDS,
+    POSITIVE_WORDS,
+    _extract_prose,
+    _get_sentiment,
+    _has_resolution,
+    _is_code_line,
+    _score_markers,
+    _split_into_segments,
+    extract_memories,
+)
+
+
+# ── extract_memories — empty / no markers ───────────────────────────────
+
+
+def test_extract_memories_empty_text():
+    result = extract_memories("")
+    assert result == []
+
+
+def test_extract_memories_no_markers():
+    result = extract_memories("The quick brown fox jumped over the lazy dog.")
+    assert result == []
+
+
+def test_extract_memories_short_text_skipped():
+    # Paragraphs shorter than 20 chars are skipped
+    result = extract_memories("ok sure")
+    assert result == []
+
+
+# ── extract_memories — decision markers ─────────────────────────────────
+
+
+def test_extract_memories_decision():
+    text = (
+        "We decided to go with PostgreSQL instead of MySQL "
+        "because the performance was better for our use case. "
+        "The trade-off was more complexity in setup."
+    )
+    result = extract_memories(text)
+    assert len(result) >= 1
+    assert any(m["memory_type"] == "decision" for m in result)
+
+
+# ── extract_memories — preference markers ───────────────────────────────
+
+
+def test_extract_memories_preference():
+    text = (
+        "I prefer using snake_case in Python code. "
+        "Please always use type hints. "
+        "Never use wildcard imports."
+    )
+    result = extract_memories(text)
+    assert len(result) >= 1
+    assert any(m["memory_type"] == "preference" for m in result)
+
+
+# ── extract_memories — milestone markers ────────────────────────────────
+
+
+def test_extract_memories_milestone():
+    text = (
+        "It finally works! After three days of debugging, "
+        "I figured out the issue. The breakthrough was realizing "
+        "the config file was cached. Got it working at 2am."
+    )
+    result = extract_memories(text)
+    assert len(result) >= 1
+    assert any(m["memory_type"] == "milestone" for m in result)
+
+
+# ── extract_memories — problem markers ──────────────────────────────────
+
+
+def test_extract_memories_problem():
+    text = (
+        "There's a critical bug in the auth module. "
+        "The error keeps crashing the server. "
+        "The root cause was a missing null check. "
+        "The problem is that tokens expire silently."
+    )
+    result = extract_memories(text)
+    assert len(result) >= 1
+    types = {m["memory_type"] for m in result}
+    assert "problem" in types or "milestone" in types  # resolved problems become milestones
+
+
+# ── extract_memories — emotional markers ────────────────────────────────
+
+
+def test_extract_memories_emotional():
+    text = (
+        "I feel so proud of what we built together. "
+        "I love working on this project, it makes me happy. "
+        "I'm grateful for the team and the beautiful code we wrote."
+    )
+    result = extract_memories(text)
+    assert len(result) >= 1
+    assert any(m["memory_type"] == "emotional" for m in result)
+
+
+# ── extract_memories — chunk_index ──────────────────────────────────────
+
+
+def test_extract_memories_chunk_index_increments():
+    text = (
+        "We decided to use React because it fits our team.\n\n"
+        "I prefer functional components always.\n\n"
+        "It works! We finally shipped the v1.0 release."
+    )
+    result = extract_memories(text)
+    if len(result) >= 2:
+        indices = [m["chunk_index"] for m in result]
+        assert indices == list(range(len(result)))
+
+
+# ── _score_markers ──────────────────────────────────────────────────────
+
+
+def test_score_markers_with_matches():
+    score, keywords = _score_markers(
+        "we decided to go with postgres because it is faster",
+        ALL_MARKERS["decision"],
+    )
+    assert score > 0
+    assert len(keywords) > 0
+
+
+def test_score_markers_no_matches():
+    score, keywords = _score_markers("nothing relevant here", ALL_MARKERS["decision"])
+    assert score == 0.0
+
+
+# ── _get_sentiment ──────────────────────────────────────────────────────
+
+
+def test_get_sentiment_positive():
+    assert _get_sentiment("I am so happy and proud of this breakthrough") == "positive"
+
+
+def test_get_sentiment_negative():
+    assert _get_sentiment("This bug caused a crash and total failure") == "negative"
+
+
+def test_get_sentiment_neutral():
+    assert _get_sentiment("The meeting is at three") == "neutral"
+
+
+# ── _has_resolution ─────────────────────────────────────────────────────
+
+
+def test_has_resolution_true():
+    assert _has_resolution("I fixed the auth bug and it works now") is True
+
+
+def test_has_resolution_false():
+    assert _has_resolution("The server keeps crashing") is False
+
+
+# ── _is_code_line ───────────────────────────────────────────────────────
+
+
+def test_is_code_line_detects_code():
+    assert _is_code_line("  import os") is True
+    assert _is_code_line("  $ pip install flask") is True
+    assert _is_code_line("  ```python") is True
+
+
+def test_is_code_line_allows_prose():
+    assert _is_code_line("This is a regular sentence about coding.") is False
+    assert _is_code_line("") is False
+
+
+# ── _extract_prose ──────────────────────────────────────────────────────
+
+
+def test_extract_prose_strips_code_blocks():
+    text = "Hello world\n```\nimport os\nprint('hi')\n```\nGoodbye"
+    result = _extract_prose(text)
+    assert "import os" not in result
+    assert "Hello world" in result
+    assert "Goodbye" in result
+
+
+def test_extract_prose_returns_original_if_all_code():
+    text = "import os\nfrom sys import argv"
+    result = _extract_prose(text)
+    # Falls back to original text if nothing left
+    assert len(result) > 0
+
+
+# ── _split_into_segments ───────────────────────────────────────────────
+
+
+def test_split_into_segments_by_paragraph():
+    text = "First paragraph.\n\nSecond paragraph.\n\nThird paragraph."
+    result = _split_into_segments(text)
+    assert len(result) == 3
+
+
+def test_split_into_segments_by_turns():
+    lines = []
+    for i in range(5):
+        lines.append(f"Human: Question {i}")
+        lines.append(f"Assistant: Answer {i}")
+    text = "\n".join(lines)
+    result = _split_into_segments(text)
+    assert len(result) >= 3  # turn-based splitting should fire
+
+
+def test_split_into_segments_single_block():
+    # Many lines without double-newline produces chunked segments
+    lines = [f"Line {i} of the document" for i in range(30)]
+    text = "\n".join(lines)
+    result = _split_into_segments(text)
+    assert len(result) >= 1
+
+
+# ── ALL_MARKERS constant ───────────────────────────────────────────────
+
+
+def test_all_markers_has_five_types():
+    assert set(ALL_MARKERS.keys()) == {"decision", "preference", "milestone", "problem", "emotional"}
+
+
+# ── POSITIVE_WORDS / NEGATIVE_WORDS ────────────────────────────────────
+
+
+def test_positive_words():
+    assert "happy" in POSITIVE_WORDS
+    assert "proud" in POSITIVE_WORDS
+
+
+def test_negative_words():
+    assert "bug" in NEGATIVE_WORDS
+    assert "crash" in NEGATIVE_WORDS