# tests/test_searcher.py

"""
test_searcher.py -- Tests for both search() (CLI) and search_memories() (API).

Uses the real ChromaDB fixtures from conftest.py for integration tests,
plus mock-based tests for error paths.
"""

from unittest.mock import MagicMock, patch

import pytest

from mempalace.searcher import SearchError, search, search_memories


# ── search_memories (API) ──────────────────────────────────────────────


class TestSearchMemories:
    """Tests for the programmatic ``search_memories()`` entry point.

    Integration cases rely on the ``palace_path`` / ``seeded_collection``
    fixtures; error and edge cases replace
    ``mempalace.searcher.get_collection`` with a mock.
    """

    def test_basic_search(self, palace_path, seeded_collection):
        """A plain query returns a non-empty result list and echoes the query."""
        response = search_memories("JWT authentication", palace_path)
        assert "results" in response
        hits = response["results"]
        assert len(hits) > 0
        assert response["query"] == "JWT authentication"

    def test_wing_filter(self, palace_path, seeded_collection):
        """Every hit returned under a wing filter carries that wing."""
        response = search_memories("planning", palace_path, wing="notes")
        # An empty result list satisfies this check trivially.
        for hit in response["results"]:
            assert hit["wing"] == "notes"

    def test_room_filter(self, palace_path, seeded_collection):
        """Every hit returned under a room filter carries that room."""
        response = search_memories("database", palace_path, room="backend")
        # An empty result list satisfies this check trivially.
        for hit in response["results"]:
            assert hit["room"] == "backend"

    def test_wing_and_room_filter(self, palace_path, seeded_collection):
        """Combined wing + room filters are both reflected on every hit."""
        response = search_memories("code", palace_path, wing="project", room="frontend")
        # An empty result list satisfies this check trivially.
        for hit in response["results"]:
            assert hit["wing"] == "project"
            assert hit["room"] == "frontend"

    def test_n_results_limit(self, palace_path, seeded_collection):
        """``n_results`` caps the number of hits returned."""
        response = search_memories("code", palace_path, n_results=2)
        hit_count = len(response["results"])
        assert hit_count <= 2

    def test_no_palace_returns_error(self, tmp_path):
        """A path with no palace yields a dict containing an ``error`` key."""
        missing_palace = tmp_path / "missing"
        response = search_memories("anything", str(missing_palace))
        assert "error" in response

    def test_result_fields(self, palace_path, seeded_collection):
        """The first hit exposes the expected keys; similarity is a float."""
        first_hit = search_memories("authentication", palace_path)["results"][0]
        for field in ("text", "wing", "room", "source_file", "similarity"):
            assert field in first_hit
        assert isinstance(first_hit["similarity"], float)
        assert "created_at" in first_hit

    def test_created_at_contains_filed_at(self, palace_path, seeded_collection):
        """created_at surfaces the filed_at metadata from the drawer."""
        first_hit = search_memories("JWT authentication", palace_path)["results"][0]
        assert first_hit["created_at"] == "2026-01-01T00:00:00"

    def test_created_at_fallback_when_filed_at_missing(self):
        """created_at defaults to 'unknown' when filed_at is absent."""
        # Metadata deliberately carries no "filed_at" entry.
        undated_meta = {"wing": "project", "room": "backend", "source_file": "x.py"}
        fake_collection = MagicMock()
        fake_collection.query.return_value = {
            "ids": [["drawer_no_date"]],
            "documents": [["Some text without a date"]],
            "metadatas": [[undated_meta]],
            "distances": [[0.1]],
        }

        with patch("mempalace.searcher.get_collection", return_value=fake_collection):
            response = search_memories("test", "/fake/path")

        assert response["results"][0]["created_at"] == "unknown"

    def test_search_memories_query_error(self):
        """search_memories returns error dict when query raises."""
        failing_collection = MagicMock()
        failing_collection.query.side_effect = RuntimeError("query failed")

        with patch("mempalace.searcher.get_collection", return_value=failing_collection):
            response = search_memories("test", "/fake/path")

        assert "error" in response
        # The underlying exception message must be surfaced to the caller.
        assert "query failed" in response["error"]

    def test_search_memories_filters_in_result(self, palace_path, seeded_collection):
        """The response echoes the wing/room filters that were applied."""
        response = search_memories("test", palace_path, wing="project", room="backend")
        applied = response["filters"]
        assert applied["wing"] == "project"
        assert applied["room"] == "backend"

    def test_search_memories_handles_none_metadata(self):
        """API path: a `None` entry in the drawer results' metadatas list must
        fall back to the sentinel strings (wing/room 'unknown', source '?')
        instead of raising `AttributeError: 'NoneType' object has no
        attribute 'get'`, so the rest of the result set still renders."""
        drawer_collection = MagicMock()
        drawer_collection.query.return_value = {
            "documents": [["first doc", "second doc"]],
            "metadatas": [[{"source_file": "a.md", "wing": "w", "room": "r"}, None]],
            "distances": [[0.1, 0.2]],
            "ids": [["d1", "d2"]],
        }
        # Holds the single collection to hand out; emptied by the first lookup.
        pending = [drawer_collection]

        def fake_get_collection(path, create=False):
            # First lookup (drawers) succeeds. Any later lookup (closets)
            # raises so hybrid search degrades to pure drawer search.
            if pending:
                return pending.pop()
            raise RuntimeError("no closets")

        with patch("mempalace.searcher.get_collection", side_effect=fake_get_collection):
            response = search_memories("anything", "/fake/path")

        assert "results" in response
        hits = response["results"]
        assert len(hits) == 2
        # The None-metadata hit is rendered with sentinel values, not a crash.
        none_hit = hits[1]
        assert none_hit["text"] == "second doc"
        assert none_hit["wing"] == "unknown"
        assert none_hit["room"] == "unknown"


# ── search() (CLI print function) ─────────────────────────────────────


class TestSearchCLI:
    """Tests for the printing ``search()`` entry point.

    Output is inspected through pytest's ``capsys`` fixture; failure paths are
    expected to surface as ``SearchError``.
    """

    def test_search_prints_results(self, palace_path, seeded_collection, capsys):
        """A matching query prints text containing at least one query term."""
        search("JWT authentication", palace_path)
        stdout = capsys.readouterr().out
        assert any(term in stdout for term in ("JWT", "authentication"))

    def test_search_with_wing_filter(self, palace_path, seeded_collection, capsys):
        """A wing-filtered search still prints the results header."""
        search("planning", palace_path, wing="notes")
        stdout = capsys.readouterr().out
        assert "Results for" in stdout

    def test_search_with_room_filter(self, palace_path, seeded_collection, capsys):
        """A room-filtered search prints a ``Room:`` label."""
        search("database", palace_path, room="backend")
        stdout = capsys.readouterr().out
        assert "Room:" in stdout

    def test_search_with_wing_and_room(self, palace_path, seeded_collection, capsys):
        """With both filters set, both ``Wing:`` and ``Room:`` labels appear."""
        search("code", palace_path, wing="project", room="frontend")
        stdout = capsys.readouterr().out
        for label in ("Wing:", "Room:"):
            assert label in stdout

    def test_search_no_palace_raises(self, tmp_path):
        """A path with no palace raises SearchError mentioning the missing palace."""
        missing_palace = str(tmp_path / "missing")
        with pytest.raises(SearchError, match="No palace found"):
            search("anything", missing_palace)

    def test_search_no_results(self, palace_path, collection, capsys):
        """Empty collection returns no results message."""
        # `collection` is the unseeded fixture, so there is nothing to match.
        outcome = search("xyzzy_nonexistent_query", palace_path, n_results=1)
        stdout = capsys.readouterr().out
        # Accept either a None return or a printed "No results" message.
        # NOTE(review): if search() always returns None this check can never
        # fail — confirm against the implementation.
        if outcome is not None:
            assert "No results" in stdout

    def test_search_query_error_raises(self):
        """search raises SearchError when query fails."""
        failing_col = MagicMock()
        failing_col.query.side_effect = RuntimeError("boom")

        patched = patch("mempalace.searcher.get_collection", return_value=failing_col)
        with patched, pytest.raises(SearchError, match="Search error"):
            search("test", "/fake/path")

    def test_search_n_results(self, palace_path, seeded_collection, capsys):
        """With ``n_results=1`` the output contains the first result block."""
        search("code", palace_path, n_results=1)
        # Result blocks are numbered; at least "[1]" must be present.
        assert "[1]" in capsys.readouterr().out

    def test_search_handles_none_metadata_without_crash(self, palace_path, capsys):
        """ChromaDB can return `None` entries in the metadatas list when a
        drawer has no metadata. The CLI print path must keep rendering past
        them — it used to raise `AttributeError: 'NoneType' object has no
        attribute 'get'` after printing earlier results."""
        # NOTE(review): `palace_path` looks unused here (the collection lookup
        # is patched and a fake path is passed) — confirm before removing.
        sparse_collection = MagicMock()
        sparse_collection.query.return_value = {
            "documents": [["first doc", "second doc"]],
            "metadatas": [[{"source_file": "a.md", "wing": "w", "room": "r"}, None]],
            "distances": [[0.1, 0.2]],
        }

        with patch("mempalace.searcher.get_collection", return_value=sparse_collection):
            search("anything", "/fake/path")

        stdout = capsys.readouterr().out
        # Both result blocks and the second document's text must be printed,
        # showing that rendering continued past the None metadata entry.
        for marker in ("[1]", "[2]", "second doc"):
            assert marker in stdout
test: expand coverage from 20 to 92 tests, migrate to uv 2026-04-07 17:07:02 -03:00			`"""`
test: expand coverage to 70%, fix mcp_server CI crash (threshold 60%) 2026-04-08 21:07:03 +03:00			`test_searcher.py -- Tests for both search() (CLI) and search_memories() (API).`
test: expand coverage from 20 to 92 tests, migrate to uv 2026-04-07 17:07:02 -03:00
test: expand coverage to 70%, fix mcp_server CI crash (threshold 60%) 2026-04-08 21:07:03 +03:00			`Uses the real ChromaDB fixtures from conftest.py for integration tests,`
			`plus mock-based tests for error paths.`
test: expand coverage from 20 to 92 tests, migrate to uv 2026-04-07 17:07:02 -03:00			`"""`

test: expand coverage to 70%, fix mcp_server CI crash (threshold 60%) 2026-04-08 21:07:03 +03:00			`from unittest.mock import MagicMock, patch`

			`import pytest`

			`from mempalace.searcher import SearchError, search, search_memories`


			`# ── search_memories (API) ──────────────────────────────────────────────`
test: expand coverage from 20 to 92 tests, migrate to uv 2026-04-07 17:07:02 -03:00

			`class TestSearchMemories:`
			`def test_basic_search(self, palace_path, seeded_collection):`
			`result = search_memories("JWT authentication", palace_path)`
			`assert "results" in result`
			`assert len(result["results"]) > 0`
			`assert result["query"] == "JWT authentication"`

			`def test_wing_filter(self, palace_path, seeded_collection):`
			`result = search_memories("planning", palace_path, wing="notes")`
			`assert all(r["wing"] == "notes" for r in result["results"])`

			`def test_room_filter(self, palace_path, seeded_collection):`
			`result = search_memories("database", palace_path, room="backend")`
			`assert all(r["room"] == "backend" for r in result["results"])`

			`def test_wing_and_room_filter(self, palace_path, seeded_collection):`
			`result = search_memories("code", palace_path, wing="project", room="frontend")`
fix: CI failures — update workflow for uv migration, fix lint and format 2026-04-07 17:59:21 -03:00			`assert all(r["wing"] == "project" and r["room"] == "frontend" for r in result["results"])`
test: expand coverage from 20 to 92 tests, migrate to uv 2026-04-07 17:07:02 -03:00
			`def test_n_results_limit(self, palace_path, seeded_collection):`
			`result = search_memories("code", palace_path, n_results=2)`
			`assert len(result["results"]) <= 2`

fix: address Copilot review — derive MCP version, improve test isolation and portability 2026-04-07 17:29:12 -03:00			`def test_no_palace_returns_error(self, tmp_path):`
			`result = search_memories("anything", str(tmp_path / "missing"))`
test: expand coverage from 20 to 92 tests, migrate to uv 2026-04-07 17:07:02 -03:00			`assert "error" in result`

			`def test_result_fields(self, palace_path, seeded_collection):`
			`result = search_memories("authentication", palace_path)`
			`hit = result["results"][0]`
			`assert "text" in hit`
			`assert "wing" in hit`
			`assert "room" in hit`
			`assert "source_file" in hit`
			`assert "similarity" in hit`
			`assert isinstance(hit["similarity"], float)`
feat: include created_at timestamp in search results (#846 ) 2026-04-15 03:26:57 -04:00			`assert "created_at" in hit`

			`def test_created_at_contains_filed_at(self, palace_path, seeded_collection):`
			`"""created_at surfaces the filed_at metadata from the drawer."""`
			`result = search_memories("JWT authentication", palace_path)`
			`hit = result["results"][0]`
			`assert hit["created_at"] == "2026-01-01T00:00:00"`

			`def test_created_at_fallback_when_filed_at_missing(self):`
			`"""created_at defaults to 'unknown' when filed_at is absent."""`
			`mock_col = MagicMock()`
			`mock_col.query.return_value = {`
			`"ids": [["drawer_no_date"]],`
			`"documents": [["Some text without a date"]],`
			`"metadatas": [[{"wing": "project", "room": "backend", "source_file": "x.py"}]],`
			`"distances": [[0.1]],`
			`}`

			`with patch("mempalace.searcher.get_collection", return_value=mock_col):`
			`result = search_memories("test", "/fake/path")`
			`hit = result["results"][0]`
			`assert hit["created_at"] == "unknown"`
test: expand coverage to 70%, fix mcp_server CI crash (threshold 60%) 2026-04-08 21:07:03 +03:00
			`def test_search_memories_query_error(self):`
			`"""search_memories returns error dict when query raises."""`
			`mock_col = MagicMock()`
			`mock_col.query.side_effect = RuntimeError("query failed")`

Мempalace backend seam (#413 ) 2026-04-11 19:16:49 -04:00			`with patch("mempalace.searcher.get_collection", return_value=mock_col):`
test: expand coverage to 70%, fix mcp_server CI crash (threshold 60%) 2026-04-08 21:07:03 +03:00			`result = search_memories("test", "/fake/path")`
			`assert "error" in result`
			`assert "query failed" in result["error"]`

			`def test_search_memories_filters_in_result(self, palace_path, seeded_collection):`
			`result = search_memories("test", palace_path, wing="project", room="backend")`
			`assert result["filters"]["wing"] == "project"`
			`assert result["filters"]["room"] == "backend"`

fix(searcher): guard API path + closet loop against None metadata too 2026-04-18 10:37:05 -07:00			`def test_search_memories_handles_none_metadata(self):`
			"""API path: `None` entries in the drawer results' metadatas list must
			`fall back to the sentinel strings (wing/room 'unknown', source '?')`
			rather than raising `AttributeError: 'NoneType' object has no
			attribute 'get'` while the rest of the result set renders."""
			`mock_col = MagicMock()`
			`mock_col.query.return_value = {`
			`"documents": [["first doc", "second doc"]],`
			`"metadatas": [[{"source_file": "a.md", "wing": "w", "room": "r"}, None]],`
			`"distances": [[0.1, 0.2]],`
			`"ids": [["d1", "d2"]],`
			`}`

			`def mock_get_collection(path, create=False):`
			`# First call: drawers. Second call: closets — raise so hybrid`
			`# degrades to pure drawer search (the catch block covers it).`
			`if not hasattr(mock_get_collection, "_called"):`
			`mock_get_collection._called = True`
			`return mock_col`
			`raise RuntimeError("no closets")`

			`with patch("mempalace.searcher.get_collection", side_effect=mock_get_collection):`
			`result = search_memories("anything", "/fake/path")`
			`assert "results" in result`
			`assert len(result["results"]) == 2`
			`# The None-metadata hit renders with sentinel values, not a crash.`
			`none_hit = result["results"][1]`
			`assert none_hit["text"] == "second doc"`
			`assert none_hit["wing"] == "unknown"`
			`assert none_hit["room"] == "unknown"`

test: expand coverage to 70%, fix mcp_server CI crash (threshold 60%) 2026-04-08 21:07:03 +03:00
			`# ── search() (CLI print function) ─────────────────────────────────────`


			`class TestSearchCLI:`
			`def test_search_prints_results(self, palace_path, seeded_collection, capsys):`
			`search("JWT authentication", palace_path)`
			`captured = capsys.readouterr()`
			`assert "JWT" in captured.out or "authentication" in captured.out`

			`def test_search_with_wing_filter(self, palace_path, seeded_collection, capsys):`
			`search("planning", palace_path, wing="notes")`
			`captured = capsys.readouterr()`
			`assert "Results for" in captured.out`

			`def test_search_with_room_filter(self, palace_path, seeded_collection, capsys):`
			`search("database", palace_path, room="backend")`
			`captured = capsys.readouterr()`
			`assert "Room:" in captured.out`

			`def test_search_with_wing_and_room(self, palace_path, seeded_collection, capsys):`
			`search("code", palace_path, wing="project", room="frontend")`
			`captured = capsys.readouterr()`
			`assert "Wing:" in captured.out`
			`assert "Room:" in captured.out`

			`def test_search_no_palace_raises(self, tmp_path):`
			`with pytest.raises(SearchError, match="No palace found"):`
			`search("anything", str(tmp_path / "missing"))`

			`def test_search_no_results(self, palace_path, collection, capsys):`
			`"""Empty collection returns no results message."""`
			`# collection is empty (no seeded data)`
			`result = search("xyzzy_nonexistent_query", palace_path, n_results=1)`
			`captured = capsys.readouterr()`
			`# Either prints "No results" or returns None`
			`assert result is None or "No results" in captured.out`

			`def test_search_query_error_raises(self):`
			`"""search raises SearchError when query fails."""`
			`mock_col = MagicMock()`
			`mock_col.query.side_effect = RuntimeError("boom")`

Мempalace backend seam (#413 ) 2026-04-11 19:16:49 -04:00			`with patch("mempalace.searcher.get_collection", return_value=mock_col):`
test: expand coverage to 70%, fix mcp_server CI crash (threshold 60%) 2026-04-08 21:07:03 +03:00			`with pytest.raises(SearchError, match="Search error"):`
			`search("test", "/fake/path")`

			`def test_search_n_results(self, palace_path, seeded_collection, capsys):`
			`search("code", palace_path, n_results=1)`
			`captured = capsys.readouterr()`
			`# Should have output with at least one result block`
			`assert "[1]" in captured.out`
fix(searcher): guard against None metadata in CLI print path 2026-04-18 10:00:59 -07:00
			`def test_search_handles_none_metadata_without_crash(self, palace_path, capsys):`
			"""ChromaDB can return `None` entries in the metadatas list when a
			`drawer has no metadata. The CLI print path must not crash on them`
			mid-render — it used to raise `AttributeError: 'NoneType' object has
			no attribute 'get'` after printing earlier results."""
			`mock_col = MagicMock()`
			`mock_col.query.return_value = {`
			`"documents": [["first doc", "second doc"]],`
			`"metadatas": [[{"source_file": "a.md", "wing": "w", "room": "r"}, None]],`
			`"distances": [[0.1, 0.2]],`
			`}`
			`with patch("mempalace.searcher.get_collection", return_value=mock_col):`
			`search("anything", "/fake/path")`
			`captured = capsys.readouterr()`
			`assert "[1]" in captured.out`
			`assert "[2]" in captured.out`
			`# Second result renders with fallback '?' values instead of crashing`
			`assert "second doc" in captured.out`