From bb577bb41f547a18181a8bfb7a977b6b3bc0bf72 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 12 Apr 2026 22:15:01 +0000 Subject: [PATCH 1/9] Initial plan From c478dfa1736a41c13a43460560af917e7bc4cf59 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 12 Apr 2026 22:21:42 +0000 Subject: [PATCH 2/9] fix: harden palace security checks Agent-Logs-Url: https://github.com/MemPalace/mempalace/sessions/775f2fc4-3051-462e-8586-6d694b55da0d Co-authored-by: igorls <4753812+igorls@users.noreply.github.com> --- mempalace/cli.py | 20 +++++++++++-- mempalace/mcp_server.py | 34 ++++++++++++++++++++-- mempalace/migrate.py | 33 +++++++++++++++++++-- mempalace/query_sanitizer.py | 22 ++++++++++---- tests/test_cli.py | 41 +++++++++++++++++++++++++- tests/test_mcp_server.py | 55 +++++++++++++++++++++++++++++++++++ tests/test_migrate.py | 45 ++++++++++++++++++++++++++++ tests/test_query_sanitizer.py | 3 ++ 8 files changed, 238 insertions(+), 15 deletions(-) create mode 100644 tests/test_migrate.py diff --git a/mempalace/cli.py b/mempalace/cli.py index 803719d..3244c8b 100644 --- a/mempalace/cli.py +++ b/mempalace/cli.py @@ -156,7 +156,7 @@ def cmd_migrate(args): from .migrate import migrate palace_path = os.path.expanduser(args.palace) if args.palace else MempalaceConfig().palace_path - migrate(palace_path=palace_path, dry_run=args.dry_run) + migrate(palace_path=palace_path, dry_run=args.dry_run, confirm=getattr(args, "yes", False)) def cmd_status(args): @@ -170,12 +170,19 @@ def cmd_repair(args): """Rebuild palace vector index from SQLite metadata.""" import chromadb import shutil + from .migrate import confirm_destructive_action, has_palace_database - palace_path = os.path.expanduser(args.palace) if args.palace else MempalaceConfig().palace_path + palace_path = os.path.abspath( + os.path.expanduser(args.palace) if args.palace else MempalaceConfig().palace_path + ) + db_path = os.path.join(palace_path, "chroma.sqlite3") if not os.path.isdir(palace_path): print(f"\n No palace found at {palace_path}") return + if not has_palace_database(palace_path): + print(f"\n No palace database found at {db_path}") + return print(f"\n{'=' * 55}") print(" MemPalace Repair") @@ -197,6 +204,9 @@ def cmd_repair(args): print(" Nothing to repair.") return + if not confirm_destructive_action("Repair", palace_path, assume_yes=getattr(args, "yes", False)): + return + # Extract all drawers in batches print("\n Extracting drawers...") batch_size = 5000 @@ -216,6 +226,9 @@ def cmd_repair(args): palace_path = palace_path.rstrip(os.sep) backup_path = palace_path + ".backup" if os.path.exists(backup_path): + if not has_palace_database(backup_path): + print(f" Refusing to delete non-palace backup path: {backup_path}") + return shutil.rmtree(backup_path) print(f" Backing up to {backup_path}...") shutil.copytree(palace_path, backup_path) @@ -532,7 +545,7 @@ def main(): sub.add_parser( "repair", help="Rebuild palace vector index from stored data (fixes segfaults after corruption)", - ) + ).add_argument("--yes", action="store_true", help="Skip confirmation for destructive changes") # mcp sub.add_parser( @@ -551,6 +564,7 @@ def main(): action="store_true", help="Show what would be migrated without changing anything", ) + p_migrate.add_argument("--yes", action="store_true", help="Skip confirmation for destructive changes") sub.add_parser("status", help="Show what's been filed") diff --git a/mempalace/mcp_server.py b/mempalace/mcp_server.py index 2330555..9725043 100644 --- a/mempalace/mcp_server.py +++ b/mempalace/mcp_server.py @@ -94,7 +94,9 @@ else: pass # Keys whose values should be redacted in WAL entries to avoid logging sensitive content -_WAL_REDACT_KEYS = frozenset({"content_preview", "entry_preview"}) +_WAL_REDACT_KEYS = frozenset( + {"content", "content_preview", "document", "entry", "entry_preview", "query", "text"} +) def _wal_log(operation: str, params: dict, result: dict = None): @@ -212,6 +214,13 @@ def _get_cached_metadata(col, where=None): return result +def _sanitize_optional_name(value: str = None, field_name: str = "name") -> str: + """Validate optional wing/room-style filters.""" + if value is None: + return None + return sanitize_name(value, field_name) + + # ==================== READ TOOLS ==================== @@ -296,6 +305,10 @@ def tool_list_wings(): def tool_list_rooms(wing: str = None): + try: + wing = _sanitize_optional_name(wing, "wing") + except ValueError as e: + return {"error": str(e)} col = _get_collection() if not col: return _no_palace() @@ -345,6 +358,11 @@ def tool_search( context: str = None, ): limit = max(1, min(limit, _MAX_RESULTS)) + try: + wing = _sanitize_optional_name(wing, "wing") + room = _sanitize_optional_name(room, "room") + except ValueError as e: + return {"error": str(e)} # Backwards compat: accept old name # Backwards compat: convert old similarity scale (higher=stricter) to # distance scale (lower=stricter). Similarity 0.8 → distance 0.2. @@ -425,6 +443,11 @@ def tool_traverse_graph(start_room: str, max_hops: int = 2): def tool_find_tunnels(wing_a: str = None, wing_b: str = None): """Find rooms that bridge two wings — the hallways connecting domains.""" + try: + wing_a = _sanitize_optional_name(wing_a, "wing_a") + wing_b = _sanitize_optional_name(wing_b, "wing_b") + except ValueError as e: + return {"error": str(e)} col = _get_collection() if not col: return _no_palace() @@ -559,6 +582,11 @@ def tool_list_drawers(wing: str = None, room: str = None, limit: int = 20, offse """List drawers with pagination. Optional wing/room filter.""" limit = max(1, min(limit, _MAX_RESULTS)) offset = max(0, offset) + try: + wing = _sanitize_optional_name(wing, "wing") + room = _sanitize_optional_name(room, "room") + except ValueError as e: + return {"error": str(e)} col = _get_collection() if not col: return _no_palace() @@ -1098,8 +1126,8 @@ TOOLS = { "properties": { "query": { "type": "string", - "description": "Short search query ONLY — keywords or a question. Max 200 chars recommended.", - "maxLength": 500, + "description": "Short search query ONLY — keywords or a question. Max 250 chars.", + "maxLength": 250, }, "limit": { "type": "integer", diff --git a/mempalace/migrate.py b/mempalace/migrate.py index 848ab67..715511d 100644 --- a/mempalace/migrate.py +++ b/mempalace/migrate.py @@ -104,14 +104,38 @@ def detect_chromadb_version(db_path: str) -> str: conn.close() -def migrate(palace_path: str, dry_run: bool = False): +def has_palace_database(path: str) -> bool: + """Return True when path looks like a MemPalace ChromaDB directory.""" + return os.path.isfile(os.path.join(path, "chroma.sqlite3")) + + +def confirm_destructive_action(action: str, palace_path: str, assume_yes: bool = False) -> bool: + """Require confirmation before destructive palace operations.""" + if assume_yes: + return True + + print(f"\n {action} will replace data in: {palace_path}") + print(" A backup will be created first, but the original directory will be deleted.") + try: + answer = input(" Continue? [y/N]: ").strip().lower() + except EOFError: + print(" Aborted. Re-run with --yes to confirm destructive changes.") + return False + + if answer not in {"y", "yes"}: + print(" Aborted.") + return False + return True + + +def migrate(palace_path: str, dry_run: bool = False, confirm: bool = False): """Migrate a palace to the currently installed ChromaDB version.""" import chromadb - palace_path = os.path.expanduser(palace_path) + palace_path = os.path.abspath(os.path.expanduser(palace_path)) db_path = os.path.join(palace_path, "chroma.sqlite3") - if not os.path.isfile(db_path): + if not os.path.isdir(palace_path) or not has_palace_database(palace_path): print(f"\n No palace database found at {db_path}") return False @@ -166,6 +190,9 @@ def migrate(palace_path: str, dry_run: bool = False): print(f" Would migrate {len(drawers)} drawers.") return True + if not confirm_destructive_action("Migration", palace_path, assume_yes=confirm): + return False + # Backup the old palace timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") backup_path = f"{palace_path}.pre-migrate.{timestamp}" diff --git a/mempalace/query_sanitizer.py b/mempalace/query_sanitizer.py index 450dd13..cb783f7 100644 --- a/mempalace/query_sanitizer.py +++ b/mempalace/query_sanitizer.py @@ -24,7 +24,7 @@ import logging logger = logging.getLogger("mempalace_mcp") # --- Constants --- -MAX_QUERY_LENGTH = 500 # Above this, system prompt almost certainly dominates +MAX_QUERY_LENGTH = 250 # Above this, prompt contamination increasingly dominates SAFE_QUERY_LENGTH = 200 # Below this, query is almost certainly clean MIN_QUERY_LENGTH = 10 # Extracted result shorter than this = extraction failed @@ -67,6 +67,20 @@ def sanitize_query(raw_query: str) -> dict: raw_query = raw_query.strip() original_length = len(raw_query) + def _trim_candidate(candidate: str) -> str: + candidate = candidate.strip().strip("\"'") + if len(candidate) <= MAX_QUERY_LENGTH: + return candidate + + nested_fragments = [ + frag.strip().strip("\"'") for frag in _SENTENCE_SPLIT.split(candidate) if frag.strip() + ] + for frag in reversed(nested_fragments): + if MIN_QUERY_LENGTH <= len(frag) <= MAX_QUERY_LENGTH: + return frag + + return candidate[-MAX_QUERY_LENGTH:].strip() + # --- Step 1: Short query passthrough --- if original_length <= SAFE_QUERY_LENGTH: return { @@ -106,7 +120,7 @@ def sanitize_query(raw_query: str) -> dict: if len(candidate) >= MIN_QUERY_LENGTH: # Apply length guard if len(candidate) > MAX_QUERY_LENGTH: - candidate = candidate[-MAX_QUERY_LENGTH:] + candidate = _trim_candidate(candidate) logger.warning( "Query sanitized: %d → %d chars (method=question_extraction)", original_length, @@ -126,9 +140,7 @@ def sanitize_query(raw_query: str) -> dict: for seg in reversed(all_segments): seg = seg.strip() if len(seg) >= MIN_QUERY_LENGTH: - candidate = seg - if len(candidate) > MAX_QUERY_LENGTH: - candidate = candidate[-MAX_QUERY_LENGTH:] + candidate = _trim_candidate(seg) logger.warning( "Query sanitized: %d → %d chars (method=tail_sentence)", original_length, diff --git a/tests/test_cli.py b/tests/test_cli.py index dc55f23..0e95a8c 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -423,10 +423,24 @@ def test_cmd_repair_no_palace(mock_config_cls, tmp_path, capsys): assert "No palace found" in out +@patch("mempalace.cli.MempalaceConfig") +def test_cmd_repair_requires_palace_database(mock_config_cls, tmp_path, capsys): + palace_dir = tmp_path / "palace" + palace_dir.mkdir() + mock_config_cls.return_value.palace_path = str(palace_dir) + args = argparse.Namespace(palace=None) + mock_chromadb = MagicMock() + with patch.dict("sys.modules", {"chromadb": mock_chromadb}): + cmd_repair(args) + out = capsys.readouterr().out + assert "No palace database found" in out + + @patch("mempalace.cli.MempalaceConfig") def test_cmd_repair_error_reading(mock_config_cls, tmp_path, capsys): palace_dir = tmp_path / "palace" palace_dir.mkdir() + (palace_dir / "chroma.sqlite3").write_text("db") mock_config_cls.return_value.palace_path = str(palace_dir) args = argparse.Namespace(palace=None) mock_chromadb = MagicMock() @@ -443,6 +457,7 @@ def test_cmd_repair_error_reading(mock_config_cls, tmp_path, capsys): def test_cmd_repair_zero_drawers(mock_config_cls, tmp_path, capsys): palace_dir = tmp_path / "palace" palace_dir.mkdir() + (palace_dir / "chroma.sqlite3").write_text("db") mock_config_cls.return_value.palace_path = str(palace_dir) args = argparse.Namespace(palace=None) mock_chromadb = MagicMock() @@ -461,8 +476,9 @@ def test_cmd_repair_zero_drawers(mock_config_cls, tmp_path, capsys): def test_cmd_repair_success(mock_config_cls, tmp_path, capsys): palace_dir = tmp_path / "palace" palace_dir.mkdir() + (palace_dir / "chroma.sqlite3").write_text("db") mock_config_cls.return_value.palace_path = str(palace_dir) - args = argparse.Namespace(palace=None) + args = argparse.Namespace(palace=None, yes=True) mock_chromadb = MagicMock() mock_col = MagicMock() mock_col.count.return_value = 2 @@ -483,6 +499,29 @@ def test_cmd_repair_success(mock_config_cls, tmp_path, capsys): assert "2 drawers rebuilt" in out +@patch("mempalace.cli.MempalaceConfig") +def test_cmd_repair_aborts_without_confirmation(mock_config_cls, tmp_path, capsys): + palace_dir = tmp_path / "palace" + palace_dir.mkdir() + (palace_dir / "chroma.sqlite3").write_text("db") + mock_config_cls.return_value.palace_path = str(palace_dir) + args = argparse.Namespace(palace=None) + mock_chromadb = MagicMock() + mock_col = MagicMock() + mock_col.count.return_value = 1 + mock_client = MagicMock() + mock_client.get_collection.return_value = mock_col + mock_chromadb.PersistentClient.return_value = mock_client + with ( + patch.dict("sys.modules", {"chromadb": mock_chromadb}), + patch("builtins.input", return_value="n"), + ): + cmd_repair(args) + out = capsys.readouterr().out + assert "Aborted." in out + mock_client.create_collection.assert_not_called() + + # ── cmd_compress ─────────────────────────────────────────────────────── diff --git a/tests/test_mcp_server.py b/tests/test_mcp_server.py index ce4499a..32bc7c6 100644 --- a/tests/test_mcp_server.py +++ b/tests/test_mcp_server.py @@ -8,6 +8,8 @@ via monkeypatch to avoid touching real data. import json +import pytest + def _patch_mcp_server(monkeypatch, config, kg): """Patch the mcp_server module globals to use test fixtures.""" @@ -311,6 +313,59 @@ class TestSearchTool: result_loose = tool_search(query="JWT", max_distance=0.01, min_similarity=999.0) assert len(result_strict["results"]) <= len(result_loose["results"]) + def test_list_rooms_rejects_invalid_wing(self, monkeypatch, config, kg): + _patch_mcp_server(monkeypatch, config, kg) + from mempalace import mcp_server + + monkeypatch.setattr(mcp_server, "_get_collection", lambda *args, **kwargs: pytest.fail()) + + result = mcp_server.tool_list_rooms(wing="../etc/passwd") + assert "error" in result + + def test_search_rejects_invalid_room(self, monkeypatch, config, kg): + _patch_mcp_server(monkeypatch, config, kg) + from mempalace import mcp_server + + monkeypatch.setattr(mcp_server, "search_memories", lambda *args, **kwargs: pytest.fail()) + + result = mcp_server.tool_search(query="JWT", room="../backend") + assert "error" in result + + def test_list_drawers_rejects_invalid_wing(self, monkeypatch, config, kg): + _patch_mcp_server(monkeypatch, config, kg) + from mempalace import mcp_server + + monkeypatch.setattr(mcp_server, "_get_collection", lambda *args, **kwargs: pytest.fail()) + + result = mcp_server.tool_list_drawers(wing="../notes") + assert "error" in result + + def test_find_tunnels_rejects_invalid_wing(self, monkeypatch, config, kg): + _patch_mcp_server(monkeypatch, config, kg) + from mempalace import mcp_server + + monkeypatch.setattr(mcp_server, "_get_collection", lambda *args, **kwargs: pytest.fail()) + + result = mcp_server.tool_find_tunnels(wing_a="../project") + assert "error" in result + + def test_wal_redacts_sensitive_fields(self, monkeypatch, config, kg, tmp_path): + _patch_mcp_server(monkeypatch, config, kg) + from mempalace import mcp_server + + wal_file = tmp_path / "write_log.jsonl" + monkeypatch.setattr(mcp_server, "_WAL_FILE", wal_file) + + mcp_server._wal_log( + "test", + {"content": "secret note", "query": "private search", "safe": "ok"}, + ) + + entry = json.loads(wal_file.read_text().strip()) + assert entry["params"]["content"].startswith("[REDACTED") + assert entry["params"]["query"].startswith("[REDACTED") + assert entry["params"]["safe"] == "ok" + # ── Write Tools ───────────────────────────────────────────────────────── diff --git a/tests/test_migrate.py b/tests/test_migrate.py new file mode 100644 index 0000000..1a02462 --- /dev/null +++ b/tests/test_migrate.py @@ -0,0 +1,45 @@ +"""Tests for destructive-operation safety in mempalace.migrate.""" + +from types import SimpleNamespace +from unittest.mock import MagicMock, patch + +from mempalace.migrate import migrate + + +def test_migrate_requires_palace_database(tmp_path, capsys): + palace_dir = tmp_path / "palace" + palace_dir.mkdir() + + result = migrate(str(palace_dir)) + + out = capsys.readouterr().out + assert result is False + assert "No palace database found" in out + + +def test_migrate_aborts_without_confirmation(tmp_path, capsys): + palace_dir = tmp_path / "palace" + palace_dir.mkdir() + (palace_dir / "chroma.sqlite3").write_text("db") + + mock_chromadb = SimpleNamespace(__version__="0.6.0", PersistentClient=MagicMock()) + mock_chromadb.PersistentClient.side_effect = Exception("unreadable") + + with ( + patch.dict("sys.modules", {"chromadb": mock_chromadb}), + patch("mempalace.migrate.detect_chromadb_version", return_value="0.5.x"), + patch( + "mempalace.migrate.extract_drawers_from_sqlite", + return_value=[{"id": "id1", "document": "doc", "metadata": {"wing": "w", "room": "r"}}], + ), + patch("builtins.input", return_value="n"), + patch("mempalace.migrate.shutil.copytree") as mock_copytree, + patch("mempalace.migrate.shutil.rmtree") as mock_rmtree, + ): + result = migrate(str(palace_dir)) + + out = capsys.readouterr().out + assert result is False + assert "Aborted." in out + mock_copytree.assert_not_called() + mock_rmtree.assert_not_called() diff --git a/tests/test_query_sanitizer.py b/tests/test_query_sanitizer.py index 2f28891..2a5c94b 100644 --- a/tests/test_query_sanitizer.py +++ b/tests/test_query_sanitizer.py @@ -123,6 +123,9 @@ class TestTailTruncation: class TestLengthGuards: """Verify output length constraints.""" + def test_max_query_length_reduced(self): + assert MAX_QUERY_LENGTH == 250 + def test_output_never_exceeds_max(self): # Very long question sentence long_question = "a" * 1000 + "?" From d2d4e62543e025718cbaedd46754a76c972f33a9 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 12 Apr 2026 22:23:13 +0000 Subject: [PATCH 3/9] test: expand security regression coverage Agent-Logs-Url: https://github.com/MemPalace/mempalace/sessions/775f2fc4-3051-462e-8586-6d694b55da0d Co-authored-by: igorls <4753812+igorls@users.noreply.github.com> --- mempalace/cli.py | 11 +++++++---- mempalace/migrate.py | 4 ++-- tests/test_query_sanitizer.py | 20 ++++++++++++++++++++ 3 files changed, 29 insertions(+), 6 deletions(-) diff --git a/mempalace/cli.py b/mempalace/cli.py index 3244c8b..6acfcb4 100644 --- a/mempalace/cli.py +++ b/mempalace/cli.py @@ -170,7 +170,7 @@ def cmd_repair(args): """Rebuild palace vector index from SQLite metadata.""" import chromadb import shutil - from .migrate import confirm_destructive_action, has_palace_database + from .migrate import confirm_destructive_action, contains_palace_database palace_path = os.path.abspath( os.path.expanduser(args.palace) if args.palace else MempalaceConfig().palace_path @@ -180,7 +180,7 @@ def cmd_repair(args): if not os.path.isdir(palace_path): print(f"\n No palace found at {palace_path}") return - if not has_palace_database(palace_path): + if not contains_palace_database(palace_path): print(f"\n No palace database found at {db_path}") return @@ -226,8 +226,11 @@ def cmd_repair(args): palace_path = palace_path.rstrip(os.sep) backup_path = palace_path + ".backup" if os.path.exists(backup_path): - if not has_palace_database(backup_path): - print(f" Refusing to delete non-palace backup path: {backup_path}") + if not contains_palace_database(backup_path): + print( + " Cannot proceed: backup path exists but is not a valid palace database. " + f"Please remove or rename: {backup_path}" + ) return shutil.rmtree(backup_path) print(f" Backing up to {backup_path}...") diff --git a/mempalace/migrate.py b/mempalace/migrate.py index 715511d..6410801 100644 --- a/mempalace/migrate.py +++ b/mempalace/migrate.py @@ -104,7 +104,7 @@ def detect_chromadb_version(db_path: str) -> str: conn.close() -def has_palace_database(path: str) -> bool: +def contains_palace_database(path: str) -> bool: """Return True when path looks like a MemPalace ChromaDB directory.""" return os.path.isfile(os.path.join(path, "chroma.sqlite3")) @@ -135,7 +135,7 @@ def migrate(palace_path: str, dry_run: bool = False, confirm: bool = False): palace_path = os.path.abspath(os.path.expanduser(palace_path)) db_path = os.path.join(palace_path, "chroma.sqlite3") - if not os.path.isdir(palace_path) or not has_palace_database(palace_path): + if not os.path.isdir(palace_path) or not contains_palace_database(palace_path): print(f"\n No palace database found at {db_path}") return False diff --git a/tests/test_query_sanitizer.py b/tests/test_query_sanitizer.py index 2a5c94b..015a8a1 100644 --- a/tests/test_query_sanitizer.py +++ b/tests/test_query_sanitizer.py @@ -102,6 +102,20 @@ class TestTailSentence: assert result["was_sanitized"] is True assert "MemPalace" in result["clean_query"] or "ChromaDB" in result["clean_query"] + def test_long_candidate_uses_last_sentence_fragment(self): + query = ("Prompt sentence. " * 30) + "Final search intent for architecture migration" + result = sanitize_query(query) + assert result["method"] == "tail_sentence" + assert result["clean_query"] == "Final search intent for architecture migration" + + def test_long_candidate_strips_wrapping_quotes(self): + query = ("Prefix text " * 30) + '\n"' + ("x" * 260) + '"' + result = sanitize_query(query) + assert result["method"] == "tail_sentence" + assert not result["clean_query"].startswith('"') + assert not result["clean_query"].endswith('"') + assert len(result["clean_query"]) <= MAX_QUERY_LENGTH + class TestTailTruncation: """Step 4: Fallback — take the last MAX_QUERY_LENGTH characters.""" @@ -119,6 +133,12 @@ class TestTailTruncation: result = sanitize_query(filler) assert "IMPORTANT_QUERY_CONTENT" in result["clean_query"] + def test_tail_sentence_fallback_preserves_tail_without_delimiters(self): + filler = ("x" * 260) + "IMPORTANT_QUERY_CONTENT" + result = sanitize_query(filler) + assert result["method"] == "tail_sentence" + assert "IMPORTANT_QUERY_CONTENT" in result["clean_query"] + class TestLengthGuards: """Verify output length constraints.""" From 248ecd98f14a2af3c91e31e06d63014945cc3a11 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 12 Apr 2026 22:24:41 +0000 Subject: [PATCH 4/9] fix: polish sanitizer and repair messaging Agent-Logs-Url: https://github.com/MemPalace/mempalace/sessions/775f2fc4-3051-462e-8586-6d694b55da0d Co-authored-by: igorls <4753812+igorls@users.noreply.github.com> --- mempalace/cli.py | 3 ++- mempalace/query_sanitizer.py | 12 ++++++++++-- tests/test_migrate.py | 6 ++++-- 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/mempalace/cli.py b/mempalace/cli.py index 6acfcb4..4483b4f 100644 --- a/mempalace/cli.py +++ b/mempalace/cli.py @@ -228,7 +228,8 @@ def cmd_repair(args): if os.path.exists(backup_path): if not contains_palace_database(backup_path): print( - " Cannot proceed: backup path exists but is not a valid palace database. " + " Cannot proceed: backup path exists but does not contain a valid palace backup " + "(expected chroma.sqlite3). " f"Please remove or rename: {backup_path}" ) return diff --git a/mempalace/query_sanitizer.py b/mempalace/query_sanitizer.py index cb783f7..9741312 100644 --- a/mempalace/query_sanitizer.py +++ b/mempalace/query_sanitizer.py @@ -67,13 +67,21 @@ def sanitize_query(raw_query: str) -> dict: raw_query = raw_query.strip() original_length = len(raw_query) + def _strip_wrapping_quotes(candidate: str) -> str: + candidate = candidate.strip() + while candidate[:1] in {"'", '"'} or candidate[-1:] in {"'", '"'}: + candidate = candidate.strip("\"'") + if not candidate: + return "" + return candidate + def _trim_candidate(candidate: str) -> str: - candidate = candidate.strip().strip("\"'") + candidate = _strip_wrapping_quotes(candidate) if len(candidate) <= MAX_QUERY_LENGTH: return candidate nested_fragments = [ - frag.strip().strip("\"'") for frag in _SENTENCE_SPLIT.split(candidate) if frag.strip() + _strip_wrapping_quotes(frag) for frag in _SENTENCE_SPLIT.split(candidate) if frag.strip() ] for frag in reversed(nested_fragments): if MIN_QUERY_LENGTH <= len(frag) <= MAX_QUERY_LENGTH: diff --git a/tests/test_migrate.py b/tests/test_migrate.py index 1a02462..33c9191 100644 --- a/tests/test_migrate.py +++ b/tests/test_migrate.py @@ -22,8 +22,10 @@ def test_migrate_aborts_without_confirmation(tmp_path, capsys): palace_dir.mkdir() (palace_dir / "chroma.sqlite3").write_text("db") - mock_chromadb = SimpleNamespace(__version__="0.6.0", PersistentClient=MagicMock()) - mock_chromadb.PersistentClient.side_effect = Exception("unreadable") + mock_chromadb = SimpleNamespace( + __version__="0.6.0", + PersistentClient=MagicMock(side_effect=Exception("unreadable")), + ) with ( patch.dict("sys.modules", {"chromadb": mock_chromadb}), From 976289aa5cb27c02cab292ed51faeb7120e837f0 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 12 Apr 2026 22:25:41 +0000 Subject: [PATCH 5/9] fix: refine security validation messaging Agent-Logs-Url: https://github.com/MemPalace/mempalace/sessions/775f2fc4-3051-462e-8586-6d694b55da0d Co-authored-by: igorls <4753812+igorls@users.noreply.github.com> --- mempalace/cli.py | 3 +-- mempalace/migrate.py | 6 ++++-- mempalace/query_sanitizer.py | 4 ++-- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/mempalace/cli.py b/mempalace/cli.py index 4483b4f..c278492 100644 --- a/mempalace/cli.py +++ b/mempalace/cli.py @@ -228,8 +228,7 @@ def cmd_repair(args): if os.path.exists(backup_path): if not contains_palace_database(backup_path): print( - " Cannot proceed: backup path exists but does not contain a valid palace backup " - "(expected chroma.sqlite3). " + " Cannot proceed: backup path exists but does not contain chroma.sqlite3. " f"Please remove or rename: {backup_path}" ) return diff --git a/mempalace/migrate.py b/mempalace/migrate.py index 6410801..40a9701 100644 --- a/mempalace/migrate.py +++ b/mempalace/migrate.py @@ -109,12 +109,14 @@ def contains_palace_database(path: str) -> bool: return os.path.isfile(os.path.join(path, "chroma.sqlite3")) -def confirm_destructive_action(action: str, palace_path: str, assume_yes: bool = False) -> bool: +def confirm_destructive_action( + operation_name: str, palace_path: str, assume_yes: bool = False +) -> bool: """Require confirmation before destructive palace operations.""" if assume_yes: return True - print(f"\n {action} will replace data in: {palace_path}") + print(f"\n {operation_name} will replace data in: {palace_path}") print(" A backup will be created first, but the original directory will be deleted.") try: answer = input(" Continue? [y/N]: ").strip().lower() diff --git a/mempalace/query_sanitizer.py b/mempalace/query_sanitizer.py index 9741312..f86a621 100644 --- a/mempalace/query_sanitizer.py +++ b/mempalace/query_sanitizer.py @@ -69,11 +69,11 @@ def sanitize_query(raw_query: str) -> dict: def _strip_wrapping_quotes(candidate: str) -> str: candidate = candidate.strip() - while candidate[:1] in {"'", '"'} or candidate[-1:] in {"'", '"'}: + while candidate[:1] in {"'", '"'} and candidate[-1:] in {"'", '"'}: candidate = candidate.strip("\"'") if not candidate: return "" - return candidate + return candidate.strip("\"'") def _trim_candidate(candidate: str) -> str: candidate = _strip_wrapping_quotes(candidate) From b1a676fa246a1cd3c931064a10abc3d6fc95687f Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 12 Apr 2026 22:26:41 +0000 Subject: [PATCH 6/9] fix: make quote trimming explicit Agent-Logs-Url: https://github.com/MemPalace/mempalace/sessions/775f2fc4-3051-462e-8586-6d694b55da0d Co-authored-by: igorls <4753812+igorls@users.noreply.github.com> --- mempalace/query_sanitizer.py | 10 +++++++--- tests/test_query_sanitizer.py | 1 + 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/mempalace/query_sanitizer.py b/mempalace/query_sanitizer.py index f86a621..b320e9c 100644 --- a/mempalace/query_sanitizer.py +++ b/mempalace/query_sanitizer.py @@ -69,11 +69,15 @@ def sanitize_query(raw_query: str) -> dict: def _strip_wrapping_quotes(candidate: str) -> str: candidate = candidate.strip() - while candidate[:1] in {"'", '"'} and candidate[-1:] in {"'", '"'}: - candidate = candidate.strip("\"'") + while len(candidate) >= 2 and candidate[:1] in {"'", '"'} and candidate[-1:] in {"'", '"'}: + candidate = candidate[1:-1].strip() if not candidate: return "" - return candidate.strip("\"'") + if candidate[:1] in {"'", '"'}: + candidate = candidate[1:].strip() + if candidate[-1:] in {"'", '"'}: + candidate = candidate[:-1].strip() + return candidate def _trim_candidate(candidate: str) -> str: candidate = _strip_wrapping_quotes(candidate) diff --git a/tests/test_query_sanitizer.py b/tests/test_query_sanitizer.py index 015a8a1..dd96cfa 100644 --- a/tests/test_query_sanitizer.py +++ b/tests/test_query_sanitizer.py @@ -112,6 +112,7 @@ class TestTailSentence: query = ("Prefix text " * 30) + '\n"' + ("x" * 260) + '"' result = sanitize_query(query) assert result["method"] == "tail_sentence" + assert result["clean_query"] == "x" * MAX_QUERY_LENGTH assert not result["clean_query"].startswith('"') assert not result["clean_query"].endswith('"') assert len(result["clean_query"]) <= MAX_QUERY_LENGTH From c3835237680642da163c0644bd9057f926f1eaa0 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 12 Apr 2026 22:27:40 +0000 Subject: [PATCH 7/9] chore: clarify security guardrails Agent-Logs-Url: https://github.com/MemPalace/mempalace/sessions/775f2fc4-3051-462e-8586-6d694b55da0d Co-authored-by: igorls <4753812+igorls@users.noreply.github.com> --- mempalace/cli.py | 2 +- mempalace/query_sanitizer.py | 7 ++++--- tests/test_migrate.py | 1 + 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/mempalace/cli.py b/mempalace/cli.py index c278492..a73d25e 100644 --- a/mempalace/cli.py +++ b/mempalace/cli.py @@ -228,7 +228,7 @@ def cmd_repair(args): if os.path.exists(backup_path): if not contains_palace_database(backup_path): print( - " Cannot proceed: backup path exists but does not contain chroma.sqlite3. " + " Backup validation failed: backup path exists but does not contain chroma.sqlite3. " f"Please remove or rename: {backup_path}" ) return diff --git a/mempalace/query_sanitizer.py b/mempalace/query_sanitizer.py index b320e9c..dda29a8 100644 --- a/mempalace/query_sanitizer.py +++ b/mempalace/query_sanitizer.py @@ -27,6 +27,7 @@ logger = logging.getLogger("mempalace_mcp") MAX_QUERY_LENGTH = 250 # Above this, prompt contamination increasingly dominates SAFE_QUERY_LENGTH = 200 # Below this, query is almost certainly clean MIN_QUERY_LENGTH = 10 # Extracted result shorter than this = extraction failed +QUOTE_CHARS = {"'", '"'} # Sentence splitter: split on . ! ? (including fullwidth) and newlines _SENTENCE_SPLIT = re.compile(r"[.!?。!?\n]+") @@ -69,13 +70,13 @@ def sanitize_query(raw_query: str) -> dict: def _strip_wrapping_quotes(candidate: str) -> str: candidate = candidate.strip() - while len(candidate) >= 2 and candidate[:1] in {"'", '"'} and candidate[-1:] in {"'", '"'}: + while len(candidate) >= 2 and candidate[:1] in QUOTE_CHARS and candidate[-1:] in QUOTE_CHARS: candidate = candidate[1:-1].strip() if not candidate: return "" - if candidate[:1] in {"'", '"'}: + if candidate[:1] in QUOTE_CHARS: candidate = candidate[1:].strip() - if candidate[-1:] in {"'", '"'}: + if candidate[-1:] in QUOTE_CHARS: candidate = candidate[:-1].strip() return candidate diff --git a/tests/test_migrate.py b/tests/test_migrate.py index 33c9191..f7e7d7e 100644 --- a/tests/test_migrate.py +++ b/tests/test_migrate.py @@ -20,6 +20,7 @@ def test_migrate_requires_palace_database(tmp_path, capsys): def test_migrate_aborts_without_confirmation(tmp_path, capsys): palace_dir = tmp_path / "palace" palace_dir.mkdir() + # Presence of chroma.sqlite3 is the safety gate; validity is mocked below. (palace_dir / "chroma.sqlite3").write_text("db") mock_chromadb = SimpleNamespace( From 22328540e11fae2511a76d592bfda7268dd0f5ed Mon Sep 17 00:00:00 2001 From: Igor Lins e Silva <4753812+igorls@users.noreply.github.com> Date: Sun, 12 Apr 2026 22:20:27 -0300 Subject: [PATCH 8/9] style: ruff format --- mempalace/cli.py | 8 ++++++-- mempalace/query_sanitizer.py | 8 ++++++-- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/mempalace/cli.py b/mempalace/cli.py index a73d25e..806abcc 100644 --- a/mempalace/cli.py +++ b/mempalace/cli.py @@ -204,7 +204,9 @@ def cmd_repair(args): print(" Nothing to repair.") return - if not confirm_destructive_action("Repair", palace_path, assume_yes=getattr(args, "yes", False)): + if not confirm_destructive_action( + "Repair", palace_path, assume_yes=getattr(args, "yes", False) + ): return # Extract all drawers in batches @@ -567,7 +569,9 @@ def main(): action="store_true", help="Show what would be migrated without changing anything", ) - p_migrate.add_argument("--yes", action="store_true", help="Skip confirmation for destructive changes") + p_migrate.add_argument( + "--yes", action="store_true", help="Skip confirmation for destructive changes" + ) sub.add_parser("status", help="Show what's been filed") diff --git a/mempalace/query_sanitizer.py b/mempalace/query_sanitizer.py index dda29a8..91d0ca2 100644 --- a/mempalace/query_sanitizer.py +++ b/mempalace/query_sanitizer.py @@ -70,7 +70,9 @@ def sanitize_query(raw_query: str) -> dict: def _strip_wrapping_quotes(candidate: str) -> str: candidate = candidate.strip() - while len(candidate) >= 2 and candidate[:1] in QUOTE_CHARS and candidate[-1:] in QUOTE_CHARS: + while ( + len(candidate) >= 2 and candidate[:1] in QUOTE_CHARS and candidate[-1:] in QUOTE_CHARS + ): candidate = candidate[1:-1].strip() if not candidate: return "" @@ -86,7 +88,9 @@ def sanitize_query(raw_query: str) -> dict: return candidate nested_fragments = [ - _strip_wrapping_quotes(frag) for frag in _SENTENCE_SPLIT.split(candidate) if frag.strip() + _strip_wrapping_quotes(frag) + for frag in _SENTENCE_SPLIT.split(candidate) + if frag.strip() ] for frag in reversed(nested_fragments): if MIN_QUERY_LENGTH <= len(frag) <= MAX_QUERY_LENGTH: From c68370609d086534cf4237026068b6b0bed8cb95 Mon Sep 17 00:00:00 2001 From: Igor Lins e Silva <4753812+igorls@users.noreply.github.com> Date: Sun, 12 Apr 2026 23:07:46 -0300 Subject: [PATCH 9/9] fix: address Copilot review comments on PR #739 - query_sanitizer: require matching quote pair in _strip_wrapping_quotes - query_sanitizer: re-check MIN_QUERY_LENGTH after trim in tail_sentence path - migrate: neutral confirmation message accurate for both migrate and repair - cli: os.path.normpath instead of rstrip to handle '/' root edge case --- mempalace/cli.py | 2 +- mempalace/migrate.py | 2 +- mempalace/query_sanitizer.py | 4 +++- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/mempalace/cli.py b/mempalace/cli.py index 806abcc..8bf3f20 100644 --- a/mempalace/cli.py +++ b/mempalace/cli.py @@ -225,7 +225,7 @@ def cmd_repair(args): print(f" Extracted {len(all_ids)} drawers") # Backup and rebuild - palace_path = palace_path.rstrip(os.sep) + palace_path = os.path.normpath(palace_path) backup_path = palace_path + ".backup" if os.path.exists(backup_path): if not contains_palace_database(backup_path): diff --git a/mempalace/migrate.py b/mempalace/migrate.py index 40a9701..6ec4a59 100644 --- a/mempalace/migrate.py +++ b/mempalace/migrate.py @@ -117,7 +117,7 @@ def confirm_destructive_action( return True print(f"\n {operation_name} will replace data in: {palace_path}") - print(" A backup will be created first, but the original directory will be deleted.") + print(" A backup will be created first, then the palace will be rebuilt.") try: answer = input(" Continue? [y/N]: ").strip().lower() except EOFError: diff --git a/mempalace/query_sanitizer.py b/mempalace/query_sanitizer.py index 91d0ca2..774baea 100644 --- a/mempalace/query_sanitizer.py +++ b/mempalace/query_sanitizer.py @@ -71,7 +71,7 @@ def sanitize_query(raw_query: str) -> dict: def _strip_wrapping_quotes(candidate: str) -> str: candidate = candidate.strip() while ( - len(candidate) >= 2 and candidate[:1] in QUOTE_CHARS and candidate[-1:] in QUOTE_CHARS + len(candidate) >= 2 and candidate[:1] in QUOTE_CHARS and candidate[:1] == candidate[-1:] ): candidate = candidate[1:-1].strip() if not candidate: @@ -158,6 +158,8 @@ def sanitize_query(raw_query: str) -> dict: seg = seg.strip() if len(seg) >= MIN_QUERY_LENGTH: candidate = _trim_candidate(seg) + if len(candidate) < MIN_QUERY_LENGTH: + continue logger.warning( "Query sanitized: %d → %d chars (method=tail_sentence)", original_length,