Merge branch 'main' into fix/issue-339-338-silent-exceptions-pagination

This commit is contained in:
Ben Sigman
2026-04-09 23:31:45 -07:00
committed by GitHub
21 changed files with 774 additions and 195 deletions
+3 -3
View File
@@ -18,7 +18,7 @@ jobs:
with: with:
python-version: ${{ matrix.python-version }} python-version: ${{ matrix.python-version }}
- run: pip install -e ".[dev]" - run: pip install -e ".[dev]"
- run: python -m pytest tests/ -v --ignore=tests/benchmarks --cov=mempalace --cov-report=term-missing --cov-fail-under=85 - run: python -m pytest tests/ -v --ignore=tests/benchmarks --cov=mempalace --cov-report=term-missing --cov-fail-under=80
test-windows: test-windows:
runs-on: windows-latest runs-on: windows-latest
@@ -28,7 +28,7 @@ jobs:
with: with:
python-version: "3.9" python-version: "3.9"
- run: pip install -e ".[dev]" - run: pip install -e ".[dev]"
- run: python -m pytest tests/ -v --ignore=tests/benchmarks --cov=mempalace --cov-report=term-missing --cov-fail-under=85 - run: python -m pytest tests/ -v --ignore=tests/benchmarks --cov=mempalace --cov-report=term-missing --cov-fail-under=80
test-macos: test-macos:
runs-on: macos-latest runs-on: macos-latest
@@ -38,7 +38,7 @@ jobs:
with: with:
python-version: "3.9" python-version: "3.9"
- run: pip install -e ".[dev]" - run: pip install -e ".[dev]"
- run: python -m pytest tests/ -v --ignore=tests/benchmarks --cov=mempalace --cov-report=term-missing --cov-fail-under=85 - run: python -m pytest tests/ -v --ignore=tests/benchmarks --cov=mempalace --cov-report=term-missing --cov-fail-under=80
lint: lint:
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
+4 -1
View File
@@ -585,6 +585,9 @@ mempalace compress --wing myapp # AAAK compress
# Status # Status
mempalace status # palace overview mempalace status # palace overview
# MCP
mempalace mcp # show MCP setup command
``` ```
All commands accept `--palace <path>` to override the default location. All commands accept `--palace <path>` to override the default location.
@@ -707,7 +710,7 @@ PRs welcome. See [CONTRIBUTING.md](CONTRIBUTING.md) for setup and guidelines.
MIT — see [LICENSE](LICENSE). MIT — see [LICENSE](LICENSE).
<!-- Link Definitions --> <!-- Link Definitions -->
[version-shield]: https://img.shields.io/badge/version-3.0.0-4dc9f6?style=flat-square&labelColor=0a0e14 [version-shield]: https://img.shields.io/badge/version-3.1.0-4dc9f6?style=flat-square&labelColor=0a0e14
[release-link]: https://github.com/milla-jovovich/mempalace/releases [release-link]: https://github.com/milla-jovovich/mempalace/releases
[python-shield]: https://img.shields.io/badge/python-3.9+-7dd8f8?style=flat-square&labelColor=0a0e14&logo=python&logoColor=7dd8f8 [python-shield]: https://img.shields.io/badge/python-3.9+-7dd8f8?style=flat-square&labelColor=0a0e14&logo=python&logoColor=7dd8f8
[python-link]: https://www.python.org/ [python-link]: https://www.python.org/
+15 -8
View File
@@ -64,13 +64,20 @@ MEMPAL_DIR=""
# Read JSON input from stdin # Read JSON input from stdin
INPUT=$(cat) INPUT=$(cat)
# Parse fields from Claude Code's JSON # Parse all fields in a single Python call (3x faster than separate invocations)
SESSION_ID=$(echo "$INPUT" | python3 -c "import sys,json; print(json.load(sys.stdin).get('session_id','unknown'))" 2>/dev/null) eval $(echo "$INPUT" | python3 -c "
# Sanitize SESSION_ID to prevent path traversal (only allow alnum, dash, underscore) import sys, json
SESSION_ID=$(echo "$SESSION_ID" | tr -cd 'a-zA-Z0-9_-') data = json.load(sys.stdin)
[ -z "$SESSION_ID" ] && SESSION_ID="unknown" sid = data.get('session_id', 'unknown')
STOP_HOOK_ACTIVE=$(echo "$INPUT" | python3 -c "import sys,json; print(json.load(sys.stdin).get('stop_hook_active', False))" 2>/dev/null) sha = data.get('stop_hook_active', False)
TRANSCRIPT_PATH=$(echo "$INPUT" | python3 -c "import sys,json; print(json.load(sys.stdin).get('transcript_path',''))" 2>/dev/null) tp = data.get('transcript_path', '')
# Shell-safe output — only allow alphanumeric, underscore, hyphen, slash, dot, tilde
import re
safe = lambda s: re.sub(r'[^a-zA-Z0-9_/.\-~]', '', str(s))
print(f'SESSION_ID=\"{safe(sid)}\"')
print(f'STOP_HOOK_ACTIVE=\"{sha}\"')
print(f'TRANSCRIPT_PATH=\"{safe(tp)}\"')
" 2>/dev/null)
# Expand ~ in path # Expand ~ in path
TRANSCRIPT_PATH="${TRANSCRIPT_PATH/#\~/$HOME}" TRANSCRIPT_PATH="${TRANSCRIPT_PATH/#\~/$HOME}"
@@ -83,6 +90,7 @@ if [ "$STOP_HOOK_ACTIVE" = "True" ] || [ "$STOP_HOOK_ACTIVE" = "true" ]; then
fi fi
# Count human messages in the JSONL transcript # Count human messages in the JSONL transcript
# SECURITY: Pass transcript path as sys.argv to avoid shell injection via crafted paths
if [ -f "$TRANSCRIPT_PATH" ]; then if [ -f "$TRANSCRIPT_PATH" ]; then
EXCHANGE_COUNT=$(python3 - "$TRANSCRIPT_PATH" <<'PYEOF' EXCHANGE_COUNT=$(python3 - "$TRANSCRIPT_PATH" <<'PYEOF'
import json, sys import json, sys
@@ -94,7 +102,6 @@ with open(sys.argv[1]) as f:
msg = entry.get('message', {}) msg = entry.get('message', {})
if isinstance(msg, dict) and msg.get('role') == 'user': if isinstance(msg, dict) and msg.get('role') == 'user':
content = msg.get('content', '') content = msg.get('content', '')
# Skip system/command messages — only count real human input
if isinstance(content, str) and '<command-message>' in content: if isinstance(content, str) and '<command-message>' in content:
continue continue
count += 1 count += 1
+154
View File
@@ -0,0 +1,154 @@
---
name: mempalace
description: "MemPalace — Local AI memory with 96.6% recall. Semantic search, temporal knowledge graph, palace architecture (wings/rooms/drawers). Free, no cloud, no API keys."
version: 3.1.0
homepage: https://github.com/milla-jovovich/mempalace
user-invocable: true
metadata:
openclaw:
emoji: "\U0001F3DB"
os:
- darwin
- linux
- win32
requires:
anyBins:
- mempalace
- python3
install:
- id: mempalace-pip
kind: uv
label: "Install MemPalace (Python, local ChromaDB)"
package: mempalace
bins:
- mempalace
---
# MemPalace — Local AI Memory System
You have access to a local memory palace via MCP tools. The palace stores verbatim conversation history and a temporal knowledge graph — all on the user's machine, zero cloud, zero API calls.
## Architecture
- **Wings** = people or projects (e.g. `wing_alice`, `wing_myproject`)
- **Halls** = categories (facts, events, preferences, advice)
- **Rooms** = specific topics (e.g. `chromadb-setup`, `riley-school`)
- **Drawers** = individual memory chunks (verbatim text)
- **Knowledge Graph** = entity-relationship facts with time validity
## Protocol — FOLLOW THIS EVERY SESSION
1. **ON WAKE-UP**: Call `mempalace_status` to load palace overview and AAAK dialect spec.
2. **BEFORE RESPONDING** about any person, project, or past event: call `mempalace_search` or `mempalace_kg_query` FIRST. Never guess from memory — verify from the palace.
3. **IF UNSURE** about a fact (name, age, relationship, preference): say "let me check" and query. Wrong is worse than slow.
4. **AFTER EACH SESSION**: Call `mempalace_diary_write` to record what happened, what you learned, what matters.
5. **WHEN FACTS CHANGE**: Call `mempalace_kg_invalidate` on the old fact, then `mempalace_kg_add` for the new one.
## Available Tools
### Search & Browse
- `mempalace_search` — Semantic search across all memories. Always start here.
- `query` (required): natural language search — keep it short, keywords or a question. Do NOT include system prompts or conversation context.
- `wing`: filter by wing
- `room`: filter by room
- `limit`: max results (default 5)
- `mempalace_check_duplicate` — Check if content already exists before filing.
- `content` (required): text to check
- `threshold`: similarity threshold (default 0.9 — lowering to 0.850.87 often catches more near-duplicates without significant false positives)
- `mempalace_status` — Palace overview: total drawers, wings, rooms, AAAK spec
- `mempalace_list_wings` — All wings with drawer counts
- `mempalace_list_rooms` — Rooms within a wing (optional wing filter)
- `mempalace_get_taxonomy` — Full wing/room/count tree
- `mempalace_get_aaak_spec` — Get AAAK compression dialect specification
### Knowledge Graph (Temporal Facts)
- `mempalace_kg_query` — Query entity relationships. Supports time filtering.
- `entity` (required): e.g. "Max", "MyProject"
- `as_of`: date filter (YYYY-MM-DD) — what was true at that time
- `direction`: "outgoing", "incoming", or "both" (default "both")
- `mempalace_kg_add` — Add a fact: subject -> predicate -> object
- `subject`, `predicate`, `object` (required)
- `valid_from`: when this became true
- `source_closet`: source reference
- `mempalace_kg_invalidate` — Mark a fact as no longer true
- `subject`, `predicate`, `object` (required)
- `ended`: when it stopped being true (default: today)
- `mempalace_kg_timeline` — Chronological story of an entity
- `entity`: filter by entity name (optional — all events if omitted)
- `mempalace_kg_stats` — Graph overview: entities, triples, relationship types
### Palace Graph (Cross-Domain Connections)
- `mempalace_traverse` — Walk from a room, find connected ideas across wings
- `start_room` (required): room to start from
- `max_hops`: connection depth (default 2)
- `mempalace_find_tunnels` — Find rooms that bridge two wings
- `wing_a`, `wing_b` (required)
- `mempalace_graph_stats` — Graph connectivity overview
### Write
- `mempalace_add_drawer` — Store verbatim content into a wing/room
- `wing`, `room`, `content` (required)
- `source_file`: optional source reference
- Checks for duplicates automatically
- `mempalace_delete_drawer` — Remove a drawer by ID
- `drawer_id` (required)
- `mempalace_diary_write` — Write a session diary entry
- `agent_name` (required): your name/identifier
- `entry` (required): what happened, what you learned, what matters
- `topic`: category tag (default "general")
- `mempalace_diary_read` — Read recent diary entries
- `agent_name` (required)
- `last_n`: number of entries (default 10)
## Setup
Install MemPalace and populate the palace:
```bash
pip install mempalace
mempalace init ~/my-convos
mempalace mine ~/my-convos
```
### OpenClaw MCP config
Add to your OpenClaw MCP configuration:
```json
{
"mcpServers": {
"mempalace": {
"command": "python3",
"args": ["-m", "mempalace.mcp_server"]
}
}
}
```
Or via CLI:
```bash
openclaw mcp set mempalace '{"command":"python3","args":["-m","mempalace.mcp_server"]}'
```
### Other MCP hosts
```bash
# Claude Code
claude mcp add mempalace -- python -m mempalace.mcp_server
# Cursor — add to .cursor/mcp.json
# Codex — add to .codex/mcp.json
```
## Tips
- Search is semantic (meaning-based), not keyword. "What did we discuss about database performance?" works better than "database".
- The knowledge graph stores typed relationships with time windows. Use it for facts about people and projects — it knows WHEN things were true.
- Diary entries accumulate across sessions. Write one at the end of each conversation to build continuity.
- Use `mempalace_check_duplicate` before storing new content to avoid duplicates.
- The AAAK dialect (from `mempalace_status`) is a compressed notation for efficient storage. Read it naturally — expand codes mentally, treat *markers* as emotional context.
## License
[MemPalace](https://github.com/milla-jovovich/mempalace) is MIT licensed. Created by Milla Jovovich, Ben Sigman, Igor Lins e Silva, and contributors.
+31
View File
@@ -14,6 +14,7 @@ Commands:
mempalace mine <dir> Mine project files (default) mempalace mine <dir> Mine project files (default)
mempalace mine <dir> --mode convos Mine conversation exports mempalace mine <dir> --mode convos Mine conversation exports
mempalace search "query" Find anything, exact words mempalace search "query" Find anything, exact words
mempalace mcp Show MCP setup command
mempalace wake-up Show L0 + L1 wake-up context mempalace wake-up Show L0 + L1 wake-up context
mempalace wake-up --wing my_app Wake-up for a specific project mempalace wake-up --wing my_app Wake-up for a specific project
mempalace status Show what's been filed mempalace status Show what's been filed
@@ -28,6 +29,7 @@ Examples:
import os import os
import sys import sys
import shlex
import argparse import argparse
from pathlib import Path from pathlib import Path
@@ -202,6 +204,7 @@ def cmd_repair(args):
print(f" Extracted {len(all_ids)} drawers") print(f" Extracted {len(all_ids)} drawers")
# Backup and rebuild # Backup and rebuild
palace_path = palace_path.rstrip(os.sep)
backup_path = palace_path + ".backup" backup_path = palace_path + ".backup"
if os.path.exists(backup_path): if os.path.exists(backup_path):
shutil.rmtree(backup_path) shutil.rmtree(backup_path)
@@ -240,6 +243,27 @@ def cmd_instructions(args):
run_instructions(name=args.name) run_instructions(name=args.name)
def cmd_mcp(args):
"""Show how to wire MemPalace into MCP-capable hosts."""
base_server_cmd = "python -m mempalace.mcp_server"
if args.palace:
resolved_palace = str(Path(args.palace).expanduser())
server_cmd = f"{base_server_cmd} --palace {shlex.quote(resolved_palace)}"
else:
server_cmd = base_server_cmd
print("MemPalace MCP quick setup:")
print(f" claude mcp add mempalace -- {server_cmd}")
print("\nRun the server directly:")
print(f" {server_cmd}")
if not args.palace:
print("\nOptional custom palace:")
print(f" claude mcp add mempalace -- {base_server_cmd} --palace /path/to/palace")
print(f" {base_server_cmd} --palace /path/to/palace")
def cmd_compress(args): def cmd_compress(args):
"""Compress drawers in a wing using AAAK Dialect.""" """Compress drawers in a wing using AAAK Dialect."""
import chromadb import chromadb
@@ -500,6 +524,12 @@ def main():
help="Rebuild palace vector index from stored data (fixes segfaults after corruption)", help="Rebuild palace vector index from stored data (fixes segfaults after corruption)",
) )
# mcp
sub.add_parser(
"mcp",
help="Show MCP setup command for connecting MemPalace to your AI client",
)
# status # status
sub.add_parser("status", help="Show what's been filed") sub.add_parser("status", help="Show what's been filed")
@@ -531,6 +561,7 @@ def main():
"mine": cmd_mine, "mine": cmd_mine,
"split": cmd_split, "split": cmd_split,
"search": cmd_search, "search": cmd_search,
"mcp": cmd_mcp,
"compress": cmd_compress, "compress": cmd_compress,
"wake-up": cmd_wakeup, "wake-up": cmd_wakeup,
"repair": cmd_repair, "repair": cmd_repair,
+60
View File
@@ -6,8 +6,58 @@ Priority: env vars > config file (~/.mempalace/config.json) > defaults
import json import json
import os import os
import re
from pathlib import Path from pathlib import Path
# ── Input validation ──────────────────────────────────────────────────────────
# Shared sanitizers for wing/room/entity names. Prevents path traversal,
# excessively long strings, and special characters that could cause issues
# in file paths, SQLite, or ChromaDB metadata.
MAX_NAME_LENGTH = 128
_SAFE_NAME_RE = re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_ .'-]{0,126}[a-zA-Z0-9]?$")
def sanitize_name(value: str, field_name: str = "name") -> str:
"""Validate and sanitize a wing/room/entity name.
Raises ValueError if the name is invalid.
"""
if not isinstance(value, str) or not value.strip():
raise ValueError(f"{field_name} must be a non-empty string")
value = value.strip()
if len(value) > MAX_NAME_LENGTH:
raise ValueError(f"{field_name} exceeds maximum length of {MAX_NAME_LENGTH} characters")
# Block path traversal
if ".." in value or "/" in value or "\\" in value:
raise ValueError(f"{field_name} contains invalid path characters")
# Block null bytes
if "\x00" in value:
raise ValueError(f"{field_name} contains null bytes")
# Enforce safe character set
if not _SAFE_NAME_RE.match(value):
raise ValueError(f"{field_name} contains invalid characters")
return value
def sanitize_content(value: str, max_length: int = 100_000) -> str:
"""Validate drawer/diary content length."""
if not isinstance(value, str) or not value.strip():
raise ValueError("content must be a non-empty string")
if len(value) > max_length:
raise ValueError(f"content exceeds maximum length of {max_length} characters")
if "\x00" in value:
raise ValueError("content contains null bytes")
return value
DEFAULT_PALACE_PATH = os.path.expanduser("~/.mempalace/palace") DEFAULT_PALACE_PATH = os.path.expanduser("~/.mempalace/palace")
DEFAULT_COLLECTION_NAME = "mempalace_drawers" DEFAULT_COLLECTION_NAME = "mempalace_drawers"
@@ -126,6 +176,11 @@ class MempalaceConfig:
def init(self): def init(self):
"""Create config directory and write default config.json if it doesn't exist.""" """Create config directory and write default config.json if it doesn't exist."""
self._config_dir.mkdir(parents=True, exist_ok=True) self._config_dir.mkdir(parents=True, exist_ok=True)
# Restrict directory permissions to owner only (Unix)
try:
self._config_dir.chmod(0o700)
except (OSError, NotImplementedError):
pass # Windows doesn't support Unix permissions
if not self._config_file.exists(): if not self._config_file.exists():
default_config = { default_config = {
"palace_path": DEFAULT_PALACE_PATH, "palace_path": DEFAULT_PALACE_PATH,
@@ -135,6 +190,11 @@ class MempalaceConfig:
} }
with open(self._config_file, "w") as f: with open(self._config_file, "w") as f:
json.dump(default_config, f, indent=2) json.dump(default_config, f, indent=2)
# Restrict config file to owner read/write only
try:
self._config_file.chmod(0o600)
except (OSError, NotImplementedError):
pass
return self._config_file return self._config_file
def save_people_map(self, people_map): def save_people_map(self, people_map):
+11 -35
View File
@@ -15,9 +15,8 @@ from pathlib import Path
from datetime import datetime from datetime import datetime
from collections import defaultdict from collections import defaultdict
import chromadb
from .normalize import normalize from .normalize import normalize
from .palace import SKIP_DIRS, get_collection, file_already_mined
# File types that might contain conversations # File types that might contain conversations
@@ -28,22 +27,8 @@ CONVO_EXTENSIONS = {
".jsonl", ".jsonl",
} }
SKIP_DIRS = {
".git",
"node_modules",
"__pycache__",
".venv",
"venv",
"env",
"dist",
"build",
".next",
".mempalace",
"tool-results",
"memory",
}
MIN_CHUNK_SIZE = 30 MIN_CHUNK_SIZE = 30
MAX_FILE_SIZE = 10 * 1024 * 1024 # 10 MB — skip files larger than this
# ============================================================================= # =============================================================================
@@ -211,23 +196,6 @@ def detect_convo_room(content: str) -> str:
# ============================================================================= # =============================================================================
def get_collection(palace_path: str):
os.makedirs(palace_path, exist_ok=True)
client = chromadb.PersistentClient(path=palace_path)
try:
return client.get_collection("mempalace_drawers")
except Exception:
return client.create_collection("mempalace_drawers")
def file_already_mined(collection, source_file: str) -> bool:
try:
results = collection.get(where={"source_file": source_file}, limit=1)
return len(results.get("ids", [])) > 0
except Exception:
return False
# ============================================================================= # =============================================================================
# SCAN FOR CONVERSATION FILES # SCAN FOR CONVERSATION FILES
# ============================================================================= # =============================================================================
@@ -244,6 +212,14 @@ def scan_convos(convo_dir: str) -> list:
continue continue
filepath = Path(root) / filename filepath = Path(root) / filename
if filepath.suffix.lower() in CONVO_EXTENSIONS: if filepath.suffix.lower() in CONVO_EXTENSIONS:
# Skip symlinks and oversized files
if filepath.is_symlink():
continue
try:
if filepath.stat().st_size > MAX_FILE_SIZE:
continue
except OSError:
continue
files.append(filepath) files.append(filepath)
return files return files
@@ -356,7 +332,7 @@ def mine_convos(
chunk_room = chunk.get("memory_type", room) if extract_mode == "general" else room chunk_room = chunk.get("memory_type", room) if extract_mode == "general" else room
if extract_mode == "general": if extract_mode == "general":
room_counts[chunk_room] += 1 room_counts[chunk_room] += 1
drawer_id = f"drawer_{wing}_{chunk_room}_{hashlib.md5((source_file + str(chunk['chunk_index'])).encode(), usedforsecurity=False).hexdigest()[:16]}" drawer_id = f"drawer_{wing}_{chunk_room}_{hashlib.sha256((source_file + str(chunk['chunk_index'])).encode()).hexdigest()[:24]}"
try: try:
collection.add( collection.add(
documents=[chunk["content"]], documents=[chunk["content"]],
+1
View File
@@ -60,6 +60,7 @@ AI memory system. Store everything, find anything. Local, free, no API key.
mempalace compress Compress palace storage mempalace compress Compress palace storage
mempalace status Show palace status mempalace status Show palace status
mempalace repair Rebuild vector index mempalace repair Rebuild vector index
mempalace mcp Show MCP setup command
mempalace hook run Run hook logic (for harness integration) mempalace hook run Run hook logic (for harness integration)
mempalace instructions <name> Output skill instructions mempalace instructions <name> Output skill instructions
+83 -77
View File
@@ -50,11 +50,14 @@ class KnowledgeGraph:
def __init__(self, db_path: str = None): def __init__(self, db_path: str = None):
self.db_path = db_path or DEFAULT_KG_PATH self.db_path = db_path or DEFAULT_KG_PATH
Path(self.db_path).parent.mkdir(parents=True, exist_ok=True) Path(self.db_path).parent.mkdir(parents=True, exist_ok=True)
self._connection = None
self._init_db() self._init_db()
def _init_db(self): def _init_db(self):
conn = self._conn() conn = self._conn()
conn.executescript(""" conn.executescript("""
PRAGMA journal_mode=WAL;
CREATE TABLE IF NOT EXISTS entities ( CREATE TABLE IF NOT EXISTS entities (
id TEXT PRIMARY KEY, id TEXT PRIMARY KEY,
name TEXT NOT NULL, name TEXT NOT NULL,
@@ -84,12 +87,19 @@ class KnowledgeGraph:
CREATE INDEX IF NOT EXISTS idx_triples_valid ON triples(valid_from, valid_to); CREATE INDEX IF NOT EXISTS idx_triples_valid ON triples(valid_from, valid_to);
""") """)
conn.commit() conn.commit()
conn.close()
def _conn(self): def _conn(self):
conn = sqlite3.connect(self.db_path, timeout=10) if self._connection is None:
conn.execute("PRAGMA journal_mode=WAL") self._connection = sqlite3.connect(self.db_path, timeout=10, check_same_thread=False)
return conn self._connection.execute("PRAGMA journal_mode=WAL")
self._connection.row_factory = sqlite3.Row
return self._connection
def close(self):
"""Close the database connection."""
if self._connection is not None:
self._connection.close()
self._connection = None
def _entity_id(self, name: str) -> str: def _entity_id(self, name: str) -> str:
return name.lower().replace(" ", "_").replace("'", "") return name.lower().replace(" ", "_").replace("'", "")
@@ -101,12 +111,11 @@ class KnowledgeGraph:
eid = self._entity_id(name) eid = self._entity_id(name)
props = json.dumps(properties or {}) props = json.dumps(properties or {})
conn = self._conn() conn = self._conn()
conn.execute( with conn:
"INSERT OR REPLACE INTO entities (id, name, type, properties) VALUES (?, ?, ?, ?)", conn.execute(
(eid, name, entity_type, props), "INSERT OR REPLACE INTO entities (id, name, type, properties) VALUES (?, ?, ?, ?)",
) (eid, name, entity_type, props),
conn.commit() )
conn.close()
return eid return eid
def add_triple( def add_triple(
@@ -134,38 +143,38 @@ class KnowledgeGraph:
# Auto-create entities if they don't exist # Auto-create entities if they don't exist
conn = self._conn() conn = self._conn()
conn.execute("INSERT OR IGNORE INTO entities (id, name) VALUES (?, ?)", (sub_id, subject)) with conn:
conn.execute("INSERT OR IGNORE INTO entities (id, name) VALUES (?, ?)", (obj_id, obj)) conn.execute(
"INSERT OR IGNORE INTO entities (id, name) VALUES (?, ?)", (sub_id, subject)
)
conn.execute("INSERT OR IGNORE INTO entities (id, name) VALUES (?, ?)", (obj_id, obj))
# Check for existing identical triple # Check for existing identical triple
existing = conn.execute( existing = conn.execute(
"SELECT id FROM triples WHERE subject=? AND predicate=? AND object=? AND valid_to IS NULL", "SELECT id FROM triples WHERE subject=? AND predicate=? AND object=? AND valid_to IS NULL",
(sub_id, pred, obj_id), (sub_id, pred, obj_id),
).fetchone() ).fetchone()
if existing: if existing:
conn.close() return existing["id"] # Already exists and still valid
return existing[0] # Already exists and still valid
triple_id = f"t_{sub_id}_{pred}_{obj_id}_{hashlib.md5(f'{valid_from}{datetime.now().isoformat()}'.encode()).hexdigest()[:8]}" triple_id = f"t_{sub_id}_{pred}_{obj_id}_{hashlib.sha256(f'{valid_from}{datetime.now().isoformat()}'.encode()).hexdigest()[:12]}"
conn.execute( conn.execute(
"""INSERT INTO triples (id, subject, predicate, object, valid_from, valid_to, confidence, source_closet, source_file) """INSERT INTO triples (id, subject, predicate, object, valid_from, valid_to, confidence, source_closet, source_file)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)""", VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)""",
( (
triple_id, triple_id,
sub_id, sub_id,
pred, pred,
obj_id, obj_id,
valid_from, valid_from,
valid_to, valid_to,
confidence, confidence,
source_closet, source_closet,
source_file, source_file,
), ),
) )
conn.commit()
conn.close()
return triple_id return triple_id
def invalidate(self, subject: str, predicate: str, obj: str, ended: str = None): def invalidate(self, subject: str, predicate: str, obj: str, ended: str = None):
@@ -176,12 +185,11 @@ class KnowledgeGraph:
ended = ended or date.today().isoformat() ended = ended or date.today().isoformat()
conn = self._conn() conn = self._conn()
conn.execute( with conn:
"UPDATE triples SET valid_to=? WHERE subject=? AND predicate=? AND object=? AND valid_to IS NULL", conn.execute(
(ended, sub_id, pred, obj_id), "UPDATE triples SET valid_to=? WHERE subject=? AND predicate=? AND object=? AND valid_to IS NULL",
) (ended, sub_id, pred, obj_id),
conn.commit() )
conn.close()
# ── Query operations ────────────────────────────────────────────────── # ── Query operations ──────────────────────────────────────────────────
@@ -208,13 +216,13 @@ class KnowledgeGraph:
{ {
"direction": "outgoing", "direction": "outgoing",
"subject": name, "subject": name,
"predicate": row[2], "predicate": row["predicate"],
"object": row[10], # obj_name "object": row["obj_name"],
"valid_from": row[4], "valid_from": row["valid_from"],
"valid_to": row[5], "valid_to": row["valid_to"],
"confidence": row[6], "confidence": row["confidence"],
"source_closet": row[7], "source_closet": row["source_closet"],
"current": row[5] is None, "current": row["valid_to"] is None,
} }
) )
@@ -228,18 +236,17 @@ class KnowledgeGraph:
results.append( results.append(
{ {
"direction": "incoming", "direction": "incoming",
"subject": row[10], # sub_name "subject": row["sub_name"],
"predicate": row[2], "predicate": row["predicate"],
"object": name, "object": name,
"valid_from": row[4], "valid_from": row["valid_from"],
"valid_to": row[5], "valid_to": row["valid_to"],
"confidence": row[6], "confidence": row["confidence"],
"source_closet": row[7], "source_closet": row["source_closet"],
"current": row[5] is None, "current": row["valid_to"] is None,
} }
) )
conn.close()
return results return results
def query_relationship(self, predicate: str, as_of: str = None): def query_relationship(self, predicate: str, as_of: str = None):
@@ -262,15 +269,14 @@ class KnowledgeGraph:
for row in conn.execute(query, params).fetchall(): for row in conn.execute(query, params).fetchall():
results.append( results.append(
{ {
"subject": row[10], "subject": row["sub_name"],
"predicate": pred, "predicate": pred,
"object": row[11], "object": row["obj_name"],
"valid_from": row[4], "valid_from": row["valid_from"],
"valid_to": row[5], "valid_to": row["valid_to"],
"current": row[5] is None, "current": row["valid_to"] is None,
} }
) )
conn.close()
return results return results
def timeline(self, entity_name: str = None): def timeline(self, entity_name: str = None):
@@ -300,15 +306,14 @@ class KnowledgeGraph:
LIMIT 100 LIMIT 100
""").fetchall() """).fetchall()
conn.close()
return [ return [
{ {
"subject": r[10], "subject": r["sub_name"],
"predicate": r[2], "predicate": r["predicate"],
"object": r[11], "object": r["obj_name"],
"valid_from": r[4], "valid_from": r["valid_from"],
"valid_to": r[5], "valid_to": r["valid_to"],
"current": r[5] is None, "current": r["valid_to"] is None,
} }
for r in rows for r in rows
] ]
@@ -317,17 +322,18 @@ class KnowledgeGraph:
def stats(self): def stats(self):
conn = self._conn() conn = self._conn()
entities = conn.execute("SELECT COUNT(*) FROM entities").fetchone()[0] entities = conn.execute("SELECT COUNT(*) as cnt FROM entities").fetchone()["cnt"]
triples = conn.execute("SELECT COUNT(*) FROM triples").fetchone()[0] triples = conn.execute("SELECT COUNT(*) as cnt FROM triples").fetchone()["cnt"]
current = conn.execute("SELECT COUNT(*) FROM triples WHERE valid_to IS NULL").fetchone()[0] current = conn.execute(
"SELECT COUNT(*) as cnt FROM triples WHERE valid_to IS NULL"
).fetchone()["cnt"]
expired = triples - current expired = triples - current
predicates = [ predicates = [
r[0] r["predicate"]
for r in conn.execute( for r in conn.execute(
"SELECT DISTINCT predicate FROM triples ORDER BY predicate" "SELECT DISTINCT predicate FROM triples ORDER BY predicate"
).fetchall() ).fetchall()
] ]
conn.close()
return { return {
"entities": entities, "entities": entities,
"triples": triples, "triples": triples,
+142 -10
View File
@@ -24,8 +24,9 @@ import json
import logging import logging
import hashlib import hashlib
from datetime import datetime from datetime import datetime
from pathlib import Path
from .config import MempalaceConfig from .config import MempalaceConfig, sanitize_name, sanitize_content
from .version import __version__ from .version import __version__
from .searcher import search_memories from .searcher import search_memories
from .palace_graph import traverse, find_tunnels, graph_stats from .palace_graph import traverse, find_tunnels, graph_stats
@@ -66,16 +67,60 @@ _client_cache = None
_collection_cache = None _collection_cache = None
# ==================== WRITE-AHEAD LOG ====================
# Every write operation is logged to a JSONL file before execution.
# This provides an audit trail for detecting memory poisoning and
# enables review/rollback of writes from external or untrusted sources.
_WAL_DIR = Path(os.path.expanduser("~/.mempalace/wal"))
_WAL_DIR.mkdir(parents=True, exist_ok=True)
try:
_WAL_DIR.chmod(0o700)
except (OSError, NotImplementedError):
pass
_WAL_FILE = _WAL_DIR / "write_log.jsonl"
def _wal_log(operation: str, params: dict, result: dict = None):
"""Append a write operation to the write-ahead log."""
entry = {
"timestamp": datetime.now().isoformat(),
"operation": operation,
"params": params,
"result": result,
}
try:
with open(_WAL_FILE, "a", encoding="utf-8") as f:
f.write(json.dumps(entry, default=str) + "\n")
try:
_WAL_FILE.chmod(0o600)
except (OSError, NotImplementedError):
pass
except Exception as e:
logger.error(f"WAL write failed: {e}")
_client_cache = None
_collection_cache = None
def _get_client():
"""Return a singleton ChromaDB PersistentClient."""
global _client_cache
if _client_cache is None:
_client_cache = chromadb.PersistentClient(path=_config.palace_path)
return _client_cache
def _get_collection(create=False): def _get_collection(create=False):
"""Return the ChromaDB collection, caching the client between calls.""" """Return the ChromaDB collection, caching the client between calls."""
global _client_cache, _collection_cache global _collection_cache
try: try:
if _client_cache is None: client = _get_client()
_client_cache = chromadb.PersistentClient(path=_config.palace_path)
if create: if create:
_collection_cache = _client_cache.get_or_create_collection(_config.collection_name) _collection_cache = client.get_or_create_collection(_config.collection_name)
elif _collection_cache is None: elif _collection_cache is None:
_collection_cache = _client_cache.get_collection(_config.collection_name) _collection_cache = client.get_collection(_config.collection_name)
return _collection_cache return _collection_cache
except Exception: except Exception:
return None return None
@@ -330,11 +375,30 @@ def tool_add_drawer(
wing: str, room: str, content: str, source_file: str = None, added_by: str = "mcp" wing: str, room: str, content: str, source_file: str = None, added_by: str = "mcp"
): ):
"""File verbatim content into a wing/room. Checks for duplicates first.""" """File verbatim content into a wing/room. Checks for duplicates first."""
try:
wing = sanitize_name(wing, "wing")
room = sanitize_name(room, "room")
content = sanitize_content(content)
except ValueError as e:
return {"success": False, "error": str(e)}
col = _get_collection(create=True) col = _get_collection(create=True)
if not col: if not col:
return _no_palace() return _no_palace()
drawer_id = f"drawer_{wing}_{room}_{hashlib.md5(content.encode()).hexdigest()[:16]}" drawer_id = f"drawer_{wing}_{room}_{hashlib.sha256((wing + room + content[:100]).encode()).hexdigest()[:24]}"
_wal_log(
"add_drawer",
{
"drawer_id": drawer_id,
"wing": wing,
"room": room,
"added_by": added_by,
"content_length": len(content),
"content_preview": content[:200],
},
)
# Idempotency: if the deterministic ID already exists, return success as a no-op. # Idempotency: if the deterministic ID already exists, return success as a no-op.
try: try:
@@ -373,6 +437,19 @@ def tool_delete_drawer(drawer_id: str):
existing = col.get(ids=[drawer_id]) existing = col.get(ids=[drawer_id])
if not existing["ids"]: if not existing["ids"]:
return {"success": False, "error": f"Drawer not found: {drawer_id}"} return {"success": False, "error": f"Drawer not found: {drawer_id}"}
# Log the deletion with the content being removed for audit trail
deleted_content = existing.get("documents", [""])[0] if existing.get("documents") else ""
deleted_meta = existing.get("metadatas", [{}])[0] if existing.get("metadatas") else {}
_wal_log(
"delete_drawer",
{
"drawer_id": drawer_id,
"deleted_meta": deleted_meta,
"content_preview": deleted_content[:200],
},
)
try: try:
col.delete(ids=[drawer_id]) col.delete(ids=[drawer_id])
logger.info(f"Deleted drawer: {drawer_id}") logger.info(f"Deleted drawer: {drawer_id}")
@@ -394,6 +471,23 @@ def tool_kg_add(
subject: str, predicate: str, object: str, valid_from: str = None, source_closet: str = None subject: str, predicate: str, object: str, valid_from: str = None, source_closet: str = None
): ):
"""Add a relationship to the knowledge graph.""" """Add a relationship to the knowledge graph."""
try:
subject = sanitize_name(subject, "subject")
predicate = sanitize_name(predicate, "predicate")
object = sanitize_name(object, "object")
except ValueError as e:
return {"success": False, "error": str(e)}
_wal_log(
"kg_add",
{
"subject": subject,
"predicate": predicate,
"object": object,
"valid_from": valid_from,
"source_closet": source_closet,
},
)
triple_id = _kg.add_triple( triple_id = _kg.add_triple(
subject, predicate, object, valid_from=valid_from, source_closet=source_closet subject, predicate, object, valid_from=valid_from, source_closet=source_closet
) )
@@ -402,6 +496,10 @@ def tool_kg_add(
def tool_kg_invalidate(subject: str, predicate: str, object: str, ended: str = None): def tool_kg_invalidate(subject: str, predicate: str, object: str, ended: str = None):
"""Mark a fact as no longer true (set end date).""" """Mark a fact as no longer true (set end date)."""
_wal_log(
"kg_invalidate",
{"subject": subject, "predicate": predicate, "object": object, "ended": ended},
)
_kg.invalidate(subject, predicate, object, ended=ended) _kg.invalidate(subject, predicate, object, ended=ended)
return { return {
"success": True, "success": True,
@@ -432,6 +530,12 @@ def tool_diary_write(agent_name: str, entry: str, topic: str = "general"):
This is the agent's personal journal — observations, thoughts, This is the agent's personal journal — observations, thoughts,
what it worked on, what it noticed, what it thinks matters. what it worked on, what it noticed, what it thinks matters.
""" """
try:
agent_name = sanitize_name(agent_name, "agent_name")
entry = sanitize_content(entry)
except ValueError as e:
return {"success": False, "error": str(e)}
wing = f"wing_{agent_name.lower().replace(' ', '_')}" wing = f"wing_{agent_name.lower().replace(' ', '_')}"
room = "diary" room = "diary"
col = _get_collection(create=True) col = _get_collection(create=True)
@@ -439,9 +543,23 @@ def tool_diary_write(agent_name: str, entry: str, topic: str = "general"):
return _no_palace() return _no_palace()
now = datetime.now() now = datetime.now()
entry_id = f"diary_{wing}_{now.strftime('%Y%m%d_%H%M%S')}_{hashlib.md5(entry[:50].encode()).hexdigest()[:8]}" entry_id = f"diary_{wing}_{now.strftime('%Y%m%d_%H%M%S')}_{hashlib.sha256(entry[:50].encode()).hexdigest()[:12]}"
_wal_log(
"diary_write",
{
"agent_name": agent_name,
"topic": topic,
"entry_id": entry_id,
"entry_preview": entry[:200],
},
)
try: try:
# TODO: Future versions should expand AAAK before embedding to improve
# semantic search quality. For now, store raw AAAK in metadata so it's
# preserved, and keep the document as-is for embedding (even though
# compressed AAAK degrades embedding quality).
col.add( col.add(
ids=[entry_id], ids=[entry_id],
documents=[entry], documents=[entry],
@@ -767,17 +885,31 @@ TOOLS = {
} }
SUPPORTED_PROTOCOL_VERSIONS = [
"2025-11-25",
"2025-06-18",
"2025-03-26",
"2024-11-05",
]
def handle_request(request): def handle_request(request):
method = request.get("method", "") method = request.get("method", "")
params = request.get("params", {}) params = request.get("params", {})
req_id = request.get("id") req_id = request.get("id")
if method == "initialize": if method == "initialize":
client_version = params.get("protocolVersion", SUPPORTED_PROTOCOL_VERSIONS[-1])
negotiated = (
client_version
if client_version in SUPPORTED_PROTOCOL_VERSIONS
else SUPPORTED_PROTOCOL_VERSIONS[0]
)
return { return {
"jsonrpc": "2.0", "jsonrpc": "2.0",
"id": req_id, "id": req_id,
"result": { "result": {
"protocolVersion": "2024-11-05", "protocolVersion": negotiated,
"capabilities": {"tools": {}}, "capabilities": {"tools": {}},
"serverInfo": {"name": "mempalace", "version": __version__}, "serverInfo": {"name": "mempalace", "version": __version__},
}, },
@@ -797,7 +929,7 @@ def handle_request(request):
} }
elif method == "tools/call": elif method == "tools/call":
tool_name = params.get("name") tool_name = params.get("name")
tool_args = params.get("arguments", {}) tool_args = params.get("arguments") or {}
if tool_name not in TOOLS: if tool_name not in TOOLS:
return { return {
"jsonrpc": "2.0", "jsonrpc": "2.0",
+14 -58
View File
@@ -17,6 +17,8 @@ from collections import defaultdict
import chromadb import chromadb
from .palace import SKIP_DIRS, get_collection, file_already_mined
READABLE_EXTENSIONS = { READABLE_EXTENSIONS = {
".txt", ".txt",
".md", ".md",
@@ -40,32 +42,6 @@ READABLE_EXTENSIONS = {
".toml", ".toml",
} }
SKIP_DIRS = {
".git",
"node_modules",
"__pycache__",
".venv",
"venv",
"env",
"dist",
"build",
".next",
"coverage",
".mempalace",
".ruff_cache",
".mypy_cache",
".pytest_cache",
".cache",
".tox",
".nox",
".idea",
".vscode",
".ipynb_checkpoints",
".eggs",
"htmlcov",
"target",
}
SKIP_FILENAMES = { SKIP_FILENAMES = {
"mempalace.yaml", "mempalace.yaml",
"mempalace.yml", "mempalace.yml",
@@ -78,6 +54,7 @@ SKIP_FILENAMES = {
CHUNK_SIZE = 800 # chars per drawer CHUNK_SIZE = 800 # chars per drawer
CHUNK_OVERLAP = 100 # overlap between chunks CHUNK_OVERLAP = 100 # overlap between chunks
MIN_CHUNK_SIZE = 50 # skip tiny chunks MIN_CHUNK_SIZE = 50 # skip tiny chunks
MAX_FILE_SIZE = 10 * 1024 * 1024 # 10 MB — skip files larger than this
# ============================================================================= # =============================================================================
@@ -393,41 +370,11 @@ def chunk_text(content: str, source_file: str) -> list:
# ============================================================================= # =============================================================================
def get_collection(palace_path: str):
os.makedirs(palace_path, exist_ok=True)
client = chromadb.PersistentClient(path=palace_path)
try:
return client.get_collection("mempalace_drawers")
except Exception:
return client.create_collection("mempalace_drawers")
def file_already_mined(collection, source_file: str) -> bool:
"""Fast check: has this file been filed before and is unchanged?
Compares the stored mtime in drawer metadata against the file's current
mtime. Returns False (needs re-mining) when the file has been modified
since it was last mined, or when no mtime was stored.
"""
try:
results = collection.get(where={"source_file": source_file}, limit=1)
if not results.get("ids"):
return False
stored_meta = results["metadatas"][0] if results.get("metadatas") else {}
stored_mtime = stored_meta.get("source_mtime")
if stored_mtime is None:
return False
current_mtime = os.path.getmtime(source_file)
return float(stored_mtime) == current_mtime
except Exception:
return False
def add_drawer( def add_drawer(
collection, wing: str, room: str, content: str, source_file: str, chunk_index: int, agent: str collection, wing: str, room: str, content: str, source_file: str, chunk_index: int, agent: str
): ):
"""Add one drawer to the palace.""" """Add one drawer to the palace."""
drawer_id = f"drawer_{wing}_{room}_{hashlib.md5((source_file + str(chunk_index)).encode(), usedforsecurity=False).hexdigest()[:16]}" drawer_id = f"drawer_{wing}_{room}_{hashlib.sha256((source_file + str(chunk_index)).encode()).hexdigest()[:24]}"
try: try:
metadata = { metadata = {
"wing": wing, "wing": wing,
@@ -470,7 +417,7 @@ def process_file(
# Skip if already filed # Skip if already filed
source_file = str(filepath) source_file = str(filepath)
if not dry_run and file_already_mined(collection, source_file): if not dry_run and file_already_mined(collection, source_file, check_mtime=True):
return 0, None return 0, None
try: try:
@@ -562,6 +509,15 @@ def scan_project(
if respect_gitignore and active_matchers and not force_include: if respect_gitignore and active_matchers and not force_include:
if is_gitignored(filepath, active_matchers, is_dir=False): if is_gitignored(filepath, active_matchers, is_dir=False):
continue continue
# Skip symlinks — prevents following links to /dev/urandom, etc.
if filepath.is_symlink():
continue
# Skip files exceeding size limit
try:
if filepath.stat().st_size > MAX_FILE_SIZE:
continue
except OSError:
continue
files.append(filepath) files.append(filepath)
return files return files
+6
View File
@@ -25,6 +25,12 @@ def normalize(filepath: str) -> str:
Load a file and normalize to transcript format if it's a chat export. Load a file and normalize to transcript format if it's a chat export.
Plain text files pass through unchanged. Plain text files pass through unchanged.
""" """
try:
file_size = os.path.getsize(filepath)
except OSError as e:
raise IOError(f"Could not read {filepath}: {e}")
if file_size > 500 * 1024 * 1024: # 500 MB safety limit
raise IOError(f"File too large ({file_size // (1024*1024)} MB): {filepath}")
try: try:
with open(filepath, "r", encoding="utf-8", errors="replace") as f: with open(filepath, "r", encoding="utf-8", errors="replace") as f:
content = f.read() content = f.read()
+71
View File
@@ -0,0 +1,71 @@
"""
palace.py — Shared palace operations.
Consolidates ChromaDB access patterns used by both miners and the MCP server.
"""
import os
import chromadb
SKIP_DIRS = {
".git",
"node_modules",
"__pycache__",
".venv",
"venv",
"env",
"dist",
"build",
".next",
"coverage",
".mempalace",
".ruff_cache",
".mypy_cache",
".pytest_cache",
".cache",
".tox",
".nox",
".idea",
".vscode",
".ipynb_checkpoints",
".eggs",
"htmlcov",
"target",
}
def get_collection(palace_path: str, collection_name: str = "mempalace_drawers"):
"""Get or create the palace ChromaDB collection."""
os.makedirs(palace_path, exist_ok=True)
try:
os.chmod(palace_path, 0o700)
except (OSError, NotImplementedError):
pass
client = chromadb.PersistentClient(path=palace_path)
try:
return client.get_collection(collection_name)
except Exception:
return client.create_collection(collection_name)
def file_already_mined(collection, source_file: str, check_mtime: bool = False) -> bool:
"""Check if a file has already been filed in the palace.
When check_mtime=True (used by project miner), returns False if the file
has been modified since it was last mined, so it gets re-mined.
When check_mtime=False (used by convo miner), just checks existence.
"""
try:
results = collection.get(where={"source_file": source_file}, limit=1)
if not results.get("ids"):
return False
if check_mtime:
stored_meta = results.get("metadatas", [{}])[0]
stored_mtime = stored_meta.get("source_mtime")
if stored_mtime is None:
return False
current_mtime = os.path.getmtime(source_file)
return float(stored_mtime) == current_mtime
return True
except Exception:
return False
+8
View File
@@ -182,6 +182,10 @@ def split_file(filepath, output_dir, dry_run=False):
Returns list of output paths written (or would be written if dry_run). Returns list of output paths written (or would be written if dry_run).
""" """
path = Path(filepath) path = Path(filepath)
max_size = 500 * 1024 * 1024 # 500 MB safety limit
if path.stat().st_size > max_size:
print(f" SKIP: {path.name} exceeds {max_size // (1024*1024)} MB limit")
return []
lines = path.read_text(errors="replace").splitlines(keepends=True) lines = path.read_text(errors="replace").splitlines(keepends=True)
boundaries = find_session_boundaries(lines) boundaries = find_session_boundaries(lines)
@@ -266,7 +270,11 @@ def main():
files = sorted(src_dir.glob("*.txt")) files = sorted(src_dir.glob("*.txt"))
mega_files = [] mega_files = []
max_scan_size = 500 * 1024 * 1024 # 500 MB
for f in files: for f in files:
if f.stat().st_size > max_scan_size:
print(f" SKIP: {f.name} exceeds {max_scan_size // (1024*1024)} MB limit")
continue
lines = f.read_text(errors="replace").splitlines(keepends=True) lines = f.read_text(errors="replace").splitlines(keepends=True)
boundaries = find_session_boundaries(lines) boundaries = find_session_boundaries(lines)
if len(boundaries) >= args.min_sessions: if len(boundaries) >= args.min_sessions:
+1 -1
View File
@@ -1,3 +1,3 @@
"""Single source of truth for the MemPalace package version.""" """Single source of truth for the MemPalace package version."""
__version__ = "3.0.14" __version__ = "3.1.0"
+2 -2
View File
@@ -1,6 +1,6 @@
[project] [project]
name = "mempalace" name = "mempalace"
version = "3.0.14" version = "3.1.0"
description = "Give your AI a memory — mine projects and conversations into a searchable palace. No API key required." description = "Give your AI a memory — mine projects and conversations into a searchable palace. No API key required."
readme = "README.md" readme = "README.md"
requires-python = ">=3.9" requires-python = ">=3.9"
@@ -26,7 +26,7 @@ classifiers = [
] ]
dependencies = [ dependencies = [
"chromadb>=0.5.0,<0.7", "chromadb>=0.5.0,<0.7",
"pyyaml>=6.0", "pyyaml>=6.0,<7",
] ]
[project.urls] [project.urls]
+43
View File
@@ -2,6 +2,7 @@
import argparse import argparse
import sys import sys
from pathlib import Path
from unittest.mock import MagicMock, patch from unittest.mock import MagicMock, patch
import pytest import pytest
@@ -326,6 +327,35 @@ def test_main_split_dispatches():
mock_cmd.assert_called_once() mock_cmd.assert_called_once()
def test_mcp_command_prints_setup_guidance(monkeypatch, capsys):
monkeypatch.setattr(sys, "argv", ["mempalace", "mcp"])
main()
captured = capsys.readouterr()
assert "MemPalace MCP quick setup:" in captured.out
assert "claude mcp add mempalace -- python -m mempalace.mcp_server" in captured.out
assert "\nOptional custom palace:\n" in captured.out
assert "python -m mempalace.mcp_server --palace /path/to/palace" in captured.out
assert "[--palace /path/to/palace]" not in captured.out
assert captured.err == ""
def test_mcp_command_uses_custom_palace_path_when_provided(monkeypatch, capsys):
monkeypatch.setattr(sys, "argv", ["mempalace", "--palace", "~/tmp/my palace", "mcp"])
main()
captured = capsys.readouterr()
expanded = str(Path("~/tmp/my palace").expanduser())
assert "python -m mempalace.mcp_server --palace" in captured.out
assert expanded in captured.out
assert "Optional custom palace:" not in captured.out
assert "[--palace /path/to/palace]" not in captured.out
assert captured.err == ""
def test_main_hook_no_subcommand_prints_help(capsys): def test_main_hook_no_subcommand_prints_help(capsys):
with patch("sys.argv", ["mempalace", "hook"]): with patch("sys.argv", ["mempalace", "hook"]):
main() main()
@@ -607,3 +637,16 @@ def test_cmd_compress_stores_results(mock_config_cls, capsys):
out = capsys.readouterr().out out = capsys.readouterr().out
assert "Stored" in out assert "Stored" in out
mock_comp_col.upsert.assert_called_once() mock_comp_col.upsert.assert_called_once()
def test_cmd_repair_trailing_slash_does_not_recurse():
"""Repair with trailing slash should put backup outside palace dir (#395)."""
import os
args = argparse.Namespace(palace="/tmp/fake_palace/")
with patch("mempalace.cli.os.path.isdir", return_value=False):
cmd_repair(args)
# Verify the rstrip logic: palace_path should not end with separator
palace_path = os.path.expanduser(args.palace).rstrip(os.sep)
backup_path = palace_path + ".backup"
assert not backup_path.startswith(palace_path + os.sep)
+61
View File
@@ -42,6 +42,50 @@ class TestHandleRequest:
assert resp["result"]["serverInfo"]["name"] == "mempalace" assert resp["result"]["serverInfo"]["name"] == "mempalace"
assert resp["id"] == 1 assert resp["id"] == 1
def test_initialize_negotiates_client_version(self):
from mempalace.mcp_server import handle_request
resp = handle_request(
{
"method": "initialize",
"id": 1,
"params": {"protocolVersion": "2025-11-25"},
}
)
assert resp["result"]["protocolVersion"] == "2025-11-25"
def test_initialize_negotiates_older_supported_version(self):
from mempalace.mcp_server import handle_request
resp = handle_request(
{
"method": "initialize",
"id": 1,
"params": {"protocolVersion": "2025-03-26"},
}
)
assert resp["result"]["protocolVersion"] == "2025-03-26"
def test_initialize_unknown_version_falls_back_to_latest(self):
from mempalace.mcp_server import handle_request
resp = handle_request(
{
"method": "initialize",
"id": 1,
"params": {"protocolVersion": "9999-12-31"},
}
)
from mempalace.mcp_server import SUPPORTED_PROTOCOL_VERSIONS
assert resp["result"]["protocolVersion"] == SUPPORTED_PROTOCOL_VERSIONS[0]
def test_initialize_missing_version_uses_oldest(self):
from mempalace.mcp_server import handle_request, SUPPORTED_PROTOCOL_VERSIONS
resp = handle_request({"method": "initialize", "id": 1, "params": {}})
assert resp["result"]["protocolVersion"] == SUPPORTED_PROTOCOL_VERSIONS[-1]
def test_notifications_initialized_returns_none(self): def test_notifications_initialized_returns_none(self):
from mempalace.mcp_server import handle_request from mempalace.mcp_server import handle_request
@@ -59,6 +103,23 @@ class TestHandleRequest:
assert "mempalace_add_drawer" in names assert "mempalace_add_drawer" in names
assert "mempalace_kg_add" in names assert "mempalace_kg_add" in names
def test_null_arguments_does_not_hang(self, monkeypatch, config, palace_path, seeded_kg):
"""Sending arguments: null should return a result, not hang (#394)."""
_patch_mcp_server(monkeypatch, config, seeded_kg)
from mempalace.mcp_server import handle_request
_client, _col = _get_collection(palace_path, create=True)
del _client
resp = handle_request(
{
"method": "tools/call",
"id": 10,
"params": {"name": "mempalace_status", "arguments": None},
}
)
assert "error" not in resp
assert resp["result"] is not None
def test_unknown_tool(self): def test_unknown_tool(self):
from mempalace.mcp_server import handle_request from mempalace.mcp_server import handle_request
+54
View File
@@ -7,6 +7,7 @@ import chromadb
import yaml import yaml
from mempalace.miner import mine, scan_project from mempalace.miner import mine, scan_project
from mempalace.palace import file_already_mined
def write_file(path: Path, content: str): def write_file(path: Path, content: str):
@@ -206,3 +207,56 @@ def test_scan_project_skip_dirs_still_apply_without_override():
assert scanned_files(project_root, respect_gitignore=False) == ["main.py"] assert scanned_files(project_root, respect_gitignore=False) == ["main.py"]
finally: finally:
shutil.rmtree(tmpdir) shutil.rmtree(tmpdir)
def test_file_already_mined_check_mtime():
tmpdir = tempfile.mkdtemp()
try:
palace_path = os.path.join(tmpdir, "palace")
os.makedirs(palace_path)
client = chromadb.PersistentClient(path=palace_path)
col = client.get_or_create_collection("mempalace_drawers")
test_file = os.path.join(tmpdir, "test.txt")
with open(test_file, "w") as f:
f.write("hello world")
mtime = os.path.getmtime(test_file)
# Not mined yet
assert file_already_mined(col, test_file) is False
assert file_already_mined(col, test_file, check_mtime=True) is False
# Add it with mtime
col.add(
ids=["d1"],
documents=["hello world"],
metadatas=[{"source_file": test_file, "source_mtime": str(mtime)}],
)
# Already mined (no mtime check)
assert file_already_mined(col, test_file) is True
# Already mined (mtime matches)
assert file_already_mined(col, test_file, check_mtime=True) is True
# Modify file and force a different mtime (Windows has low mtime resolution)
with open(test_file, "w") as f:
f.write("modified content")
os.utime(test_file, (mtime + 10, mtime + 10))
# Still mined without mtime check
assert file_already_mined(col, test_file) is True
# Needs re-mining with mtime check
assert file_already_mined(col, test_file, check_mtime=True) is False
# Record with no mtime stored should return False for check_mtime
col.add(
ids=["d2"],
documents=["other"],
metadatas=[{"source_file": "/fake/no_mtime.txt"}],
)
assert file_already_mined(col, "/fake/no_mtime.txt", check_mtime=True) is False
finally:
# Release ChromaDB file handles before cleanup (required on Windows)
del col, client
shutil.rmtree(tmpdir, ignore_errors=True)
+10
View File
@@ -499,3 +499,13 @@ def test_messages_to_transcript_assistant_first():
result = _messages_to_transcript(msgs, spellcheck=False) result = _messages_to_transcript(msgs, spellcheck=False)
assert "preamble" in result assert "preamble" in result
assert "> Q" in result assert "> Q" in result
def test_normalize_rejects_large_file():
"""Files over 500 MB should raise IOError before reading."""
with patch("mempalace.normalize.os.path.getsize", return_value=600 * 1024 * 1024):
try:
normalize("/fake/huge_file.txt")
assert False, "Should have raised IOError"
except IOError as e:
assert "too large" in str(e).lower()