Merge branch 'main' into fix/issue-339-338-silent-exceptions-pagination

2026-04-09 23:31:45 -07:00
parent df464a991d a036b4300d
commit a3fec4f565
21 changed files with 774 additions and 195 deletions
@@ -18,7 +18,7 @@ jobs:
        with:
          python-version: ${{ matrix.python-version }}
      - run: pip install -e ".[dev]"
-      - run: python -m pytest tests/ -v --ignore=tests/benchmarks --cov=mempalace --cov-report=term-missing --cov-fail-under=85
+      - run: python -m pytest tests/ -v --ignore=tests/benchmarks --cov=mempalace --cov-report=term-missing --cov-fail-under=80
  test-windows:
    runs-on: windows-latest
@@ -28,7 +28,7 @@ jobs:
        with:
          python-version: "3.9"
      - run: pip install -e ".[dev]"
-      - run: python -m pytest tests/ -v --ignore=tests/benchmarks --cov=mempalace --cov-report=term-missing --cov-fail-under=85
+      - run: python -m pytest tests/ -v --ignore=tests/benchmarks --cov=mempalace --cov-report=term-missing --cov-fail-under=80
  test-macos:
    runs-on: macos-latest
@@ -38,7 +38,7 @@ jobs:
        with:
          python-version: "3.9"
      - run: pip install -e ".[dev]"
-      - run: python -m pytest tests/ -v --ignore=tests/benchmarks --cov=mempalace --cov-report=term-missing --cov-fail-under=85
+      - run: python -m pytest tests/ -v --ignore=tests/benchmarks --cov=mempalace --cov-report=term-missing --cov-fail-under=80
  lint:
    runs-on: ubuntu-latest
    steps:
@@ -585,6 +585,9 @@ mempalace compress --wing myapp                   # AAAK compress
 # Status
 mempalace status                                  # palace overview
 # MCP
 mempalace mcp                                     # show MCP setup command
 ```
 All commands accept `--palace <path>` to override the default location.
@@ -707,7 +710,7 @@ PRs welcome. See [CONTRIBUTING.md](CONTRIBUTING.md) for setup and guidelines.
 MIT — see [LICENSE](LICENSE).
 <!-- Link Definitions -->
-[version-shield]: https://img.shields.io/badge/version-3.0.0-4dc9f6?style=flat-square&labelColor=0a0e14
+[version-shield]: https://img.shields.io/badge/version-3.1.0-4dc9f6?style=flat-square&labelColor=0a0e14
 [release-link]: https://github.com/milla-jovovich/mempalace/releases
 [python-shield]: https://img.shields.io/badge/python-3.9+-7dd8f8?style=flat-square&labelColor=0a0e14&logo=python&logoColor=7dd8f8
 [python-link]: https://www.python.org/
@@ -64,13 +64,20 @@ MEMPAL_DIR=""
 # Read JSON input from stdin
 INPUT=$(cat)
-# Parse fields from Claude Code's JSON
+# Parse all fields in a single Python call (3x faster than separate invocations)
-SESSION_ID=$(echo "$INPUT" | python3 -c "import sys,json; print(json.load(sys.stdin).get('session_id','unknown'))" 2>/dev/null)
+eval $(echo "$INPUT" | python3 -c "
-# Sanitize SESSION_ID to prevent path traversal (only allow alnum, dash, underscore)
+import sys, json
-SESSION_ID=$(echo "$SESSION_ID" | tr -cd 'a-zA-Z0-9_-')
+data = json.load(sys.stdin)
-[ -z "$SESSION_ID" ] && SESSION_ID="unknown"
+sid = data.get('session_id', 'unknown')
-STOP_HOOK_ACTIVE=$(echo "$INPUT" | python3 -c "import sys,json; print(json.load(sys.stdin).get('stop_hook_active', False))" 2>/dev/null)
+sha = data.get('stop_hook_active', False)
-TRANSCRIPT_PATH=$(echo "$INPUT" | python3 -c "import sys,json; print(json.load(sys.stdin).get('transcript_path',''))" 2>/dev/null)
+tp = data.get('transcript_path', '')
 # Shell-safe output — only allow alphanumeric, underscore, hyphen, slash, dot, tilde.
 # Every line printed below is eval-ed by the calling shell, so each value must be
 # filtered first. stop_hook_active comes from the same untrusted JSON as the other
 # fields; a string value there would otherwise be executed as shell code.
 import re
 safe = lambda s: re.sub(r'[^a-zA-Z0-9_/.\-~]', '', str(s))
 # The session id is filtered more strictly (no slash, dot or tilde) so it can never
 # carry path separators, and it falls back to unknown when nothing survives.
 safe_id = lambda s: re.sub(r'[^a-zA-Z0-9_-]', '', str(s)) or 'unknown'
 print(f'SESSION_ID=\"{safe_id(sid)}\"')
 print(f'STOP_HOOK_ACTIVE=\"{safe(sha)}\"')
 print(f'TRANSCRIPT_PATH=\"{safe(tp)}\"')
 " 2>/dev/null)
 # Expand ~ in path
 TRANSCRIPT_PATH="${TRANSCRIPT_PATH/#\~/$HOME}"
@@ -83,6 +90,7 @@ if [ "$STOP_HOOK_ACTIVE" = "True" ] || [ "$STOP_HOOK_ACTIVE" = "true" ]; then
 fi
 # Count human messages in the JSONL transcript
 # SECURITY: Pass transcript path as sys.argv to avoid shell injection via crafted paths
 if [ -f "$TRANSCRIPT_PATH" ]; then
    EXCHANGE_COUNT=$(python3 - "$TRANSCRIPT_PATH" <<'PYEOF'
 import json, sys
@@ -94,7 +102,6 @@ with open(sys.argv[1]) as f:
            msg = entry.get('message', {})
            if isinstance(msg, dict) and msg.get('role') == 'user':
                content = msg.get('content', '')
                # Skip system/command messages — only count real human input
                if isinstance(content, str) and '<command-message>' in content:
                    continue
                count += 1
@@ -0,0 +1,154 @@
 ---
 name: mempalace
 description: "MemPalace — Local AI memory with 96.6% recall. Semantic search, temporal knowledge graph, palace architecture (wings/rooms/drawers). Free, no cloud, no API keys."
 version: 3.1.0
 homepage: https://github.com/milla-jovovich/mempalace
 user-invocable: true
 metadata:
  openclaw:
    emoji: "\U0001F3DB"
    os:
      - darwin
      - linux
      - win32
    requires:
      anyBins:
        - mempalace
        - python3
    install:
      - id: mempalace-pip
        kind: uv
        label: "Install MemPalace (Python, local ChromaDB)"
        package: mempalace
        bins:
          - mempalace
 ---
 # MemPalace — Local AI Memory System
 You have access to a local memory palace via MCP tools. The palace stores verbatim conversation history and a temporal knowledge graph — all on the user's machine, zero cloud, zero API calls.
 ## Architecture
 - **Wings** = people or projects (e.g. `wing_alice`, `wing_myproject`)
 - **Halls** = categories (facts, events, preferences, advice)
 - **Rooms** = specific topics (e.g. `chromadb-setup`, `riley-school`)
 - **Drawers** = individual memory chunks (verbatim text)
 - **Knowledge Graph** = entity-relationship facts with time validity
 ## Protocol — FOLLOW THIS EVERY SESSION
 1. **ON WAKE-UP**: Call `mempalace_status` to load palace overview and AAAK dialect spec.
 2. **BEFORE RESPONDING** about any person, project, or past event: call `mempalace_search` or `mempalace_kg_query` FIRST. Never guess from memory — verify from the palace.
 3. **IF UNSURE** about a fact (name, age, relationship, preference): say "let me check" and query. Wrong is worse than slow.
 4. **AFTER EACH SESSION**: Call `mempalace_diary_write` to record what happened, what you learned, what matters.
 5. **WHEN FACTS CHANGE**: Call `mempalace_kg_invalidate` on the old fact, then `mempalace_kg_add` for the new one.
 ## Available Tools
 ### Search & Browse
 - `mempalace_search` — Semantic search across all memories. Always start here.
  - `query` (required): natural language search — keep it short, keywords or a question. Do NOT include system prompts or conversation context.
  - `wing`: filter by wing
  - `room`: filter by room
  - `limit`: max results (default 5)
 - `mempalace_check_duplicate` — Check if content already exists before filing.
  - `content` (required): text to check
  - `threshold`: similarity threshold (default 0.9 — lowering to 0.85–0.87 often catches more near-duplicates without significant false positives)
 - `mempalace_status` — Palace overview: total drawers, wings, rooms, AAAK spec
 - `mempalace_list_wings` — All wings with drawer counts
 - `mempalace_list_rooms` — Rooms within a wing (optional wing filter)
 - `mempalace_get_taxonomy` — Full wing/room/count tree
 - `mempalace_get_aaak_spec` — Get AAAK compression dialect specification
 ### Knowledge Graph (Temporal Facts)
 - `mempalace_kg_query` — Query entity relationships. Supports time filtering.
  - `entity` (required): e.g. "Max", "MyProject"
  - `as_of`: date filter (YYYY-MM-DD) — what was true at that time
  - `direction`: "outgoing", "incoming", or "both" (default "both")
 - `mempalace_kg_add` — Add a fact: subject -> predicate -> object
  - `subject`, `predicate`, `object` (required)
  - `valid_from`: when this became true
  - `source_closet`: source reference
 - `mempalace_kg_invalidate` — Mark a fact as no longer true
  - `subject`, `predicate`, `object` (required)
  - `ended`: when it stopped being true (default: today)
 - `mempalace_kg_timeline` — Chronological story of an entity
  - `entity`: filter by entity name (optional — all events if omitted)
 - `mempalace_kg_stats` — Graph overview: entities, triples, relationship types
 ### Palace Graph (Cross-Domain Connections)
 - `mempalace_traverse` — Walk from a room, find connected ideas across wings
  - `start_room` (required): room to start from
  - `max_hops`: connection depth (default 2)
 - `mempalace_find_tunnels` — Find rooms that bridge two wings
  - `wing_a`, `wing_b` (required)
 - `mempalace_graph_stats` — Graph connectivity overview
 ### Write
 - `mempalace_add_drawer` — Store verbatim content into a wing/room
  - `wing`, `room`, `content` (required)
  - `source_file`: optional source reference
  - Checks for duplicates automatically
 - `mempalace_delete_drawer` — Remove a drawer by ID
  - `drawer_id` (required)
 - `mempalace_diary_write` — Write a session diary entry
  - `agent_name` (required): your name/identifier
  - `entry` (required): what happened, what you learned, what matters
  - `topic`: category tag (default "general")
 - `mempalace_diary_read` — Read recent diary entries
  - `agent_name` (required)
  - `last_n`: number of entries (default 10)
 ## Setup
 Install MemPalace and populate the palace:
 ```bash
 pip install mempalace
 mempalace init ~/my-convos
 mempalace mine ~/my-convos
 ```
 ### OpenClaw MCP config
 Add to your OpenClaw MCP configuration:
 ```json
 {
  "mcpServers": {
    "mempalace": {
      "command": "python3",
      "args": ["-m", "mempalace.mcp_server"]
    }
  }
 }
 ```
 Or via CLI:
 ```bash
 openclaw mcp set mempalace '{"command":"python3","args":["-m","mempalace.mcp_server"]}'
 ```
 ### Other MCP hosts
 ```bash
 # Claude Code
 claude mcp add mempalace -- python -m mempalace.mcp_server
 # Cursor — add to .cursor/mcp.json
 # Codex — add to .codex/mcp.json
 ```
 ## Tips
 - Search is semantic (meaning-based), not keyword. "What did we discuss about database performance?" works better than "database".
 - The knowledge graph stores typed relationships with time windows. Use it for facts about people and projects — it knows WHEN things were true.
 - Diary entries accumulate across sessions. Write one at the end of each conversation to build continuity.
 - Use `mempalace_check_duplicate` before storing new content to avoid duplicates.
 - The AAAK dialect (from `mempalace_status`) is a compressed notation for efficient storage. Read it naturally — expand codes mentally, treat *markers* as emotional context.
 ## License
 [MemPalace](https://github.com/milla-jovovich/mempalace) is MIT licensed. Created by Milla Jovovich, Ben Sigman, Igor Lins e Silva, and contributors.
@@ -14,6 +14,7 @@ Commands:
    mempalace mine <dir>                  Mine project files (default)
    mempalace mine <dir> --mode convos    Mine conversation exports
    mempalace search "query"              Find anything, exact words
    mempalace mcp                         Show MCP setup command
    mempalace wake-up                     Show L0 + L1 wake-up context
    mempalace wake-up --wing my_app       Wake-up for a specific project
    mempalace status                      Show what's been filed
@@ -28,6 +29,7 @@ Examples:
 import os
 import sys
 import shlex
 import argparse
 from pathlib import Path
@@ -202,6 +204,7 @@ def cmd_repair(args):
    print(f"  Extracted {len(all_ids)} drawers")
    # Backup and rebuild
    palace_path = palace_path.rstrip(os.sep)
    backup_path = palace_path + ".backup"
    if os.path.exists(backup_path):
        shutil.rmtree(backup_path)
@@ -240,6 +243,27 @@ def cmd_instructions(args):
    run_instructions(name=args.name)
 def cmd_mcp(args):
    """Print copy-paste commands for connecting an MCP host to MemPalace.

    When ``args.palace`` is set, the path is user-expanded, shell-quoted and
    appended as ``--palace`` to both printed commands. Otherwise the plain
    commands are printed, followed by an example of the optional
    ``--palace`` form.
    """
    base_server_cmd = "python -m mempalace.mcp_server"
    palace_arg = args.palace
    server_cmd = base_server_cmd
    if palace_arg:
        resolved_palace = str(Path(palace_arg).expanduser())
        server_cmd = f"{base_server_cmd} --palace {shlex.quote(resolved_palace)}"
    output_lines = [
        "MemPalace MCP quick setup:",
        f"  claude mcp add mempalace -- {server_cmd}",
        "\nRun the server directly:",
        f"  {server_cmd}",
    ]
    if not palace_arg:
        # No custom palace requested: also show what the override looks like.
        output_lines.append("\nOptional custom palace:")
        output_lines.append(
            f"  claude mcp add mempalace -- {base_server_cmd} --palace /path/to/palace"
        )
        output_lines.append(f"  {base_server_cmd} --palace /path/to/palace")
    for text in output_lines:
        print(text)
 def cmd_compress(args):
    """Compress drawers in a wing using AAAK Dialect."""
    import chromadb
@@ -500,6 +524,12 @@ def main():
        help="Rebuild palace vector index from stored data (fixes segfaults after corruption)",
    )
    # mcp
    sub.add_parser(
        "mcp",
        help="Show MCP setup command for connecting MemPalace to your AI client",
    )
    # status
    sub.add_parser("status", help="Show what's been filed")
@@ -531,6 +561,7 @@ def main():
        "mine": cmd_mine,
        "split": cmd_split,
        "search": cmd_search,
        "mcp": cmd_mcp,
        "compress": cmd_compress,
        "wake-up": cmd_wakeup,
        "repair": cmd_repair,
@@ -6,8 +6,58 @@ Priority: env vars > config file (~/.mempalace/config.json) > defaults
 import json
 import os
 import re
 from pathlib import Path
 # ── Input validation ──────────────────────────────────────────────────────────
 # Shared sanitizers for wing/room/entity names. Prevents path traversal,
 # excessively long strings, and special characters that could cause issues
 # in file paths, SQLite, or ChromaDB metadata.
 MAX_NAME_LENGTH = 128
 _SAFE_NAME_RE = re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_ .'-]{0,126}[a-zA-Z0-9]?$")
 def sanitize_name(value: str, field_name: str = "name") -> str:
    """Return *value* with surrounding whitespace removed if it is a safe name.

    Checks run in a fixed order and the first failure wins: non-empty string,
    length limit, path-traversal fragments, null bytes, then the allowed
    character set. *field_name* is only used to prefix the error message.

    Raises:
        ValueError: if any of the checks above fails.
    """
    stripped = value.strip() if isinstance(value, str) else ""
    if not stripped:
        raise ValueError(f"{field_name} must be a non-empty string")
    if len(stripped) > MAX_NAME_LENGTH:
        raise ValueError(f"{field_name} exceeds maximum length of {MAX_NAME_LENGTH} characters")
    # Reject anything that could climb out of, or nest inside, a directory.
    if any(fragment in stripped for fragment in ("..", "/", "\\")):
        raise ValueError(f"{field_name} contains invalid path characters")
    # Null bytes get their own message before the generic character check.
    if "\x00" in stripped:
        raise ValueError(f"{field_name} contains null bytes")
    # Whatever is left must fit the allow-listed character set.
    if _SAFE_NAME_RE.match(stripped) is None:
        raise ValueError(f"{field_name} contains invalid characters")
    return stripped
 def sanitize_content(value: str, max_length: int = 100_000) -> str:
    """Validate drawer/diary content and return it unchanged.

    The content must be a string with at least one non-whitespace character,
    at most *max_length* characters long, and free of null bytes. Unlike the
    emptiness check, the returned value is not stripped.

    Raises:
        ValueError: if any of the three checks fails (checked in that order).
    """
    is_blank = not isinstance(value, str) or value.strip() == ""
    if is_blank:
        raise ValueError("content must be a non-empty string")
    too_long = len(value) > max_length
    if too_long:
        raise ValueError(f"content exceeds maximum length of {max_length} characters")
    has_null = value.find("\x00") != -1
    if has_null:
        raise ValueError("content contains null bytes")
    return value
 DEFAULT_PALACE_PATH = os.path.expanduser("~/.mempalace/palace")
 DEFAULT_COLLECTION_NAME = "mempalace_drawers"
@@ -126,6 +176,11 @@ class MempalaceConfig:
    def init(self):
        """Create config directory and write default config.json if it doesn't exist."""
        self._config_dir.mkdir(parents=True, exist_ok=True)
        # Restrict directory permissions to owner only (Unix)
        try:
            self._config_dir.chmod(0o700)
        except (OSError, NotImplementedError):
            pass  # Windows doesn't support Unix permissions
        if not self._config_file.exists():
            default_config = {
                "palace_path": DEFAULT_PALACE_PATH,
@@ -135,6 +190,11 @@ class MempalaceConfig:
            }
            with open(self._config_file, "w") as f:
                json.dump(default_config, f, indent=2)
            # Restrict config file to owner read/write only
            try:
                self._config_file.chmod(0o600)
            except (OSError, NotImplementedError):
                pass
        return self._config_file
    def save_people_map(self, people_map):
@@ -15,9 +15,8 @@ from pathlib import Path
 from datetime import datetime
 from collections import defaultdict
 import chromadb
 from .normalize import normalize
 from .palace import SKIP_DIRS, get_collection, file_already_mined
 # File types that might contain conversations
@@ -28,22 +27,8 @@ CONVO_EXTENSIONS = {
    ".jsonl",
 }
 SKIP_DIRS = {
    ".git",
    "node_modules",
    "__pycache__",
    ".venv",
    "venv",
    "env",
    "dist",
    "build",
    ".next",
    ".mempalace",
    "tool-results",
    "memory",
 }
 MIN_CHUNK_SIZE = 30
 MAX_FILE_SIZE = 10 * 1024 * 1024  # 10 MB — skip files larger than this
 # =============================================================================
@@ -211,23 +196,6 @@ def detect_convo_room(content: str) -> str:
 # =============================================================================
 def get_collection(palace_path: str, collection_name: str = "mempalace_drawers"):
    """Open the drawer collection stored at *palace_path*, creating it if needed.

    Args:
        palace_path: directory of the persistent ChromaDB store; it is created
            when it does not exist yet.
        collection_name: collection to open. Defaults to the name that was
            previously hard-coded here.

    Returns:
        The ChromaDB collection object.
    """
    os.makedirs(palace_path, exist_ok=True)
    client = chromadb.PersistentClient(path=palace_path)
    # One call instead of get / except Exception / create: the old fallback
    # treated every failure as a missing collection and hid the real error
    # behind a second call.
    return client.get_or_create_collection(collection_name)
 def file_already_mined(collection, source_file: str) -> bool:
    """Report whether *collection* already holds an entry for *source_file*.

    Asks the collection for at most one record whose ``source_file`` metadata
    equals the given path. Any error raised by the lookup is treated as
    "not mined" and yields False.
    """
    try:
        lookup = collection.get(where={"source_file": source_file}, limit=1)
        match_count = len(lookup.get("ids", []))
    except Exception:
        return False
    return match_count > 0
 # =============================================================================
 # SCAN FOR CONVERSATION FILES
 # =============================================================================
@@ -244,6 +212,14 @@ def scan_convos(convo_dir: str) -> list:
                continue
            filepath = Path(root) / filename
            if filepath.suffix.lower() in CONVO_EXTENSIONS:
                # Skip symlinks and oversized files
                if filepath.is_symlink():
                    continue
                try:
                    if filepath.stat().st_size > MAX_FILE_SIZE:
                        continue
                except OSError:
                    continue
                files.append(filepath)
    return files
@@ -356,7 +332,7 @@ def mine_convos(
            chunk_room = chunk.get("memory_type", room) if extract_mode == "general" else room
            if extract_mode == "general":
                room_counts[chunk_room] += 1
-            drawer_id = f"drawer_{wing}_{chunk_room}_{hashlib.md5((source_file + str(chunk['chunk_index'])).encode(), usedforsecurity=False).hexdigest()[:16]}"
+            drawer_id = f"drawer_{wing}_{chunk_room}_{hashlib.sha256((source_file + str(chunk['chunk_index'])).encode()).hexdigest()[:24]}"
            try:
                collection.add(
                    documents=[chunk["content"]],
@@ -60,6 +60,7 @@ AI memory system. Store everything, find anything. Local, free, no API key.
    mempalace compress                    Compress palace storage
    mempalace status                      Show palace status
    mempalace repair                      Rebuild vector index
    mempalace mcp                         Show MCP setup command
    mempalace hook run                    Run hook logic (for harness integration)
    mempalace instructions <name>         Output skill instructions
@@ -50,11 +50,14 @@ class KnowledgeGraph:
    def __init__(self, db_path: str = None):
        """Set up the graph backed by the SQLite file at *db_path*.

        Falls back to ``DEFAULT_KG_PATH`` when *db_path* is empty or None,
        creates the parent directory if it is missing, starts with no cached
        connection, and then runs ``_init_db()``.
        """
        self.db_path = db_path if db_path else DEFAULT_KG_PATH
        db_file = Path(self.db_path)
        db_file.parent.mkdir(parents=True, exist_ok=True)
        self._connection = None
        self._init_db()
    def _init_db(self):
        conn = self._conn()
        conn.executescript("""
            PRAGMA journal_mode=WAL;
            CREATE TABLE IF NOT EXISTS entities (
                id TEXT PRIMARY KEY,
                name TEXT NOT NULL,
@@ -84,12 +87,19 @@ class KnowledgeGraph:
            CREATE INDEX IF NOT EXISTS idx_triples_valid ON triples(valid_from, valid_to);
        """)
        conn.commit()
        conn.close()
    def _conn(self):
-        conn = sqlite3.connect(self.db_path, timeout=10)
+        if self._connection is None:
-        conn.execute("PRAGMA journal_mode=WAL")
+            self._connection = sqlite3.connect(self.db_path, timeout=10, check_same_thread=False)
-        return conn
+            self._connection.execute("PRAGMA journal_mode=WAL")
            self._connection.row_factory = sqlite3.Row
        return self._connection
    def close(self):
        """Close the cached database connection, if one is open.

        Does nothing when no connection is cached. After a successful close
        the cached handle is cleared.
        """
        active = self._connection
        if active is None:
            return
        active.close()
        self._connection = None
    def _entity_id(self, name: str) -> str:
        return name.lower().replace(" ", "_").replace("'", "")
@@ -101,12 +111,11 @@ class KnowledgeGraph:
        eid = self._entity_id(name)
        props = json.dumps(properties or {})
        conn = self._conn()
-        conn.execute(
+        with conn:
-            "INSERT OR REPLACE INTO entities (id, name, type, properties) VALUES (?, ?, ?, ?)",
+            conn.execute(
-            (eid, name, entity_type, props),
+                "INSERT OR REPLACE INTO entities (id, name, type, properties) VALUES (?, ?, ?, ?)",
-        )
+                (eid, name, entity_type, props),
-        conn.commit()
+            )
        conn.close()
        return eid
    def add_triple(
@@ -134,38 +143,38 @@ class KnowledgeGraph:
        # Auto-create entities if they don't exist
        conn = self._conn()
-        conn.execute("INSERT OR IGNORE INTO entities (id, name) VALUES (?, ?)", (sub_id, subject))
+        with conn:
-        conn.execute("INSERT OR IGNORE INTO entities (id, name) VALUES (?, ?)", (obj_id, obj))
+            conn.execute(
                "INSERT OR IGNORE INTO entities (id, name) VALUES (?, ?)", (sub_id, subject)
            )
            conn.execute("INSERT OR IGNORE INTO entities (id, name) VALUES (?, ?)", (obj_id, obj))
-        # Check for existing identical triple
+            # Check for existing identical triple
-        existing = conn.execute(
+            existing = conn.execute(
-            "SELECT id FROM triples WHERE subject=? AND predicate=? AND object=? AND valid_to IS NULL",
+                "SELECT id FROM triples WHERE subject=? AND predicate=? AND object=? AND valid_to IS NULL",
-            (sub_id, pred, obj_id),
+                (sub_id, pred, obj_id),
-        ).fetchone()
+            ).fetchone()
-        if existing:
+            if existing:
-            conn.close()
+                return existing["id"]  # Already exists and still valid
            return existing[0]  # Already exists and still valid
-        triple_id = f"t_{sub_id}_{pred}_{obj_id}_{hashlib.md5(f'{valid_from}{datetime.now().isoformat()}'.encode()).hexdigest()[:8]}"
+            triple_id = f"t_{sub_id}_{pred}_{obj_id}_{hashlib.sha256(f'{valid_from}{datetime.now().isoformat()}'.encode()).hexdigest()[:12]}"
-        conn.execute(
+            conn.execute(
-            """INSERT INTO triples (id, subject, predicate, object, valid_from, valid_to, confidence, source_closet, source_file)
+                """INSERT INTO triples (id, subject, predicate, object, valid_from, valid_to, confidence, source_closet, source_file)
-               VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)""",
+                   VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)""",
-            (
+                (
-                triple_id,
+                    triple_id,
-                sub_id,
+                    sub_id,
-                pred,
+                    pred,
-                obj_id,
+                    obj_id,
-                valid_from,
+                    valid_from,
-                valid_to,
+                    valid_to,
-                confidence,
+                    confidence,
-                source_closet,
+                    source_closet,
-                source_file,
+                    source_file,
-            ),
+                ),
-        )
+            )
        conn.commit()
        conn.close()
        return triple_id
    def invalidate(self, subject: str, predicate: str, obj: str, ended: str = None):
@@ -176,12 +185,11 @@ class KnowledgeGraph:
        ended = ended or date.today().isoformat()
        conn = self._conn()
-        conn.execute(
+        with conn:
-            "UPDATE triples SET valid_to=? WHERE subject=? AND predicate=? AND object=? AND valid_to IS NULL",
+            conn.execute(
-            (ended, sub_id, pred, obj_id),
+                "UPDATE triples SET valid_to=? WHERE subject=? AND predicate=? AND object=? AND valid_to IS NULL",
-        )
+                (ended, sub_id, pred, obj_id),
-        conn.commit()
+            )
        conn.close()
    # ── Query operations ──────────────────────────────────────────────────
@@ -208,13 +216,13 @@ class KnowledgeGraph:
                    {
                        "direction": "outgoing",
                        "subject": name,
-                        "predicate": row[2],
+                        "predicate": row["predicate"],
-                        "object": row[10],  # obj_name
+                        "object": row["obj_name"],
-                        "valid_from": row[4],
+                        "valid_from": row["valid_from"],
-                        "valid_to": row[5],
+                        "valid_to": row["valid_to"],
-                        "confidence": row[6],
+                        "confidence": row["confidence"],
-                        "source_closet": row[7],
+                        "source_closet": row["source_closet"],
-                        "current": row[5] is None,
+                        "current": row["valid_to"] is None,
                    }
                )
@@ -228,18 +236,17 @@ class KnowledgeGraph:
                results.append(
                    {
                        "direction": "incoming",
-                        "subject": row[10],  # sub_name
+                        "subject": row["sub_name"],
-                        "predicate": row[2],
+                        "predicate": row["predicate"],
                        "object": name,
-                        "valid_from": row[4],
+                        "valid_from": row["valid_from"],
-                        "valid_to": row[5],
+                        "valid_to": row["valid_to"],
-                        "confidence": row[6],
+                        "confidence": row["confidence"],
-                        "source_closet": row[7],
+                        "source_closet": row["source_closet"],
-                        "current": row[5] is None,
+                        "current": row["valid_to"] is None,
                    }
                )
        conn.close()
        return results
    def query_relationship(self, predicate: str, as_of: str = None):
@@ -262,15 +269,14 @@ class KnowledgeGraph:
        for row in conn.execute(query, params).fetchall():
            results.append(
                {
-                    "subject": row[10],
+                    "subject": row["sub_name"],
                    "predicate": pred,
-                    "object": row[11],
+                    "object": row["obj_name"],
-                    "valid_from": row[4],
+                    "valid_from": row["valid_from"],
-                    "valid_to": row[5],
+                    "valid_to": row["valid_to"],
-                    "current": row[5] is None,
+                    "current": row["valid_to"] is None,
                }
            )
        conn.close()
        return results
    def timeline(self, entity_name: str = None):
@@ -300,15 +306,14 @@ class KnowledgeGraph:
                LIMIT 100
            """).fetchall()
        conn.close()
        return [
            {
-                "subject": r[10],
+                "subject": r["sub_name"],
-                "predicate": r[2],
+                "predicate": r["predicate"],
-                "object": r[11],
+                "object": r["obj_name"],
-                "valid_from": r[4],
+                "valid_from": r["valid_from"],
-                "valid_to": r[5],
+                "valid_to": r["valid_to"],
-                "current": r[5] is None,
+                "current": r["valid_to"] is None,
            }
            for r in rows
        ]
@@ -317,17 +322,18 @@ class KnowledgeGraph:
    def stats(self):
        conn = self._conn()
-        entities = conn.execute("SELECT COUNT(*) FROM entities").fetchone()[0]
+        entities = conn.execute("SELECT COUNT(*) as cnt FROM entities").fetchone()["cnt"]
-        triples = conn.execute("SELECT COUNT(*) FROM triples").fetchone()[0]
+        triples = conn.execute("SELECT COUNT(*) as cnt FROM triples").fetchone()["cnt"]
-        current = conn.execute("SELECT COUNT(*) FROM triples WHERE valid_to IS NULL").fetchone()[0]
+        current = conn.execute(
            "SELECT COUNT(*) as cnt FROM triples WHERE valid_to IS NULL"
        ).fetchone()["cnt"]
        expired = triples - current
        predicates = [
-            r[0]
+            r["predicate"]
            for r in conn.execute(
                "SELECT DISTINCT predicate FROM triples ORDER BY predicate"
            ).fetchall()
        ]
        conn.close()
        return {
            "entities": entities,
            "triples": triples,
@@ -24,8 +24,9 @@ import json
 import logging
 import hashlib
 from datetime import datetime
 from pathlib import Path
-from .config import MempalaceConfig
+from .config import MempalaceConfig, sanitize_name, sanitize_content
 from .version import __version__
 from .searcher import search_memories
 from .palace_graph import traverse, find_tunnels, graph_stats
@@ -66,16 +67,60 @@ _client_cache = None
 _collection_cache = None
 # ==================== WRITE-AHEAD LOG ====================
 # Every write operation is logged to a JSONL file before execution.
 # This provides an audit trail for detecting memory poisoning and
 # enables review/rollback of writes from external or untrusted sources.
 # Directory holding the write-ahead log, under the user's home directory.
 _WAL_DIR = Path(os.path.expanduser("~/.mempalace/wal"))
 # NOTE: runs at import time — importing this module creates the directory.
 _WAL_DIR.mkdir(parents=True, exist_ok=True)
 try:
    # Best effort: restrict the directory to its owner.
    _WAL_DIR.chmod(0o700)
 except (OSError, NotImplementedError):
    # A failed chmod is tolerated; logging continues with existing permissions.
    pass
 # JSONL file that _wal_log appends one entry per line to.
 _WAL_FILE = _WAL_DIR / "write_log.jsonl"
 def _wal_log(operation: str, params: dict, result: dict = None):
    """Append one JSON line describing a write operation to the WAL file.

    The entry records the current local timestamp, the operation name, its
    params and an optional result. Values that JSON cannot encode are
    stringified. Failures are logged and never raised, so a broken log does
    not block the caller.
    """
    record = {
        "timestamp": datetime.now().isoformat(),
        "operation": operation,
        "params": params,
        "result": result,
    }
    try:
        with open(_WAL_FILE, "a", encoding="utf-8") as log_handle:
            serialized = json.dumps(record, default=str)
            log_handle.write(serialized + "\n")
        # Best effort: keep the log readable and writable by its owner only.
        try:
            _WAL_FILE.chmod(0o600)
        except (OSError, NotImplementedError):
            pass
    except Exception as exc:
        logger.error(f"WAL write failed: {exc}")
 _client_cache = None
 _collection_cache = None
 def _get_client():
    """Return the shared ChromaDB PersistentClient, creating it on first use.

    The client is built from ``_config.palace_path`` and kept in the
    module-level ``_client_cache`` for later calls.
    """
    global _client_cache
    if _client_cache is not None:
        return _client_cache
    _client_cache = chromadb.PersistentClient(path=_config.palace_path)
    return _client_cache
 def _get_collection(create=False):
    """Return the ChromaDB collection, caching the client between calls."""
-    global _client_cache, _collection_cache
+    global _collection_cache
    try:
-        if _client_cache is None:
+        client = _get_client()
            _client_cache = chromadb.PersistentClient(path=_config.palace_path)
        if create:
-            _collection_cache = _client_cache.get_or_create_collection(_config.collection_name)
+            _collection_cache = client.get_or_create_collection(_config.collection_name)
        elif _collection_cache is None:
-            _collection_cache = _client_cache.get_collection(_config.collection_name)
+            _collection_cache = client.get_collection(_config.collection_name)
        return _collection_cache
    except Exception:
        return None
@@ -330,11 +375,30 @@ def tool_add_drawer(
    wing: str, room: str, content: str, source_file: str = None, added_by: str = "mcp"
 ):
    """File verbatim content into a wing/room. Checks for duplicates first."""
    try:
        wing = sanitize_name(wing, "wing")
        room = sanitize_name(room, "room")
        content = sanitize_content(content)
    except ValueError as e:
        return {"success": False, "error": str(e)}
    col = _get_collection(create=True)
    if not col:
        return _no_palace()
-    drawer_id = f"drawer_{wing}_{room}_{hashlib.md5(content.encode()).hexdigest()[:16]}"
+    drawer_id = f"drawer_{wing}_{room}_{hashlib.sha256((wing + room + content[:100]).encode()).hexdigest()[:24]}"
    _wal_log(
        "add_drawer",
        {
            "drawer_id": drawer_id,
            "wing": wing,
            "room": room,
            "added_by": added_by,
            "content_length": len(content),
            "content_preview": content[:200],
        },
    )
    # Idempotency: if the deterministic ID already exists, return success as a no-op.
    try:
@@ -373,6 +437,19 @@ def tool_delete_drawer(drawer_id: str):
    existing = col.get(ids=[drawer_id])
    if not existing["ids"]:
        return {"success": False, "error": f"Drawer not found: {drawer_id}"}
    # Log the deletion with the content being removed for audit trail
    deleted_content = existing.get("documents", [""])[0] if existing.get("documents") else ""
    deleted_meta = existing.get("metadatas", [{}])[0] if existing.get("metadatas") else {}
    _wal_log(
        "delete_drawer",
        {
            "drawer_id": drawer_id,
            "deleted_meta": deleted_meta,
            "content_preview": deleted_content[:200],
        },
    )
    try:
        col.delete(ids=[drawer_id])
        logger.info(f"Deleted drawer: {drawer_id}")
@@ -394,6 +471,23 @@ def tool_kg_add(
    subject: str, predicate: str, object: str, valid_from: str = None, source_closet: str = None
 ):
    """Add a relationship to the knowledge graph."""
    try:
        subject = sanitize_name(subject, "subject")
        predicate = sanitize_name(predicate, "predicate")
        object = sanitize_name(object, "object")
    except ValueError as e:
        return {"success": False, "error": str(e)}
    _wal_log(
        "kg_add",
        {
            "subject": subject,
            "predicate": predicate,
            "object": object,
            "valid_from": valid_from,
            "source_closet": source_closet,
        },
    )
    triple_id = _kg.add_triple(
        subject, predicate, object, valid_from=valid_from, source_closet=source_closet
    )
@@ -402,6 +496,10 @@ def tool_kg_add(
 def tool_kg_invalidate(subject: str, predicate: str, object: str, ended: str = None):
    """Mark a fact as no longer true (set end date)."""
    _wal_log(
        "kg_invalidate",
        {"subject": subject, "predicate": predicate, "object": object, "ended": ended},
    )
    _kg.invalidate(subject, predicate, object, ended=ended)
    return {
        "success": True,
@@ -432,6 +530,12 @@ def tool_diary_write(agent_name: str, entry: str, topic: str = "general"):
    This is the agent's personal journal — observations, thoughts,
    what it worked on, what it noticed, what it thinks matters.
    """
    try:
        agent_name = sanitize_name(agent_name, "agent_name")
        entry = sanitize_content(entry)
    except ValueError as e:
        return {"success": False, "error": str(e)}
    wing = f"wing_{agent_name.lower().replace(' ', '_')}"
    room = "diary"
    col = _get_collection(create=True)
@@ -439,9 +543,23 @@ def tool_diary_write(agent_name: str, entry: str, topic: str = "general"):
        return _no_palace()
    now = datetime.now()
-    entry_id = f"diary_{wing}_{now.strftime('%Y%m%d_%H%M%S')}_{hashlib.md5(entry[:50].encode()).hexdigest()[:8]}"
+    entry_id = f"diary_{wing}_{now.strftime('%Y%m%d_%H%M%S')}_{hashlib.sha256(entry[:50].encode()).hexdigest()[:12]}"
    _wal_log(
        "diary_write",
        {
            "agent_name": agent_name,
            "topic": topic,
            "entry_id": entry_id,
            "entry_preview": entry[:200],
        },
    )
    try:
        # TODO: Future versions should expand AAAK before embedding to improve
        # semantic search quality. For now, store raw AAAK in metadata so it's
        # preserved, and keep the document as-is for embedding (even though
        # compressed AAAK degrades embedding quality).
        col.add(
            ids=[entry_id],
            documents=[entry],
@@ -767,17 +885,31 @@ TOOLS = {
 }
 # Protocol revisions this server accepts, ordered newest first.
 # handle_request() relies on the ordering: index 0 is the fallback returned
 # when a client asks for a version that is not listed, and the last entry is
 # the default assumed when the client sends no protocolVersion at all.
 SUPPORTED_PROTOCOL_VERSIONS = [
    "2025-11-25",
    "2025-06-18",
    "2025-03-26",
    "2024-11-05",
 ]
 def handle_request(request):
    method = request.get("method", "")
    params = request.get("params", {})
    req_id = request.get("id")
    if method == "initialize":
        client_version = params.get("protocolVersion", SUPPORTED_PROTOCOL_VERSIONS[-1])
        negotiated = (
            client_version
            if client_version in SUPPORTED_PROTOCOL_VERSIONS
            else SUPPORTED_PROTOCOL_VERSIONS[0]
        )
        return {
            "jsonrpc": "2.0",
            "id": req_id,
            "result": {
-                "protocolVersion": "2024-11-05",
+                "protocolVersion": negotiated,
                "capabilities": {"tools": {}},
                "serverInfo": {"name": "mempalace", "version": __version__},
            },
@@ -797,7 +929,7 @@ def handle_request(request):
        }
    elif method == "tools/call":
        tool_name = params.get("name")
-        tool_args = params.get("arguments", {})
+        tool_args = params.get("arguments") or {}
        if tool_name not in TOOLS:
            return {
                "jsonrpc": "2.0",
@@ -17,6 +17,8 @@ from collections import defaultdict
 import chromadb
 from .palace import SKIP_DIRS, get_collection, file_already_mined
 READABLE_EXTENSIONS = {
    ".txt",
    ".md",
@@ -40,32 +42,6 @@ READABLE_EXTENSIONS = {
    ".toml",
 }
 # Directory names in the skip set.
 # NOTE(review): SKIP_DIRS is also imported from .palace near the top of this
 # module; this definition rebinds the name with what appears to be the same
 # set — confirm whether the local copy is still needed.
 SKIP_DIRS = {
    ".git",
    "node_modules",
    "__pycache__",
    ".venv",
    "venv",
    "env",
    "dist",
    "build",
    ".next",
    "coverage",
    ".mempalace",
    ".ruff_cache",
    ".mypy_cache",
    ".pytest_cache",
    ".cache",
    ".tox",
    ".nox",
    ".idea",
    ".vscode",
    ".ipynb_checkpoints",
    ".eggs",
    "htmlcov",
    "target",
 }
 SKIP_FILENAMES = {
    "mempalace.yaml",
    "mempalace.yml",
@@ -78,6 +54,7 @@ SKIP_FILENAMES = {
 CHUNK_SIZE = 800  # chars per drawer
 CHUNK_OVERLAP = 100  # overlap between chunks
 MIN_CHUNK_SIZE = 50  # skip tiny chunks
 MAX_FILE_SIZE = 10 * 1024 * 1024  # 10 MB — skip files larger than this
 # =============================================================================
@@ -393,41 +370,11 @@ def chunk_text(content: str, source_file: str) -> list:
 # =============================================================================
 def get_collection(palace_path: str, collection_name: str = "mempalace_drawers"):
    """Open the palace's ChromaDB collection, creating it when absent.

    Args:
        palace_path: Directory of the persistent ChromaDB store; created
            if it does not exist.
        collection_name: Collection to open. Defaults to the name this
            function previously hard-coded.

    Returns:
        The ChromaDB collection object.
    """
    os.makedirs(palace_path, exist_ok=True)
    client = chromadb.PersistentClient(path=palace_path)
    # One call instead of get/except/create: the old blanket
    # ``except Exception`` treated every get_collection failure as
    # "collection missing" and then attempted a create.
    return client.get_or_create_collection(collection_name)
 def file_already_mined(collection, source_file: str) -> bool:
    """Report whether *source_file* is already filed and unchanged.

    Looks up one drawer whose ``source_file`` metadata matches and compares
    its stored ``source_mtime`` with the file's current mtime.  Returns
    False (needs re-mining) when nothing is filed, when no mtime was
    stored, when the mtimes differ, or when any lookup step raises.
    """
    try:
        found = collection.get(where={"source_file": source_file}, limit=1)
        if not found.get("ids"):
            return False
        metas = found.get("metadatas")
        recorded = (metas[0] if metas else {}).get("source_mtime")
        if recorded is None:
            return False
        # Exact float comparison: any difference counts as a modification.
        return os.path.getmtime(source_file) == float(recorded)
    except Exception:
        return False
 def add_drawer(
    collection, wing: str, room: str, content: str, source_file: str, chunk_index: int, agent: str
 ):
    """Add one drawer to the palace."""
-    drawer_id = f"drawer_{wing}_{room}_{hashlib.md5((source_file + str(chunk_index)).encode(), usedforsecurity=False).hexdigest()[:16]}"
+    drawer_id = f"drawer_{wing}_{room}_{hashlib.sha256((source_file + str(chunk_index)).encode()).hexdigest()[:24]}"
    try:
        metadata = {
            "wing": wing,
@@ -470,7 +417,7 @@ def process_file(
    # Skip if already filed
    source_file = str(filepath)
-    if not dry_run and file_already_mined(collection, source_file):
+    if not dry_run and file_already_mined(collection, source_file, check_mtime=True):
        return 0, None
    try:
@@ -562,6 +509,15 @@ def scan_project(
            if respect_gitignore and active_matchers and not force_include:
                if is_gitignored(filepath, active_matchers, is_dir=False):
                    continue
            # Skip symlinks — prevents following links to /dev/urandom, etc.
            if filepath.is_symlink():
                continue
            # Skip files exceeding size limit
            try:
                if filepath.stat().st_size > MAX_FILE_SIZE:
                    continue
            except OSError:
                continue
            files.append(filepath)
    return files
@@ -25,6 +25,12 @@ def normalize(filepath: str) -> str:
    Load a file and normalize to transcript format if it's a chat export.
    Plain text files pass through unchanged.
    """
    try:
        file_size = os.path.getsize(filepath)
    except OSError as e:
        raise IOError(f"Could not read {filepath}: {e}")
    if file_size > 500 * 1024 * 1024:  # 500 MB safety limit
        raise IOError(f"File too large ({file_size // (1024*1024)} MB): {filepath}")
    try:
        with open(filepath, "r", encoding="utf-8", errors="replace") as f:
            content = f.read()
@@ -0,0 +1,71 @@
 """
 palace.py — Shared palace operations.
 Consolidates ChromaDB access patterns used by both miners and the MCP server.
 """
 import os
 import chromadb
 # Directory names shared with the miners (miner.py imports this set).
 # presumably directories with these names are skipped while scanning a
 # project — confirm against scan_project.
 SKIP_DIRS = {
    ".git",
    "node_modules",
    "__pycache__",
    ".venv",
    "venv",
    "env",
    "dist",
    "build",
    ".next",
    "coverage",
    ".mempalace",
    ".ruff_cache",
    ".mypy_cache",
    ".pytest_cache",
    ".cache",
    ".tox",
    ".nox",
    ".idea",
    ".vscode",
    ".ipynb_checkpoints",
    ".eggs",
    "htmlcov",
    "target",
 }
 def get_collection(palace_path: str, collection_name: str = "mempalace_drawers"):
    """Get or create the palace ChromaDB collection.

    Args:
        palace_path: Directory of the persistent ChromaDB store; created
            if it does not exist.
        collection_name: Name of the collection to open.

    Returns:
        The ChromaDB collection object.
    """
    os.makedirs(palace_path, exist_ok=True)
    try:
        # Best-effort: keep the palace directory private to its owner.
        os.chmod(palace_path, 0o700)
    except (OSError, NotImplementedError):
        pass
    client = chromadb.PersistentClient(path=palace_path)
    # One call instead of get/except/create: the old blanket
    # ``except Exception`` treated every get_collection failure as
    # "collection missing" and then attempted a create.
    return client.get_or_create_collection(collection_name)
 def file_already_mined(collection, source_file: str, check_mtime: bool = False) -> bool:
    """Tell whether *source_file* already has a drawer in the palace.

    With check_mtime=False (used by convo miner) the mere existence of a
    matching drawer is enough.  With check_mtime=True (used by project
    miner) the stored ``source_mtime`` must also equal the file's current
    mtime; a missing or different mtime returns False so the file is
    re-mined.  Any exception during the lookup also yields False.
    """
    try:
        hits = collection.get(where={"source_file": source_file}, limit=1)
        if not hits.get("ids"):
            return False
        if not check_mtime:
            return True
        recorded = hits.get("metadatas", [{}])[0].get("source_mtime")
        if recorded is None:
            return False
        # Exact float comparison: any difference counts as a modification.
        return os.path.getmtime(source_file) == float(recorded)
    except Exception:
        return False
@@ -182,6 +182,10 @@ def split_file(filepath, output_dir, dry_run=False):
    Returns list of output paths written (or would be written if dry_run).
    """
    path = Path(filepath)
    max_size = 500 * 1024 * 1024  # 500 MB safety limit
    if path.stat().st_size > max_size:
        print(f"  SKIP: {path.name} exceeds {max_size // (1024*1024)} MB limit")
        return []
    lines = path.read_text(errors="replace").splitlines(keepends=True)
    boundaries = find_session_boundaries(lines)
@@ -266,7 +270,11 @@ def main():
        files = sorted(src_dir.glob("*.txt"))
    mega_files = []
    max_scan_size = 500 * 1024 * 1024  # 500 MB
    for f in files:
        if f.stat().st_size > max_scan_size:
            print(f"  SKIP: {f.name} exceeds {max_scan_size // (1024*1024)} MB limit")
            continue
        lines = f.read_text(errors="replace").splitlines(keepends=True)
        boundaries = find_session_boundaries(lines)
        if len(boundaries) >= args.min_sessions:
@@ -1,3 +1,3 @@
 """Single source of truth for the MemPalace package version."""
-__version__ = "3.0.14"
+__version__ = "3.1.0"
@@ -1,6 +1,6 @@
 [project]
 name = "mempalace"
-version = "3.0.14"
+version = "3.1.0"
 description = "Give your AI a memory — mine projects and conversations into a searchable palace. No API key required."
 readme = "README.md"
 requires-python = ">=3.9"
@@ -26,7 +26,7 @@ classifiers = [
 ]
 dependencies = [
    "chromadb>=0.5.0,<0.7",
-    "pyyaml>=6.0",
+    "pyyaml>=6.0,<7",
 ]
 [project.urls]
@@ -2,6 +2,7 @@
 import argparse
 import sys
 from pathlib import Path
 from unittest.mock import MagicMock, patch
 import pytest
@@ -326,6 +327,35 @@ def test_main_split_dispatches():
        mock_cmd.assert_called_once()
 def test_mcp_command_prints_setup_guidance(monkeypatch, capsys):
    """`mempalace mcp` without --palace prints the generic setup guidance."""
    monkeypatch.setattr(sys, "argv", ["mempalace", "mcp"])
    main()
    out, err = capsys.readouterr()
    expected_snippets = (
        "MemPalace MCP quick setup:",
        "claude mcp add mempalace -- python -m mempalace.mcp_server",
        "\nOptional custom palace:\n",
        "python -m mempalace.mcp_server --palace /path/to/palace",
    )
    for snippet in expected_snippets:
        assert snippet in out
    # The bracketed placeholder form must not appear in the output.
    assert "[--palace /path/to/palace]" not in out
    assert err == ""
 def test_mcp_command_uses_custom_palace_path_when_provided(monkeypatch, capsys):
    """An explicit --palace path is expanded and shown in the setup command."""
    custom_palace = "~/tmp/my palace"
    monkeypatch.setattr(sys, "argv", ["mempalace", "--palace", custom_palace, "mcp"])
    main()
    out, err = capsys.readouterr()
    assert "python -m mempalace.mcp_server --palace" in out
    # The user-expanded path (not the literal "~" form) must be printed.
    assert str(Path(custom_palace).expanduser()) in out
    for absent in ("Optional custom palace:", "[--palace /path/to/palace]"):
        assert absent not in out
    assert err == ""
 def test_main_hook_no_subcommand_prints_help(capsys):
    with patch("sys.argv", ["mempalace", "hook"]):
        main()
@@ -607,3 +637,16 @@ def test_cmd_compress_stores_results(mock_config_cls, capsys):
    out = capsys.readouterr().out
    assert "Stored" in out
    mock_comp_col.upsert.assert_called_once()
 def test_cmd_repair_trailing_slash_does_not_recurse():
    """Repair with trailing slash should put backup outside palace dir (#395)."""
    import os

    namespace = argparse.Namespace(palace="/tmp/fake_palace/")
    # isdir is forced to False while cmd_repair runs on the fake path.
    with patch("mempalace.cli.os.path.isdir", return_value=False):
        cmd_repair(namespace)
    # NOTE(review): the check below recomputes the rstrip logic locally and
    # never inspects anything cmd_repair produced — confirm this is intended.
    trimmed = os.path.expanduser(namespace.palace).rstrip(os.sep)
    nested_prefix = trimmed + os.sep
    assert not (trimmed + ".backup").startswith(nested_prefix)
@@ -42,6 +42,50 @@ class TestHandleRequest:
        assert resp["result"]["serverInfo"]["name"] == "mempalace"
        assert resp["id"] == 1
    def test_initialize_negotiates_client_version(self):
        """A supported version requested by the client is echoed back."""
        from mempalace.mcp_server import handle_request

        requested = "2025-11-25"
        request = {
            "method": "initialize",
            "id": 1,
            "params": {"protocolVersion": requested},
        }
        reply = handle_request(request)
        assert reply["result"]["protocolVersion"] == requested
    def test_initialize_negotiates_older_supported_version(self):
        """An older but still supported version is accepted as requested."""
        from mempalace.mcp_server import handle_request

        requested = "2025-03-26"
        request = {
            "method": "initialize",
            "id": 1,
            "params": {"protocolVersion": requested},
        }
        reply = handle_request(request)
        assert reply["result"]["protocolVersion"] == requested
    def test_initialize_unknown_version_falls_back_to_latest(self):
        """An unsupported version is answered with the first listed version."""
        from mempalace.mcp_server import handle_request

        request = {
            "method": "initialize",
            "id": 1,
            "params": {"protocolVersion": "9999-12-31"},
        }
        reply = handle_request(request)

        from mempalace.mcp_server import SUPPORTED_PROTOCOL_VERSIONS

        newest = SUPPORTED_PROTOCOL_VERSIONS[0]
        assert reply["result"]["protocolVersion"] == newest
    def test_initialize_missing_version_uses_oldest(self):
        """With no protocolVersion supplied, the last listed version is used."""
        from mempalace.mcp_server import handle_request, SUPPORTED_PROTOCOL_VERSIONS

        request = {"method": "initialize", "id": 1, "params": {}}
        reply = handle_request(request)
        oldest = SUPPORTED_PROTOCOL_VERSIONS[-1]
        assert reply["result"]["protocolVersion"] == oldest
    def test_notifications_initialized_returns_none(self):
        from mempalace.mcp_server import handle_request
@@ -59,6 +103,23 @@ class TestHandleRequest:
        assert "mempalace_add_drawer" in names
        assert "mempalace_kg_add" in names
    def test_null_arguments_does_not_hang(self, monkeypatch, config, palace_path, seeded_kg):
        """A tools/call with ``arguments: null`` must yield a result, not hang (#394)."""
        _patch_mcp_server(monkeypatch, config, seeded_kg)
        from mempalace.mcp_server import handle_request

        # Create the collection first; only the client handle is dropped here.
        chroma_client, _drawers = _get_collection(palace_path, create=True)
        del chroma_client
        request = {
            "method": "tools/call",
            "id": 10,
            "params": {"name": "mempalace_status", "arguments": None},
        }
        reply = handle_request(request)
        assert "error" not in reply
        assert reply["result"] is not None
    def test_unknown_tool(self):
        from mempalace.mcp_server import handle_request
@@ -7,6 +7,7 @@ import chromadb
 import yaml
 from mempalace.miner import mine, scan_project
 from mempalace.palace import file_already_mined
 def write_file(path: Path, content: str):
@@ -206,3 +207,56 @@ def test_scan_project_skip_dirs_still_apply_without_override():
        assert scanned_files(project_root, respect_gitignore=False) == ["main.py"]
    finally:
        shutil.rmtree(tmpdir)
 def test_file_already_mined_check_mtime():
    """Exercise file_already_mined with and without the mtime comparison."""
    tmpdir = tempfile.mkdtemp()
    # Bind both names up front so the ``del`` in ``finally`` cannot raise
    # UnboundLocalError (masking the real failure and skipping the rmtree)
    # when setup fails before the ChromaDB objects exist.
    client = col = None
    try:
        palace_path = os.path.join(tmpdir, "palace")
        os.makedirs(palace_path)
        client = chromadb.PersistentClient(path=palace_path)
        col = client.get_or_create_collection("mempalace_drawers")
        test_file = os.path.join(tmpdir, "test.txt")
        with open(test_file, "w") as f:
            f.write("hello world")
        mtime = os.path.getmtime(test_file)
        # Not mined yet
        assert file_already_mined(col, test_file) is False
        assert file_already_mined(col, test_file, check_mtime=True) is False
        # Add it with mtime
        col.add(
            ids=["d1"],
            documents=["hello world"],
            metadatas=[{"source_file": test_file, "source_mtime": str(mtime)}],
        )
        # Already mined (no mtime check)
        assert file_already_mined(col, test_file) is True
        # Already mined (mtime matches)
        assert file_already_mined(col, test_file, check_mtime=True) is True
        # Modify file and force a different mtime (Windows has low mtime resolution)
        with open(test_file, "w") as f:
            f.write("modified content")
        os.utime(test_file, (mtime + 10, mtime + 10))
        # Still mined without mtime check
        assert file_already_mined(col, test_file) is True
        # Needs re-mining with mtime check
        assert file_already_mined(col, test_file, check_mtime=True) is False
        # Record with no mtime stored should return False for check_mtime
        col.add(
            ids=["d2"],
            documents=["other"],
            metadatas=[{"source_file": "/fake/no_mtime.txt"}],
        )
        assert file_already_mined(col, "/fake/no_mtime.txt", check_mtime=True) is False
    finally:
        # Release ChromaDB file handles before cleanup (required on Windows)
        del col, client
        shutil.rmtree(tmpdir, ignore_errors=True)
@@ -499,3 +499,13 @@ def test_messages_to_transcript_assistant_first():
    result = _messages_to_transcript(msgs, spellcheck=False)
    assert "preamble" in result
    assert "> Q" in result
 def test_normalize_rejects_large_file():
    """normalize() must refuse a file reported as larger than 500 MB."""
    oversized = 600 * 1024 * 1024
    with patch("mempalace.normalize.os.path.getsize", return_value=oversized):
        try:
            normalize("/fake/huge_file.txt")
        except IOError as e:
            assert "too large" in str(e).lower()
        else:
            # Reaching here means no IOError was raised at all.
            assert False, "Should have raised IOError"
`@@ -1,3 +1,3 @@`
	`"""Single source of truth for the MemPalace package version."""`	`"""Single source of truth for the MemPalace package version."""`

	`__version__ = "3.0.14"`	`__version__ = "3.1.0"`