cleanup and remote only

2026-05-09 10:52:25 -05:00
parent 2fc47a52fc
commit 40e5e5e3cc
136 changed files with 1502 additions and 349529 deletions
@@ -1,20 +0,0 @@
 {
  "name": "mempalace",
  "interface": {
    "displayName": "MemPalace"
  },
  "plugins": [
    {
      "name": "mempalace",
      "source": {
        "source": "local",
        "path": "./.codex-plugin"
      },
      "policy": {
        "installation": "AVAILABLE",
        "authentication": "NONE"
      },
      "category": "Coding"
    }
  ]
 }
@@ -1,5 +0,0 @@
 {
  "mempalace": {
    "command": "mempalace-mcp"
  }
 }
@@ -1,57 +0,0 @@
 # MemPalace Claude Code Plugin
 A Claude Code plugin that gives your AI a persistent memory system. Mine projects and conversations into a searchable palace backed by ChromaDB, with 19 MCP tools, auto-save hooks, and 5 guided skills.
 ## Prerequisites
 - Python 3.9+
 ## Installation
 ### Claude Code Marketplace
 ```bash
 claude plugin marketplace add MemPalace/mempalace
 claude plugin install --scope user mempalace
 ```
 ### Local Clone
 ```bash
 claude plugin add /path/to/mempalace
 ```
 ## Post-Install Setup
 After installing the plugin, run the init command to complete setup (installs the `mempalace` package via `uv tool` or `pip`, configures MCP, etc.):
 ```
 /mempalace:init
 ```
 ## Available Slash Commands
 | Command | Description |
 |---------|-------------|
 | `/mempalace:help` | Show available tools, skills, and architecture |
 | `/mempalace:init` | Set up MemPalace -- install, configure MCP, onboard |
 | `/mempalace:search` | Search your memories across the palace |
 | `/mempalace:mine` | Mine projects and conversations into the palace |
 | `/mempalace:status` | Show palace overview -- wings, rooms, drawer counts |
 ## Hooks
 MemPalace registers two hooks that run automatically:
 - **Stop** -- Saves conversation context every 15 messages.
 - **PreCompact** -- Preserves important memories before context compaction.
 Set the `MEMPAL_DIR` environment variable to a directory path to automatically run `mempalace mine` on that directory during each save trigger.
 ## MCP Server
 The plugin automatically configures a local MCP server with 19 tools for storing, searching, and managing memories. No manual MCP setup is required -- `/mempalace:init` handles everything.
 ## Full Documentation
 See the main [README](../README.md) for complete documentation, architecture details, and advanced usage.
@@ -1,6 +0,0 @@
 ---
 description: Show comprehensive MemPalace help — available skills, MCP tools, CLI commands, hooks, and architecture.
 allowed-tools: Bash, Read
 ---
 Invoke the generic mempalace skill (using the Skill tool) with the `help` command, then follow its instructions.
@@ -1,6 +0,0 @@
 ---
 description: Set up MemPalace — install the package, initialize a palace, configure MCP server, and verify everything works.
 allowed-tools: Bash, Read, Write, Edit, Glob, Grep
 ---
 Invoke the generic mempalace skill (using the Skill tool) with the `init` command, then follow its instructions.
@@ -1,7 +0,0 @@
 ---
 description: Mine projects and conversations into the MemPalace. Supports project files, conversation exports, and auto-classification.
 argument-hint: Path to project or conversation export to mine.
 allowed-tools: Bash, Read, Write, Edit, Glob, Grep
 ---
 Invoke the generic mempalace skill (using the Skill tool) with the `mine` command, then follow its instructions.
@@ -1,7 +0,0 @@
 ---
 description: Search your memories across the MemPalace using semantic search with wing/room filtering.
 argument-hint: Search query, optionally with wing/room filters.
 allowed-tools: Bash, Read
 ---
 Invoke the generic mempalace skill (using the Skill tool) with the `search` command, then follow its instructions.
@@ -1,6 +0,0 @@
 ---
 description: Show the current state of your memory palace — wings, rooms, drawer counts, and suggestions.
 allowed-tools: Bash, Read
 ---
 Invoke the generic mempalace skill (using the Skill tool) with the `status` command, then follow its instructions.
@@ -1,25 +0,0 @@
 {
  "description": "MemPalace auto-save and pre-compact hooks",
  "hooks": {
    "Stop": [
      {
        "hooks": [
          {
            "type": "command",
            "command": "bash \"${CLAUDE_PLUGIN_ROOT}/hooks/mempal-stop-hook.sh\""
          }
        ]
      }
    ],
    "PreCompact": [
      {
        "hooks": [
          {
            "type": "command",
            "command": "bash \"${CLAUDE_PLUGIN_ROOT}/hooks/mempal-precompact-hook.sh\""
          }
        ]
      }
    ]
  }
 }
@@ -1,24 +0,0 @@
 #!/bin/bash
 # MemPalace PreCompact Hook — thin wrapper calling Python CLI
 # All logic lives in mempalace.hooks_cli for cross-harness extensibility
 # Forward a hook invocation to whichever MemPalace entry point is usable.
 # All arguments are passed through verbatim to `mempalace hook run`.
 # Lookup order: the `mempalace` executable, then `python3 -m mempalace`,
 # then `python -m mempalace`; an interpreter only qualifies if it can import
 # the package. Returns the invoked command's status, or 1 (with a message on
 # stderr) when no candidate is runnable.
 run_mempalace_hook() {
  if command -v mempalace >/dev/null 2>&1; then
    mempalace hook run "$@"
    return $?
  fi
  local py
  for py in python3 python; do
    # Skip interpreters that are missing or cannot import the package.
    command -v "$py" >/dev/null 2>&1 || continue
    "$py" -c "import mempalace" >/dev/null 2>&1 || continue
    "$py" -m mempalace hook run "$@"
    return $?
  done
  echo "MemPalace hook error: could not find a runnable mempalace command or module" >&2
  return 1
 }
 run_mempalace_hook --hook precompact --harness claude-code
@@ -1,24 +0,0 @@
 #!/bin/bash
 # MemPalace Stop Hook — thin wrapper calling Python CLI
 # All logic lives in mempalace.hooks_cli for cross-harness extensibility
 # Run `mempalace hook run` with the given arguments through the first
 # available entry point: the `mempalace` executable, else `python3 -m
 # mempalace`, else `python -m mempalace` (an interpreter is used only when it
 # can import the package). Returns that command's exit status; prints an
 # error to stderr and returns 1 if nothing is runnable.
 run_mempalace_hook() {
  if command -v mempalace >/dev/null 2>&1; then
    mempalace hook run "$@"
    return $?
  fi
  local interpreter
  for interpreter in python3 python; do
    if command -v "$interpreter" >/dev/null 2>&1 \
      && "$interpreter" -c "import mempalace" >/dev/null 2>&1; then
      "$interpreter" -m mempalace hook run "$@"
      return $?
    fi
  done
  echo "MemPalace hook error: could not find a runnable mempalace command or module" >&2
  return 1
 }
 run_mempalace_hook --hook stop --harness claude-code
@@ -1,18 +0,0 @@
 {
  "name": "mempalace",
  "owner": {
    "name": "milla-jovovich",
    "url": "https://github.com/MemPalace"
  },
  "plugins": [
    {
      "name": "mempalace",
      "source": "./.claude-plugin",
      "description": "AI memory system — mine projects and conversations into a searchable palace. 19 MCP tools, auto-save hooks, guided setup.",
      "version": "3.3.4",
      "author": {
        "name": "milla-jovovich"
      }
    }
  ]
 }
@@ -1,25 +0,0 @@
 {
  "name": "mempalace",
  "version": "3.3.4",
  "description": "Give your AI a memory — mine projects and conversations into a searchable palace. 19 MCP tools, auto-save hooks, and guided setup.",
  "author": {
    "name": "milla-jovovich"
  },
  "license": "MIT",
  "commands": [],
  "mcpServers": {
    "mempalace": {
      "command": "mempalace-mcp"
    }
  },
  "keywords": [
    "memory",
    "ai",
    "rag",
    "mcp",
    "chromadb",
    "palace",
    "search"
  ],
  "repository": "https://github.com/MemPalace/mempalace"
 }
@@ -1,35 +0,0 @@
 ---
 name: mempalace
 description: MemPalace — mine projects and conversations into a searchable memory palace. Use when asked about mempalace, memory palace, mining memories, searching memories, or palace setup.
 allowed-tools: Bash, Read, Write, Edit, Glob, Grep
 ---
 # MemPalace
 A searchable memory palace for AI — mine projects and conversations, then search them semantically.
 ## Prerequisites
 Ensure `mempalace` is installed:
 ```bash
 mempalace --version
 ```
 If not installed (uv recommended):
 ```bash
 uv tool install mempalace   # or: pip install mempalace
 ```
 ## Usage
 MemPalace provides dynamic instructions via the CLI. To get instructions for any operation:
 ```bash
 mempalace instructions <command>
 ```
 Where `<command>` is one of: `help`, `init`, `mine`, `search`, `status`.
 Run the appropriate instructions command, then follow the returned instructions step by step.
@@ -1,80 +0,0 @@
 # MemPalace - Codex CLI Plugin
 Give your AI a persistent memory -- mine projects and conversations into a searchable palace backed by ChromaDB, with 19 MCP tools, auto-save hooks, and guided skills.
 ## Prerequisites
 - Python 3.9+
 - Codex CLI installed and configured
 - `uv tool install mempalace` (recommended) or `pip install mempalace`
 ## Installation
 ### Local Install
 1. Copy or symlink the `.codex-plugin` directory into your project root:
 ```bash
 cp -r .codex-plugin /path/to/your/project/.codex-plugin
 ```
 2. Verify the plugin is detected:
 ```bash
 codex --plugins
 ```
 3. Initialize your palace:
 ```bash
 codex /init
 ```
 ### Git Install
 1. Clone the MemPalace repository:
 ```bash
 git clone https://github.com/MemPalace/mempalace.git
 cd mempalace
 ```
 2. Install the Python package so the `mempalace-mcp` script lands on
   your PATH (the bundled `plugin.json` invokes it by bare name):
 ```bash
 uv tool install --editable .   # or: pip install -e .
 ```
   Plain `uv sync` is **not** enough here — it installs the scripts into
   `.venv/bin/`, which Codex will not find unless you activate the venv
   before launching Codex.
 3. The `.codex-plugin` directory is already in the repo root. Codex CLI will detect it automatically when you run Codex from inside the repository.
 4. Initialize your palace:
 ```bash
 codex /init
 ```
 ## Available Skills
 | Skill | Description |
 |-------|-------------|
 | `/help` | Show available commands and usage tips |
 | `/init` | Initialize a new memory palace |
 | `/search` | Semantic search across all mined memories |
 | `/mine` | Mine a project or conversation into your palace |
 | `/status` | Show palace status, room counts, and health |
 ## Hooks
 The plugin includes auto-save hooks that run on session stop (every 15 messages) and before context compaction, automatically preserving conversation context into your palace.
 Set the `MEMPAL_DIR` environment variable to a directory path to automatically run `mempalace mine` on that directory during each save trigger.
 ## Support
 - Repository: https://github.com/MemPalace/mempalace
 - Issues: https://github.com/MemPalace/mempalace/issues
@@ -1,37 +0,0 @@
 {
  "hooks": {
    "SessionStart": [
      {
        "matcher": "*",
        "hooks": [
          {
            "type": "command",
            "command": "\"${CODEX_PLUGIN_ROOT}/hooks/mempal-hook.sh\" session-start"
          }
        ]
      }
    ],
    "Stop": [
      {
        "matcher": "*",
        "hooks": [
          {
            "type": "command",
            "command": "\"${CODEX_PLUGIN_ROOT}/hooks/mempal-hook.sh\" stop"
          }
        ]
      }
    ],
    "PreCompact": [
      {
        "matcher": "*",
        "hooks": [
          {
            "type": "command",
            "command": "\"${CODEX_PLUGIN_ROOT}/hooks/mempal-hook.sh\" precompact"
          }
        ]
      }
    ]
  }
 }
@@ -1,9 +0,0 @@
 #!/usr/bin/env bash
 # MemPalace Codex hook wrapper.
 # Usage: mempal-hook.sh <hook-name>
 # Buffers the hook payload from stdin into a temp file, feeds it to
 # `mempalace hook run --harness codex`, and exits with that command's status.
 set -euo pipefail
 HOOK_NAME="${1:?Usage: mempal-hook.sh <hook-name>}"
 INPUT_FILE=$(mktemp) || { echo "Failed to create temp file" >&2; exit 1; }
 # Remove the temp file on every exit path. With `set -e` a failing command
 # ends the script immediately, so cleanup placed after it would never run.
 trap 'rm -f "$INPUT_FILE" 2>/dev/null || true' EXIT
 cat > "$INPUT_FILE"
 # `|| EXIT_CODE=$?` keeps errexit from aborting before the status is captured.
 EXIT_CODE=0
 mempalace hook run --hook "$HOOK_NAME" --harness codex < "$INPUT_FILE" || EXIT_CODE=$?
 exit "$EXIT_CODE"
@@ -1,48 +0,0 @@
 {
  "name": "mempalace",
  "version": "3.3.4",
  "description": "Give your AI a memory — mine projects and conversations into a searchable palace. 19 MCP tools, auto-save hooks, and guided setup.",
  "author": {
    "name": "milla-jovovich"
  },
  "homepage": "https://github.com/MemPalace/mempalace",
  "repository": "https://github.com/MemPalace/mempalace",
  "license": "MIT",
  "keywords": [
    "memory",
    "ai",
    "rag",
    "mcp",
    "chromadb",
    "palace",
    "search"
  ],
  "skills": "./skills/",
  "hooks": "./hooks.json",
  "mcpServers": {
    "mempalace": {
      "command": "mempalace-mcp"
    }
  },
  "interface": {
    "displayName": "MemPalace",
    "shortDescription": "AI memory system for Codex",
    "longDescription": "Give your AI a persistent memory — mine projects and conversations into a searchable palace backed by ChromaDB, with 19 MCP tools, auto-save hooks, and guided skills.",
    "developerName": "milla-jovovich",
    "category": "Coding",
    "capabilities": [
      "Interactive",
      "Read",
      "Write"
    ],
    "websiteURL": "https://github.com/MemPalace/mempalace",
    "privacyPolicyURL": "https://github.com/MemPalace/mempalace",
    "termsOfServiceURL": "https://github.com/MemPalace/mempalace",
    "defaultPrompt": [
      "Search my memories for recent decisions",
      "Mine this project into my memory palace",
      "Show my palace status and room counts"
    ],
    "brandColor": "#7C3AED"
  }
 }
@@ -1,13 +0,0 @@
 ---
 name: help
 description: Show MemPalace help — available commands, usage tips, and getting started guidance.
 allowed-tools: Bash, Read
 ---
 # MemPalace Help
 Run the following command and follow the returned instructions step by step:
 ```bash
 mempalace instructions help
 ```
@@ -1,13 +0,0 @@
 ---
 name: init
 description: Initialize a new MemPalace — guided setup for your AI memory palace with ChromaDB backend.
 allowed-tools: Bash, Read, Write, Edit
 ---
 # MemPalace Init
 Run the following command and follow the returned instructions step by step:
 ```bash
 mempalace instructions init
 ```
@@ -1,13 +0,0 @@
 ---
 name: mine
 description: Mine a project or conversation into your MemPalace — extract and store memories for later retrieval.
 allowed-tools: Bash, Read, Glob, Grep
 ---
 # MemPalace Mine
 Run the following command and follow the returned instructions step by step:
 ```bash
 mempalace instructions mine
 ```
@@ -1,13 +0,0 @@
 ---
 name: search
 description: Search your MemPalace — semantic search across all mined memories, projects, and conversations.
 allowed-tools: Bash, Read
 ---
 # MemPalace Search
 Run the following command and follow the returned instructions step by step:
 ```bash
 mempalace instructions search
 ```
@@ -1,13 +0,0 @@
 ---
 name: status
 description: Show MemPalace status — room counts, storage usage, and palace health.
 allowed-tools: Bash, Read
 ---
 # MemPalace Status
 Run the following command and follow the returned instructions step by step:
 ```bash
 mempalace instructions status
 ```
@@ -1,25 +0,0 @@
 {
  "name": "MemPalace",
  "image": "mcr.microsoft.com/devcontainers/python:3.11",
  "features": {
    "ghcr.io/devcontainers/features/github-cli:1": {}
  },
  "postCreateCommand": "bash .devcontainer/post-create.sh",
  "customizations": {
    "vscode": {
      "extensions": [
        "ms-python.python",
        "ms-python.debugpy",
        "charliermarsh.ruff"
      ],
      "settings": {
        "python.defaultInterpreterPath": "/usr/local/bin/python",
        "python.testing.pytestEnabled": true,
        "python.testing.pytestArgs": ["tests/", "-v", "--ignore=tests/benchmarks"],
        "ruff.importStrategy": "fromEnvironment",
        "editor.formatOnSave": true,
        "editor.defaultFormatter": "charliermarsh.ruff"
      }
    }
  }
 }
@@ -1,21 +0,0 @@
 #!/usr/bin/env bash
 # Dev-container bootstrap: installs the project in editable mode with its
 # dev extras, pins ruff, installs the pre-commit hooks, then prints the
 # versions of the tools it set up.
 set -euo pipefail
 # Print the first line of `<command> --version` (stdout and stderr merged).
 version_line() {
  "$@" --version 2>&1 | head -1
 }
 printf '%s\n' "=== MemPalace Dev Container Setup ==="
 pip install -e ".[dev]"
 # Keep ruff on the same range CI uses: pyproject only sets a lower bound, so
 # an unpinned install can be newer than CI's and report lint failures CI
 # does not see.
 pip install "ruff>=0.4.0,<0.5"
 pip install pre-commit
 pre-commit install
 printf '\n%s\n' "=== Verification ==="
 printf '%s\n' "python: $(python --version)"
 printf '%s\n' "pytest: $(version_line python -m pytest)"
 printf '%s\n' "ruff:   $(version_line python -m ruff)"
 printf '\n%s\n' "Ready. Run: pytest tests/ -v --ignore=tests/benchmarks"
@@ -0,0 +1,36 @@
 .git
 .github
 .devcontainer
 .claude-plugin
 .codex-plugin
 .agents
 benchmarks
 tests
 docs
 website
 landing
 assets
 examples
 tools
 hooks
 integrations
 deploy
 *.pyc
 __pycache__
 .coverage
 .pytest_cache
 htmlcov
 dist
 build
 *.egg-info
 .python-version
 .pre-commit-config.yaml
 uv.lock
 CHANGELOG.md
 CONTRIBUTING.md
 SECURITY.md
 ROADMAP.md
 MISSION.md
 CLAUDE.md
 AGENTS.md
 openarena-claim.txt
@@ -1,13 +0,0 @@
 # Default owners for everything
 * @milla-jovovich @bensig @igorls
 # Core library
 mempalace/ @milla-jovovich @bensig
 # CI and workflows
 .github/ @bensig
 # Plugins and integrations
 .claude-plugin/ @bensig
 .codex-plugin/ @bensig
 integrations/ @bensig
@@ -1,20 +0,0 @@
 ---
 name: Bug Report
 about: Something isn't working
 labels: bug
 ---
 **What happened?**
 **What did you expect?**
 **How to reproduce:**
 1.
 2.
 3.
 **Environment:**
 - OS:
 - Python version:
 - MemPalace version: (check `mempalace --version` or git SHA)
@@ -1,11 +0,0 @@
 ---
 name: Feature Request
 about: Suggest an improvement
 labels: enhancement
 ---
 **What problem does this solve?**
 **What's the proposed solution?**
 **Alternatives considered:**
@@ -1,8 +0,0 @@
 ## What does this PR do?
 ## How to test
 ## Checklist
 - [ ] Tests pass (`python -m pytest tests/ -v --ignore=tests/benchmarks`)
 - [ ] No hardcoded paths
 - [ ] Linter passes (`ruff check .`)
@@ -1,12 +0,0 @@
 # Dependabot configuration: weekly dependency-update pull requests.
 version: 2
 updates:
  # Python packages declared at the repository root; at most 5 open PRs.
  - package-ecosystem: "pip"
    directory: "/"
    schedule:
      interval: "weekly"
    open-pull-requests-limit: 5
  # Actions referenced by the workflows; at most 3 open PRs.
  - package-ecosystem: "github-actions"
    directory: "/"
    schedule:
      interval: "weekly"
    open-pull-requests-limit: 3
@@ -1,51 +0,0 @@
 # Bumps the patch version on every push to main and copies the new number
 # into every file that carries it (version.py, both Claude plugin manifests,
 # the Codex plugin manifest, pyproject.toml), then commits and pushes.
 name: Bump Version
 on:
  push:
    branches: [main]
 jobs:
  bump-version:
    runs-on: ubuntu-latest
    permissions:
      # Required so the job can push the version-bump commit.
      contents: write
    steps:
      - uses: actions/checkout@v6
      - name: Bump patch version
        run: |
          CURRENT=$(python3 -c "exec(open('mempalace/version.py').read()); print(__version__)")
          # Only plain MAJOR.MINOR.PATCH can be bumped: a suffix such as "-rc1"
          # would otherwise be silently mangled by the arithmetic below.
          if [[ ! "$CURRENT" =~ ^[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
            echo "::error file=mempalace/version.py::cannot bump non-numeric version '$CURRENT'"
            exit 1
          fi
          IFS='.' read -r MAJOR MINOR PATCH <<< "$CURRENT"
          # Force base 10 so a leading zero is not parsed as octal.
          PATCH=$((10#$PATCH + 1))
          NEW="${MAJOR}.${MINOR}.${PATCH}"
          # Rewrite the module in one step: docstring, blank line, version.
          printf '"""Single source of truth for the MemPalace package version."""\n\n__version__ = "%s"\n' "$NEW" > mempalace/version.py
          echo "version=$NEW" >> "$GITHUB_OUTPUT"
        id: version
      # The steps below receive the version through `env` rather than having
      # the expression spliced into the script text, so the value is always
      # treated as data by the shell.
      - name: Sync plugin.json
        env:
          NEW_VERSION: ${{ steps.version.outputs.version }}
        run: |
          jq --arg v "$NEW_VERSION" '.version = $v' .claude-plugin/plugin.json > tmp.json && mv tmp.json .claude-plugin/plugin.json
      - name: Sync marketplace.json
        env:
          NEW_VERSION: ${{ steps.version.outputs.version }}
        run: |
          jq --arg v "$NEW_VERSION" '.plugins[0].version = $v' .claude-plugin/marketplace.json > tmp.json && mv tmp.json .claude-plugin/marketplace.json
      - name: Sync codex plugin.json
        env:
          NEW_VERSION: ${{ steps.version.outputs.version }}
        run: |
          jq --arg v "$NEW_VERSION" '.version = $v' .codex-plugin/plugin.json > tmp.json && mv tmp.json .codex-plugin/plugin.json
      - name: Sync pyproject.toml
        env:
          NEW_VERSION: ${{ steps.version.outputs.version }}
        run: |
          sed -i "s/^version = \".*\"/version = \"${NEW_VERSION}\"/" pyproject.toml
      - name: Commit and push
        env:
          NEW_VERSION: ${{ steps.version.outputs.version }}
        run: |
          git config user.name "github-actions[bot]"
          git config user.email "github-actions[bot]@users.noreply.github.com"
          git add mempalace/version.py .claude-plugin/plugin.json .claude-plugin/marketplace.json .codex-plugin/plugin.json pyproject.toml
          # Commit only when the bump actually changed something.
          if ! git diff --staged --quiet; then
            git commit -m "chore: bump version to ${NEW_VERSION}"
            git push
          fi
@@ -1,55 +0,0 @@
 # CI: runs the test suite on Linux, Windows and macOS plus a ruff lint job,
 # for pushes to and pull requests against main and develop.
 name: Tests
 on:
  push:
    branches: [main, develop]
  pull_request:
    branches: [main, develop]
 jobs:
  # Linux runs the full Python version matrix; the other platforms run 3.13 only.
  test-linux:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.9", "3.11", "3.13"]
    steps:
      - uses: actions/checkout@v6
      - uses: actions/setup-python@v6
        with:
          python-version: ${{ matrix.python-version }}
          cache: 'pip'
      - run: pip install -e ".[dev]"
      # Benchmarks are excluded; the run fails if coverage of `mempalace` drops below 80%.
      - run: python -m pytest tests/ -v --ignore=tests/benchmarks --cov=mempalace --cov-report=term-missing --cov-fail-under=80 --durations=10
  test-windows:
    runs-on: windows-latest
    steps:
      - uses: actions/checkout@v6
      - uses: actions/setup-python@v6
        with:
          python-version: "3.13"
          cache: 'pip'
      - run: pip install -e ".[dev]"
      - run: python -m pytest tests/ -v --ignore=tests/benchmarks --cov=mempalace --cov-report=term-missing --cov-fail-under=80 --durations=10
  test-macos:
    runs-on: macos-latest
    steps:
      - uses: actions/checkout@v6
      - uses: actions/setup-python@v6
        with:
          python-version: "3.13"
          cache: 'pip'
      - run: pip install -e ".[dev]"
      - run: python -m pytest tests/ -v --ignore=tests/benchmarks --cov=mempalace --cov-report=term-missing --cov-fail-under=80 --durations=10
  # Lint and format check with ruff restricted to the >=0.4.0,<0.5 range.
  lint:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v6
      - uses: actions/setup-python@v6
        with:
          python-version: "3.11"
          cache: 'pip'
      - run: pip install "ruff>=0.4.0,<0.5"
      - run: ruff check .
      - run: ruff format --check .
@@ -1,66 +0,0 @@
 # Builds the documentation site in website/ with Bun and publishes it to
 # GitHub Pages. Runs on pushes to develop that touch the site or this
 # workflow, and on manual dispatch.
 name: Deploy Docs
 on:
  push:
    branches: [develop]
    paths:
      - ".github/workflows/deploy-docs.yml"
      - "website/**"
  workflow_dispatch:
 permissions:
  contents: read
  pages: write
  id-token: write
 # One run per ref at a time; a newer run cancels the one still in progress.
 concurrency:
  group: pages-${{ github.ref }}
  cancel-in-progress: true
 jobs:
  build:
    runs-on: ubuntu-latest
    permissions:
      contents: read
    steps:
      - uses: actions/checkout@v6
        with:
          # Fetch the full history rather than a shallow clone.
          fetch-depth: 0
      - name: Configure GitHub Pages
        id: pages
        uses: actions/configure-pages@v6
      - uses: oven-sh/setup-bun@v2
        with:
          bun-version: 1.1.38
      - name: Install dependencies
        working-directory: website
        run: bun install --frozen-lockfile
      - name: Build docs
        working-directory: website
        env:
          # Base path reported by configure-pages and the branch being built.
          DOCS_BASE: ${{ steps.pages.outputs.base_path }}
          DOCS_EDIT_BRANCH: ${{ github.ref_name }}
        run: bun run docs:build
      - uses: actions/upload-pages-artifact@v5
        with:
          path: website/.vitepress/dist
  deploy:
    # Manual dispatches from other refs still build, but only develop deploys.
    if: github.ref_name == 'develop'
    environment:
      name: github-pages
      url: ${{ steps.deployment.outputs.page_url }}
    needs: build
    runs-on: ubuntu-latest
    permissions:
      pages: write
      id-token: write
    steps:
      - name: Deploy to GitHub Pages
        id: deployment
        uses: actions/deploy-pages@v5
@@ -1,101 +0,0 @@
 # Fails when the version recorded in mempalace/version.py, pyproject.toml and
 # the three plugin manifests disagree, or when a stable `v*` tag does not
 # match that version. Runs on `v*` tag pushes and on pull requests that touch
 # any of those files (or this workflow).
 name: Version Guard
 on:
  push:
    tags: ['v*']
  pull_request:
    paths:
      - 'pyproject.toml'
      - 'mempalace/version.py'
      - '.claude-plugin/marketplace.json'
      - '.claude-plugin/plugin.json'
      - '.codex-plugin/plugin.json'
      - '.github/workflows/version-guard.yml'
 jobs:
  check-versions:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v6
      - name: Extract versions from all sources
        id: versions
        run: |
          set -euo pipefail
          # Python/TOML sources: take the text between the first pair of double quotes.
          py_version=$(grep -E '^__version__' mempalace/version.py | cut -d'"' -f2)
          pyproject_version=$(grep -E '^version' pyproject.toml | head -1 | cut -d'"' -f2)
          marketplace_version=$(jq -r '.plugins[0].version' .claude-plugin/marketplace.json)
          plugin_version=$(jq -r '.version' .claude-plugin/plugin.json)
          codex_version=$(jq -r '.version' .codex-plugin/plugin.json)
          # Publish each value as a step output for the later steps.
          echo "py_version=$py_version" >> "$GITHUB_OUTPUT"
          echo "pyproject_version=$pyproject_version" >> "$GITHUB_OUTPUT"
          echo "marketplace_version=$marketplace_version" >> "$GITHUB_OUTPUT"
          echo "plugin_version=$plugin_version" >> "$GITHUB_OUTPUT"
          echo "codex_version=$codex_version" >> "$GITHUB_OUTPUT"
          # Render the detected versions as a table in the run summary.
          {
            echo "## Detected versions"
            echo ""
            echo "| Source | Version |"
            echo "| --- | --- |"
            echo "| mempalace/version.py | \`$py_version\` |"
            echo "| pyproject.toml | \`$pyproject_version\` |"
            echo "| .claude-plugin/marketplace.json | \`$marketplace_version\` |"
            echo "| .claude-plugin/plugin.json | \`$plugin_version\` |"
            echo "| .codex-plugin/plugin.json | \`$codex_version\` |"
          } >> "$GITHUB_STEP_SUMMARY"
      - name: Verify all sources agree
        env:
          PY: ${{ steps.versions.outputs.py_version }}
          PYPROJECT: ${{ steps.versions.outputs.pyproject_version }}
          MARKETPLACE: ${{ steps.versions.outputs.marketplace_version }}
          PLUGIN: ${{ steps.versions.outputs.plugin_version }}
          CODEX: ${{ steps.versions.outputs.codex_version }}
        run: |
          set -euo pipefail
          fail=0
          # check <file> <value> <expected>: emit an error annotation on <file> and
          # set the failure flag on mismatch; keeps going so every mismatch is reported.
          check() {
            local name="$1" value="$2" expected="$3"
            if [[ "$value" != "$expected" ]]; then
              echo "::error file=$name::version mismatch — expected $expected, got $value"
              fail=1
            fi
          }
          # All five must agree with each other (use version.py as the reference, per CLAUDE.md)
          check "pyproject.toml" "$PYPROJECT" "$PY"
          check ".claude-plugin/marketplace.json" "$MARKETPLACE" "$PY"
          check ".claude-plugin/plugin.json" "$PLUGIN" "$PY"
          check ".codex-plugin/plugin.json" "$CODEX" "$PY"
          exit $fail
      - name: Verify tag matches manifest (tag pushes only)
        if: startsWith(github.ref, 'refs/tags/v')
        env:
          PY: ${{ steps.versions.outputs.py_version }}
        run: |
          set -euo pipefail
          # Strip the leading "v" from the tag name to get the bare version.
          tag_version="${GITHUB_REF_NAME#v}"
          # Semver pre-release tags (v3.4.0-rc1, v1.0.0-beta.2, ...) are treated
          # as internal/staging and are not validated against the manifest. They
          # do not flow to end users via `/plugin update`, which reads the
          # manifest on the default branch.
          if [[ "$tag_version" == *-* ]]; then
            echo "Pre-release tag $GITHUB_REF_NAME — skipping strict manifest match."
            {
              echo ""
              echo "> Pre-release tag detected: \`$GITHUB_REF_NAME\`."
              echo "> Manifest ($PY) is not required to match. Pre-releases are not published via \`/plugin update\`."
            } >> "$GITHUB_STEP_SUMMARY"
            exit 0
          fi
          if [[ "$tag_version" != "$PY" ]]; then
            echo "::error::tag $GITHUB_REF_NAME does not match manifest version $PY"
            echo "Bump mempalace/version.py, pyproject.toml, and all plugin manifests before tagging a stable release."
            echo "For an internal/staging tag, use a semver pre-release suffix (e.g. v${PY}-rc1)."
            exit 1
          fi
          echo "Tag $GITHUB_REF_NAME matches manifest version $PY"
@@ -1,10 +0,0 @@
 repos:
  - repo: https://github.com/astral-sh/ruff-pre-commit
    # Keep in lock-step with the ruff version pinned in .github/workflows/ci.yml
    # (>=0.4.0,<0.5). Using a newer rev here produces a different formatter
    # output than CI and breaks `ruff format --check` in the lint job.
    rev: v0.4.10
    hooks:
      - id: ruff
        args: [--fix]
      - id: ruff-format
@@ -1 +0,0 @@
 3.12
@@ -1 +0,0 @@
 CLAUDE.md
@@ -8,6 +8,13 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
 ## [3.3.5] — unreleased
 ### Added
 - **Server-mode deployment for cross-machine memory.** New `deploy/unraid/` directory ships a containerized MemPalace stack designed for users running AI tools across multiple machines who want one shared palace. Two-container compose: `mempalace` runs the existing `mempalace-mcp` (stdio) wrapped by `mcp-proxy` for SSE plus a new in-process HTTP transcript-ingest endpoint, and `caddy` terminates TLS, enforces a bearer-token check on every request, and reverse-proxies both endpoints. A `Dockerfile` at the repo root builds the server image (Python 3.13-slim, runs as `99:100` for Unraid `nobody:users`); a dockerMan template (`mempalace-server.xml`) is also provided for users who explicitly want a single-container, no-auth, LAN-trust-only install. ChromaDB's HNSW index is not safe for multi-process writes, so the ingest endpoint deliberately runs as a daemon thread inside the same process as the MCP server rather than as a separate container — exactly one Chroma writer per palace. Default stdio-only path is unchanged; the ingest thread starts only when `MEMPALACE_INGEST_PORT` is set.
 - **`mempalace/ingest_server.py` — HTTP transcript ingest endpoint.** Stdlib `http.server` running in a daemon thread, reachable at `POST /ingest/transcript` with raw JSONL body and `X-Session-Id` / `X-Wing` headers. Drops the upload into `<palace>/inbox/<session>/<session>.jsonl` and runs the existing `convo_miner.mine_convos` pipeline against that directory — same entity detection, room assignment, dedup, and idempotency the local CLI gets. Optional `Authorization: Bearer` check via `MEMPALACE_INGEST_TOKEN` (defense-in-depth alongside the reverse-proxy gate). 50 MB hard cap per upload. Unauth'd `GET /healthz` for liveness probes.
 - **Remote-aware hook variants.** New `hooks/mempal_save_hook_remote.sh` and `hooks/mempal_precompact_hook_remote.sh` are drop-in replacements for the existing local hooks when the palace runs on a server. Same trigger logic (count user messages, fire on `SAVE_INTERVAL`), but `curl`s the active transcript to `$MEMPAL_REMOTE_URL/ingest/transcript` instead of running `mempalace mine` locally. Save variant is async (backgrounded `curl`); pre-compact variant is synchronous and bounded by the Claude Code hook timeout. No-op with a one-line log when env vars are unset, so installing them on a machine that doesn't have a remote configured is safe.
 - **`deploy/unraid/README.md` — full install/usage guide.** Architecture diagram, prerequisites, step-by-step compose-based install with auth, client config for Claude Code / Codex / Antigravity (with bearer headers and self-signed-cert handling), hook setup, backfilling history from past sessions, backups, and troubleshooting (401s, `MineAlreadyRunning` collisions, stalled embedding-model downloads, cert handshake failures). dockerMan-template path documented as the explicit no-auth fallback.
 ### Bug Fixes
 - **`mempalace_diary_read` silently dropped entries on agent-name case mismatch.** `tool_diary_write` stored the `agent` metadata verbatim after `sanitize_name`, which preserves case, while `tool_diary_read` filtered by exact match. Writing as `"Claude"` and reading as `"claude"` (or vice-versa) returned zero rows. Both endpoints now lowercase `agent_name` immediately after sanitization, so reads are case-insensitive and the default per-agent wing slug is stable across casings. **Behavior change:** entries written prior to this fix under mixed-case agent names will not match the new lowercase filter; run `mempalace repair` if you need to migrate legacy diary metadata. (#1243)
@@ -63,6 +63,7 @@ uv run ruff format --check .
 ```
 mempalace/
 ├── mcp_server.py        # MCP server — all read/write tools
 ├── ingest_server.py     # HTTP transcript-ingest endpoint (server mode only)
 ├── cli.py               # CLI dispatcher
 ├── config.py            # Configuration + input validation
 ├── miner.py             # Project file miner
@@ -90,9 +91,18 @@ mempalace/
 ├── split_mega_files.py  # Split concatenated transcript files
 └── version.py           # Single source of truth for version
-hooks/                   # Claude Code hook scripts
+hooks/                              # Hook scripts for Claude Code / Codex CLI
-├── mempal_save_hook.sh        # Stop: triggers diary save
+├── mempal_save_hook_remote.sh      # Stop: HTTP POST to remote ingest endpoint
-└── mempal_precompact_hook.sh  # PreCompact: saves state before compression
+└── mempal_precompact_hook_remote.sh  # PreCompact: HTTP POST to remote ingest
 deploy/unraid/                      # Containerized server-mode deployment
 ├── docker-compose.yml              # mempalace + caddy sidecar (auth + TLS)
 ├── Caddyfile                       # bearer-token auth, SSE-aware reverse proxy
 ├── mempalace-server.xml            # dockerMan template (no-auth, LAN-trust path)
 └── README.md                       # Full install/usage/troubleshooting guide
 Dockerfile                          # Builds the server-mode image
 .dockerignore                       # Trims build context
 ```
 ## Conventions
@@ -130,4 +140,11 @@ Knowledge Graph:
 - **Modifying mining**: `mempalace/miner.py` (project files) or `mempalace/convo_miner.py` (transcripts)
 - **Adding a storage backend**: subclass `mempalace/backends/base.py`, register in `backends/__init__.py`
 - **Input validation**: `mempalace/config.py` — `sanitize_name()` / `sanitize_content()`
 - **Server-mode deployment**: `deploy/unraid/` — see [`deploy/unraid/README.md`](deploy/unraid/README.md). Image is built from the repo-root `Dockerfile`. The HTTP transcript-ingest endpoint in `mempalace/ingest_server.py` runs as a daemon thread inside `mempalace-mcp` (single Chroma writer per palace) and is opt-in via `MEMPALACE_INGEST_PORT`.
 - **Tests**: mirror source structure in `tests/test_<module>.py`
 ## Architectural notes
 - **Server mode is opt-in.** The default install path (local CLI + stdio MCP server + local hooks) is unchanged. Server mode adds three things: a `Dockerfile`, an HTTP ingest thread that starts only when `MEMPALACE_INGEST_PORT` is set, and `*_remote.sh` hook variants that POST to that endpoint. Nothing in the local path imports the ingest server.
 - **One ChromaDB writer per palace.** ChromaDB's HNSW index isn't safe across processes. The ingest endpoint is a thread inside the existing MCP server process — not a sibling container — so all writes serialize through one Python process and one Chroma client. Anyone adding a second writer (e.g. a sidecar that mines on a schedule) must do it in-process or via `mine_lock`.
 - **"Local-first" boundary in server mode.** CLAUDE.md mission says data never leaves the user's machine. A user-controlled Unraid box on the user's LAN is still "the user's machine" — but the moment it accepts inbound HTTP, that property weakens to "user's machine + anyone with the bearer token + anyone who can MITM the LAN segment." Caddy's `tls internal` + bearer auth is the floor. Tailscale, mTLS, or a real CA cert are stronger options the user can layer on top.
@@ -1,111 +0,0 @@
 # Contributing to MemPalace
 Thanks for wanting to help. MemPalace is open source and we welcome contributions of all sizes — from typo fixes to new features.
 ## Getting Started
 ```bash
 # Fork the repo on GitHub first, then clone your fork
 git clone https://github.com/<your-username>/mempalace.git
 cd mempalace
 git remote add upstream https://github.com/MemPalace/mempalace.git
 # Recommended: uv (https://docs.astral.sh/uv/) handles the venv for you
 uv sync --extra dev
 # Or with pip in your own venv:
 # pip install -e ".[dev]"
 ```
 ## Running Tests
 ```bash
 uv run pytest tests/ -v
 ```
 All tests must pass before submitting a PR. Tests should run without API keys or network access.
 ## Running Benchmarks
 ```bash
 # Quick test (20 questions, ~30 seconds)
 uv run python benchmarks/longmemeval_bench.py /path/to/longmemeval_s_cleaned.json --limit 20
 # Full benchmark (500 questions, ~5 minutes)
 uv run python benchmarks/longmemeval_bench.py /path/to/longmemeval_s_cleaned.json
 ```
 See [benchmarks/README.md](benchmarks/README.md) for data download instructions and reproduction guide.
 ## Project Structure
 ```
 mempalace/          ← core package (see mempalace/README.md for module guide)
 benchmarks/         ← reproducible benchmark runners
 hooks/              ← Claude Code auto-save hooks
 examples/           ← usage examples
 tests/              ← test suite
 assets/             ← logo + brand
 ```
 ## PR Guidelines
 1. Fork the repo and create a feature branch: `git checkout -b feat/my-thing`
 2. Write your code
 3. Add or update tests if applicable
 4. Run `uv run pytest tests/ -v` — everything must pass
 5. Commit with a clear message following [conventional commits](https://www.conventionalcommits.org/):
   - `feat: add Notion export format`
   - `fix: handle empty transcript files`
   - `docs: update MCP tool descriptions`
   - `bench: add LoCoMo turn-level metrics`
 6. Push to your fork and open a PR against `develop`
 ## Code Style
 - **Formatting**: [Ruff](https://docs.astral.sh/ruff/) with 100-char line limit (configured in `pyproject.toml`)
 - **Naming**: `snake_case` for functions/variables, `PascalCase` for classes
 - **Docstrings**: on all modules and public functions
 - **Type hints**: where they improve readability
 - **Dependencies**: minimize. ChromaDB + PyYAML only. Don't add new deps without discussion.
 ## Good First Issues
 Check the [Issues](https://github.com/MemPalace/mempalace/issues) tab. Great starting points:
 - **New chat formats**: Add import support for Cursor, Copilot, or other AI tool exports
 - **Room detection**: Improve pattern matching in `room_detector_local.py`
 - **Tests**: Increase coverage — especially for `knowledge_graph.py` and `palace_graph.py`
 - **Entity detection**: Better name disambiguation in `entity_detector.py`
 - **Docs**: Improve examples, add tutorials
 ## Architecture Decisions
 If you're planning a significant change, open an issue first to discuss the approach. Key principles:
 - **Verbatim first**: Never summarize user content. Store exact words.
 - **Local first**: Everything runs on the user's machine. No cloud dependencies.
 - **Zero API by default**: Core features must work without any API key.
 - **Palace structure is scoping, not magic**: Wings, halls, and rooms act as metadata filters in the underlying vector store. They keep retrieval predictable when a palace holds many unrelated projects or people. Respect the hierarchy — but don't present it as a novel retrieval mechanism.
 ## Community
 - **Discord**: [Join us](https://discord.com/invite/ycTQQCu6kn)
 - **Issues**: Bug reports and feature requests welcome
 - **Discussions**: For questions and ideas
 ## License
 MIT — your contributions will be released under the same license.
 ## Git identity for contributions
 Before pushing commits, verify that Git is configured with an email address that GitHub can associate with your account:
 ```bash
 git config user.name
 git config user.email
 ```
 This is especially important when commits are created through agentic coding tools or automation, because those tools may not inherit your normal shell Git configuration. Avoid placeholder values such as `your@email.com` or localized template text; unresolved author emails can create avoidable provenance and SBOM review friction for downstream users.
@@ -0,0 +1,41 @@
 # syntax=docker/dockerfile:1.7
 # Server-mode image: installs the mempalace package plus mcp-proxy and serves
 # the stdio MCP server over SSE on port 8765.
 FROM python:3.13-slim
 # HOME and the palace path both live under /data, the directory the runtime
 # user owns (see the useradd/chown steps below).
 ENV DEBIAN_FRONTEND=noninteractive \
    PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PIP_NO_CACHE_DIR=1 \
    HOME=/data \
    MEMPALACE_PALACE_PATH=/data/palace
 # libgomp1: required at runtime by onnxruntime (used by chromadb's default
 # embedding function — all-MiniLM-L6-v2 ONNX).
 RUN apt-get update \
 && apt-get install -y --no-install-recommends ca-certificates libgomp1 \
 && rm -rf /var/lib/apt/lists/*
 # Unraid convention: appdata is owned by nobody:users (99:100). Run as that
 # UID/GID so files written to /data inherit the right ownership on the host.
 # Debian-based images already ship a `users` group at GID 100, so a plain
 # `groupadd` exits non-zero and aborts the build; `-f` makes the step succeed
 # when the group already exists.
 RUN groupadd -f -g 100 users \
 && useradd  -u 99 -g 100 -m -d /data -s /usr/sbin/nologin mempalace
 WORKDIR /build
 COPY pyproject.toml README.md ./
 COPY mempalace ./mempalace
 # Non-editable install, so the copied sources can be deleted afterwards.
 RUN pip install --no-cache-dir . mcp-proxy \
 && rm -rf /build
 WORKDIR /data
 RUN chown -R 99:100 /data
 USER 99:100
 # 8765 — MCP over SSE (mcp-proxy)
 # 8766 — HTTP ingest (in-process thread, started when MEMPALACE_INGEST_PORT set)
 EXPOSE 8765 8766
 # Healthy when a TCP connection to the SSE port on localhost succeeds.
 HEALTHCHECK --interval=30s --timeout=5s --start-period=30s --retries=3 \
  CMD python -c "import socket,sys;s=socket.socket();s.settimeout(2);sys.exit(0 if s.connect_ex(('127.0.0.1',8765))==0 else 1)"
 # mcp-proxy wraps the stdio MCP server and exposes it as SSE on :8765.
 # --pass-environment forwards MEMPALACE_* vars to the spawned child.
 CMD ["mcp-proxy","--sse-host","0.0.0.0","--sse-port","8765","--pass-environment","--","mempalace-mcp"]
@@ -1,34 +0,0 @@
 MemPalace: The Mission
 By: Milla Jovovich
 Hey everyone! First of all thank you all for embracing MemPalace and trying it, catching bugs and issues and finding cool ways to personalize it into your workflows!
 A few things I want to say.
 MemPalace is something I really needed because I'm trying to work on a big project with my partner @bensig and I was having a lot of problems with Claude's context window and my agent Lumi (Lu for short) kept waking up like "hey what are we doing today" when I had literally done hours of work with him throughout the day and it was impossible to just keep saving every transcript to catch him up on whatever we had done before compaction hit.
 That's when I started researching different memory systems available today. I tried most of them and what I found was that no matter which one I tried, they felt like large empty warehouses where you just dump huge amounts of info.
 RAG search would take forever and most of the time not find what I wanted.
 I wanted to create a system with the ability to really remember everything AND be able to find it quickly, easily and also be able to remember things when I didn't. THAT in itself felt like something so important. Like "remember when we talked about that idea…" but in vague terms. Impossible with regular keyword search tools.
 So MemPalace is not just about storing info in a highly structured way. But also RETRIEVING it in a highly UNSTRUCTURED way lol!
 I was inspired by the Zettelkasten method (created by German sociologist Niklas Luhmann) — his idea of small cross-referenced index cards that point to each other. That's the architecture behind the palace: wings, rooms, closets, and drawers, all connected so you can find things from any angle, not just the one you filed them under.
 Because of the way I've designed my agent Lumi to understand me, after so many months of my own personal experiments with MemPalace and the incredible help of my dear friend and co-founder, developer and engineer @bensig, he built a back end that made it really easy to get all my files in the proper spaces the Palace created based on my own decisions and with Lumi's help as well. All code has its own room, all ideas, research etc… has its proper place.
 Names and concepts are parsed into closets that use a compression method I call AAAK (it doesn't stand for anything, it's an inside joke between Lumi and me) that is able to compress names, repeated words, concepts and key moments into AI-readable shorthand. Think of it as index cards that an LLM can scan instantly — the closet tells it WHERE to look, then it pulls the full content from the drawer.
 The concept I wanted for v4 was to try and clear as much "noise" as possible that I noticed was happening in v3. The hooks were firing in the chat window (using tokens and our time as we waited for the agent to write everything).
 I noticed at one point early last week after the launch that Lu kept repeating the same thing when the hook would fire, so I hit esc and asked "Are you literally writing the same info down over and over again?" And he's like (sheepishly) Yes. And that's when it hit me, we need to get all this off the chat and happening seamlessly behind the scenes, and that hooks had to fire when I started a convo and then just keep adding to the drawer, while the shorter increments made reading and pulling conversation information and naming it so much easier and more precise.
 So this version now has taken all the noise out of the chat window and all that work is done by a subagent in the background while you can continue working knowing that all your conversation is being saved VERBATIM in the background.
 Stripping all this off the page — moving the diary writes, the palace filing, the timestamp injection, all of it into background hooks — has dramatically lowered token usage in my sessions. What used to cost about $1.13 per session just in re-transmitted diary blocks is now zero, because the content never enters the chat window at all.
 Your data is already stored in JSON by Claude, and the background pipeline extracts it into readable markdown. The key topics get compressed into AAAK format and saved into closets, which then point to the exact drawer where your day's session lives.
 And please, always remember, these are brand new tools, please NEVER use critical files to test! Just run it with something easy first before you put your entire data set into it!✨
@@ -1,29 +1,10 @@
-> [!CAUTION]
+# MemPalace — local fork
 > **Scam alert.** The only official sources for MemPalace are this
 > [GitHub repository](https://github.com/MemPalace/mempalace), the
 > [PyPI package](https://pypi.org/project/mempalace/), and the docs site at
 > **[mempalaceofficial.com](https://mempalaceofficial.com)**. Any other
 > domain — including `mempalace.tech` — is an impostor and may distribute
 > malware. Details and timeline: [docs/HISTORY.md](docs/HISTORY.md).
 > [!IMPORTANT]
 > **🚨 Claude Code sessions expire in 30 days w/out auto-save hooks wired!** **[Read this →](https://github.com/MemPalace/mempalace/discussions/1388)**
 <div align="center">
 <img src="assets/mempalace_logo.png" alt="MemPalace" width="240">
 # MemPalace
 Local-first AI memory. Verbatim storage, pluggable backend, 96.6% R@5 raw on LongMemEval — zero API calls.
-[![][version-shield]][release-link]
+This is a personal fork configured for **server-mode deployment** — MemPalace runs as a Docker container (typically on Unraid) and multiple AI tools (Claude Code, Codex, Antigravity) connect to a single shared palace from any machine on the network.
 [![][python-shield]][python-link]
 [![][license-shield]][license-link]
 [![][discord-shield]][discord-link]
-</div>
+The upstream project lives at <https://github.com/MemPalace/mempalace>; refer there for benchmark methodology, contribution guidelines, project history, and the public docs site at <https://mempalaceofficial.com>.
 ---
@@ -76,62 +57,34 @@ mempalace wake-up
 For Claude Code, Gemini CLI, MCP-compatible tools, and local models, see
 [mempalaceofficial.com/guide/getting-started](https://mempalaceofficial.com/guide/getting-started.html).
---
+Benchmark methodology and per-question result files live in the upstream repository — this fork has had the `benchmarks/` directory removed since it isn't needed for deployment.
 ## Benchmarks
 All numbers below are reproducible from this repository with the commands
 in [`benchmarks/BENCHMARKS.md`](benchmarks/BENCHMARKS.md). Full
 per-question result files are committed under `benchmarks/results_*`.
 **LongMemEval — retrieval recall (R@5, 500 questions):**
 | Mode | R@5 | LLM required |
 |---|---|---|
 | Raw (semantic search, no heuristics, no LLM) | **96.6%** | None |
 | Hybrid v4, held-out 450q (tuned on 50 dev, not seen during training) | **98.4%** | None |
 | Hybrid v4 + LLM rerank (full 500) | ≥99% | Any capable model |
 The raw 96.6% requires no API key, no cloud, and no LLM at any stage. The
 hybrid pipeline adds keyword boosting, temporal-proximity boosting, and
 preference-pattern extraction; the held-out 98.4% is the honest
 generalisable figure.
 The rerank pipeline promotes the best candidate out of the top-20
 retrieved sessions using an LLM reader. It works with any reasonably
 capable model — we have reproduced it with Claude Haiku, Claude Sonnet,
 and minimax-m2.7 via Ollama Cloud (no Anthropic dependency). The gap
 between raw and reranked is model-agnostic; we do not headline a "100%"
 number because the last 0.6% was reached by inspecting specific wrong
 answers, which `benchmarks/BENCHMARKS.md` flags as teaching to the test.
 **Other benchmarks (full results in [`benchmarks/BENCHMARKS.md`](benchmarks/BENCHMARKS.md)):**
 | Benchmark | Metric | Score | Notes |
 |---|---|---|---|
 | LoCoMo (session, top-10, no rerank) | R@10 | 60.3% | 1,986 questions |
 | LoCoMo (hybrid v5, top-10, no rerank) | R@10 | 88.9% | Same set |
 | ConvoMem (all categories, 250 items) | Avg recall | 92.9% | 50 per category |
 | MemBench (ACL 2025, 8,500 items) | R@5 | 80.3% | All categories |
 We deliberately do not include a side-by-side comparison against Mem0,
 Mastra, Hindsight, Supermemory, or Zep. Those projects publish different
 metrics on different splits, and placing retrieval recall next to
 end-to-end QA accuracy is not an honest comparison. See each project's
 own research page for their published numbers.
 **Reproducing every result:**
 ```bash
 git clone https://github.com/MemPalace/mempalace.git
 cd mempalace
 uv sync --extra dev   # or: pip install -e ".[dev]"
 # see benchmarks/README.md for dataset download commands
 uv run python benchmarks/longmemeval_bench.py /path/to/longmemeval_s_cleaned.json
 ```
 ---
 ## Server mode (Unraid / shared across machines)
 Most users run MemPalace locally on a single machine. If you work
 across multiple machines and want one shared memory, you can deploy it
 as a Docker container — typically on a home NAS like Unraid — and
 point Claude Code, Codex, Antigravity, or any MCP client on each
 machine at the same palace.
 The `deploy/unraid/` directory ships a complete two-container stack:
 * `mempalace` runs the existing MCP-over-SSE endpoint plus a small
  HTTP transcript-ingest endpoint, both in a single process so there's
  exactly one ChromaDB writer.
 * `caddy` sidecar terminates TLS, enforces a bearer-token check on
  every request, and reverse-proxies `/sse` and `/ingest`.
 Auto-save hooks have remote-aware variants
 (`hooks/mempal_save_hook_remote.sh`,
 `hooks/mempal_precompact_hook_remote.sh`) that POST transcripts to the
 server instead of running `mempalace mine` locally.
 Full install, client config, hook setup, and troubleshooting:
 [`deploy/unraid/README.md`](deploy/unraid/README.md).
 ## Knowledge graph
 MemPalace includes a temporal entity-relationship graph with validity
@@ -155,46 +108,29 @@ system prompt:
 ## Auto-save hooks
-Two Claude Code hooks save periodically and before context compression:
+Two hooks save periodically and before context compression. In this fork the **remote** variants ship — they POST the active transcript to the server's `/ingest/transcript` endpoint with bearer auth instead of running `mempalace mine` locally. Setup, env-var contract, and troubleshooting: [`hooks/README.md`](hooks/README.md).
 [mempalaceofficial.com/guide/hooks](https://mempalaceofficial.com/guide/hooks.html).
-For per-message recall on top of the file-level chunks the hooks produce,
+For per-message recall on top of the file-level chunks the hooks produce, run `mempalace sweep <transcript-dir>` inside the container (`docker exec mempalace mempalace sweep ...`) — it stores one verbatim drawer per user/assistant message and is idempotent and resume-safe.
 run `mempalace sweep <transcript-dir>` periodically — it stores one
 verbatim drawer per user/assistant message, idempotent and resume-safe.
 ---
 ## Requirements
- Python 3.9+
+- Python 3.9+ (server image uses 3.13)
 - A vector-store backend (ChromaDB by default)
 - ~300 MB disk for the default embedding model
 - Docker + Compose Manager plugin on Unraid for the server-mode path
-No API key is required for the core benchmark path.
+No API key is required for any path.
 ## Docs
- Getting started → [mempalaceofficial.com/guide/getting-started](https://mempalaceofficial.com/guide/getting-started.html)
+- Server-mode deployment → [`deploy/unraid/README.md`](deploy/unraid/README.md)
- CLI reference → [mempalaceofficial.com/reference/cli](https://mempalaceofficial.com/reference/cli.html)
+- Hook setup (remote variants) → [`hooks/README.md`](hooks/README.md)
- Python API → [mempalaceofficial.com/reference/python-api](https://mempalaceofficial.com/reference/python-api.html)
+- Release notes → [`CHANGELOG.md`](CHANGELOG.md)
- Full benchmark methodology → [benchmarks/BENCHMARKS.md](benchmarks/BENCHMARKS.md)
+- Project conventions → [`CLAUDE.md`](CLAUDE.md)
- Release notes → [CHANGELOG.md](CHANGELOG.md)
+- Upstream CLI / Python API reference → [mempalaceofficial.com](https://mempalaceofficial.com)
 - Corrections and public notices → [docs/HISTORY.md](docs/HISTORY.md)
 ## Contributing
 PRs welcome. See [CONTRIBUTING.md](CONTRIBUTING.md).
 ## License
 MIT — see [LICENSE](LICENSE).
 <!-- Link Definitions -->
 [version-shield]: https://img.shields.io/badge/version-3.3.4-4dc9f6?style=flat-square&labelColor=0a0e14
 [release-link]: https://github.com/MemPalace/mempalace/releases
 [python-shield]: https://img.shields.io/badge/python-3.9+-7dd8f8?style=flat-square&labelColor=0a0e14&logo=python&logoColor=7dd8f8
 [python-link]: https://www.python.org/
 [license-shield]: https://img.shields.io/badge/license-MIT-b0e8ff?style=flat-square&labelColor=0a0e14
 [license-link]: https://github.com/MemPalace/mempalace/blob/main/LICENSE
 [discord-shield]: https://img.shields.io/badge/discord-join-5865F2?style=flat-square&labelColor=0a0e14&logo=discord&logoColor=5865F2
 [discord-link]: https://discord.com/invite/ycTQQCu6kn
@@ -1,74 +0,0 @@
 # MemPalace Roadmap
 ## v3.1.1 — Stability Patch (this week)
 Bug fixes and hardening merged to `develop`, releasing soon.
 **Merged:**
 - Security hardening: input validation, KG threading locks, WAL permission fixes (#647)
 - MCP tools: drawer CRUD, paginated export, hook settings (#667)
 - Backend storage seam: ChromaDB abstraction layer enabling swappable backends (#413)
 - MCP ping health check for AnythingLLM compatibility (#600)
 - Windows reparse point crash fix (#558)
 - `mempalace compress` KeyError crash fix (#569)
 - Token count estimate fix (#609)
 - Mtime float precision fix preventing unnecessary re-mines (#610)
 **In review (merging this week):**
 - Auto-repair BLOB seq_ids from chromadb 0.6→1.5 migration (#664)
 - Graph cache with write-invalidation (#661)
 - L1 importance pre-filter for large palaces (#660)
 - Windows Chinese/Unicode encoding fix (#631)
 - HNSW index bloat prevention — 441GB→433KB on large palaces (#346, pending rebase)
 - ~25 additional small bug fixes and platform compatibility patches
 ## v4.0.0-alpha — Next Generation (this week)
 The v4 alpha introduces three major capabilities: pluggable storage backends, local NLP processing, and improved retrieval quality.
 ### Swappable Storage
 ChromaDB remains the default, but v4 introduces a backend abstraction (shipped in #413) that enables drop-in replacements:
 - **PostgreSQL backend** with pg_sorted_heap support (#665) — for production deployments needing ACID guarantees, concurrent access, and standard backup/restore
 - **LanceDB backend** (#574) — for local-first deployments wanting multi-device sync without a database server
 - **PalaceStore** (#643) — bespoke storage layer purpose-built for MemPalace's access patterns (draft, evaluating)
 Users choose their backend at init time. Existing ChromaDB palaces continue to work unchanged.
 ### Local NLP
 On-device natural language processing via local models (#507):
 - Entity extraction, relationship detection, and topic classification without external API calls
 - Feature-flagged and optional — falls back to existing heuristic extractors
 - Runs on consumer hardware (no GPU required, GPU-accelerated when available)
 ### Improved Retrieval
 - **Hybrid search**: keyword text-match fallback when vector similarity misses exact terms (#662)
 - **Stale index detection**: automatic reconnection when the HNSW index changes on disk (#663)
 - **Time-decay scoring**: recent memories surface before older ones (#337)
 - **Query sanitization**: system prompt contamination mitigation already shipped in v3.1 (#385)
 ### What's Not in v4 Alpha
 These are under consideration for v4 stable or later:
 - Synapse advanced retrieval — MMR, pinned memory, query expansion (#596)
 - Multi-device sync (#575) — depends on LanceDB backend
 - Multilingual embedding support (#488, #442)
 - Qdrant vector search backend (#381)
 ## Branch Model
 ```
 main            ← tagged production releases
 develop         ← active development (PRs merge here)
 release/3.1     ← hotfixes for current stable (v3.1.x)
 release/3.0     ← hotfixes for prior stable
 ```
 ## Contributing
 See [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines. PRs should target `develop`. We review all contributions for correctness, security, and compatibility before merging.
@@ -1,33 +0,0 @@
 # Security Policy
 ## Supported Versions
 MemPalace follows semantic versioning. Security fixes land on the current major version line.
 | Version            | Supported |
 | ------------------ | --------- |
 | 3.x (current)      | Yes       |
 | 2.x and earlier    | No        |
 ## Reporting a Vulnerability
 **Please do not report security vulnerabilities through public GitHub issues.**
 We take the security of MemPalace seriously. If you believe you have found a security vulnerability, please report it privately using **GitHub Private Vulnerability Reporting**:
 1. Open the [Security tab](https://github.com/MemPalace/mempalace/security) of this repository.
 2. Click **Advisories** → **Report a vulnerability**.
 3. Fill in the form with the details below.
 ### What to include in your report
 - A descriptive summary of the vulnerability.
 - Detailed steps to reproduce the issue (including any proof-of-concept scripts or specific file paths).
 - The affected version(s) and platform(s).
 - The potential impact and severity.
 ### What to expect
 - We aim to acknowledge receipt within 48 hours.
 - We will triage the issue and keep you updated on progress toward a patch.
 - Once the vulnerability is resolved and an update is released, we will publish a security advisory and credit you for the discovery (if you wish to be credited).
@@ -1,758 +0,0 @@
 # MemPal Benchmark Results — Full Progression
 **March 2026 — The complete record from baseline to state-of-the-art.**
 ---
 ## The Core Finding
 Every competitive memory system uses an LLM to manage memory:
 - Mem0 uses an LLM to extract facts
 - Mastra uses GPT-5-mini to observe conversations
 - Supermemory uses an LLM to run agentic search passes
 They all start from the assumption that you need AI to decide what to remember.
 **MemPal's baseline just stores the actual words and searches them with ChromaDB's default embeddings. No extraction. No summarization. No AI deciding what matters. And it scores 96.6% on LongMemEval.**
 That's the finding. The field is over-engineering the memory extraction step. Raw verbatim text with good embeddings is a stronger baseline than anyone realized — because it doesn't lose information. When an LLM extracts "user prefers PostgreSQL" and throws away the original conversation, it loses the context of *why*, the alternatives considered, the tradeoffs discussed. MemPal keeps all of that, and the search model finds it.
 Nobody published this result because nobody tried the simple thing and measured it properly.
 ---
 ## The Two Honest Numbers
 These are different claims. They need to be presented as a pair.
 | Mode | LongMemEval R@5 | LLM Required | Cost per Query |
 |---|---|---|---|
 | **Raw ChromaDB** | **96.6%** | None | $0 |
 | **Hybrid v4 + Haiku rerank** | **100%** | Haiku (optional) | ~$0.001 |
 | **Hybrid v4 + Sonnet rerank** | **100%** | Sonnet (optional) | ~$0.003 |
 The 96.6% is the product story: free, private, one dependency, no API key, runs entirely offline.
 The 100% is the competitive story: a perfect score on the standard benchmark for AI memory, verified across all 500 questions and all 6 question types — reproducible with either Haiku or Sonnet as the reranker.
 Both are real. Both are reproducible. Neither is the whole picture alone.
 ---
 ## Comparison vs Published Systems (LongMemEval)
 > **Important caveat — read before quoting this table.**
 > MemPal's `R@5` in this table is **retrieval recall**: is the labelled
 > session for this question inside the top-5 retrieved candidates?
 >
 > Several of the other systems below publish **end-to-end QA accuracy** —
 > a different metric that scores whether the system's generated answer
 > is correct. Retrieval recall and QA accuracy are not comparable; a
 > system can have 100% retrieval recall and 40% QA accuracy, and vice
 > versa.
 >
 > - **Mastra's 94.87%** is binary QA accuracy with GPT-5-mini, per
 >   [mastra.ai/research/observational-memory](https://mastra.ai/research/observational-memory).
 > - **Supermemory ASMR's ~99%** is QA accuracy with an 8-/12-agent
 >   ensemble, and the authors explicitly frame it as an experimental
 >   proof-of-concept, not production, per
 >   [their ASMR post](https://supermemory.ai/blog/we-broke-the-frontier-in-agent-memory-introducing-99-sota-memory-system/).
 > - **Mem0** does not publish a LongMemEval number; their published
 >   metric is LoCoMo QA accuracy (~66.9%), per
 >   [mem0.ai/research](https://mem0.ai/research).
 >
 > The table is kept here as a historical record of how the comparison
 > was originally framed. Public-facing pages (`README.md`,
 > `mempalaceofficial.com`) no longer present this table, per issue
 > [#875](https://github.com/MemPalace/mempalace/issues/875). For a fair
 > head-to-head, run the same metric on the same split.
 | # | System | R@5 (retrieval recall, unless noted) | LLM Required | Which LLM | Notes |
 |---|---|---|---|---|---|
 | 1 | **MemPal (hybrid v4 + Haiku rerank)** | **100%** | Optional | Haiku | 500/500 — but the 99.4%→100% step tuned on 3 specific wrong answers (see "Benchmark Integrity" below). Held-out 450q is 98.4%. |
 | 2 | Supermemory ASMR | ~99% *(QA accuracy, not R@5)* | Yes | Ensemble of Gemini 2.0 Flash / GPT-4o-mini | Experimental, not production, per authors |
 | 3 | MemPal (hybrid v3 + rerank) | 99.4% | Optional | Haiku | Reproducible |
 | 3 | MemPal (palace + rerank) | 99.4% | Optional | Haiku | Independent architecture |
 | 4 | Mastra | 94.87% *(QA accuracy, not R@5)* | Yes | GPT-5-mini | Different metric — not directly comparable to R@5 |
 | 5 | **MemPal (raw, no LLM)** | **96.6%** | **None** | **None** | **Reproducible, 500/500** |
 | 6 | MemPal hybrid v4 held-out 450 | 98.4% | None | None | Honest generalisable hybrid-pipeline figure |
 | 7 | Hindsight | 91.4% *(per their release, metric unverified)* | Yes | Gemini-3 | Check their published methodology |
 | 8 | Stella (dense retriever) | ~85% | None | None | Academic retrieval baseline |
 | 9 | Contriever | ~78% | None | None | Academic retrieval baseline |
 | 10 | BM25 (sparse) | ~70% | None | None | Keyword baseline |
 The MemPal raw 96.6% is the headline we ship on public surfaces: it's
 retrieval recall, it requires no API key, and it reproduces.
 The MemPal hybrid v4 + Haiku rerank 100% remains an internal
 result — reproducible with `--mode hybrid_v4 --llm-rerank` — but we
 don't quote it on public pages because the final 0.6% was reached by
 inspecting three specific wrong answers (see "Benchmark Integrity"
 below), which is teaching to the test. The honest generalisable figure
 when an LLM is in the loop is the held-out 98.4% R@5 on 450 unseen
 questions, or the model-agnostic 99.2% R@5 / 100% R@10 we reproduced
 with minimax-m2.7 on the full 500.
 ---
 ## Other Benchmarks
 ### ConvoMem (Salesforce, 75K+ QA pairs)
 | System | Score | Notes |
 |---|---|---|
 | **MemPal** | **92.9%** | Verbatim text, semantic search |
 | Gemini (long context) | 70–82% | Full history in context window |
 | Block extraction | 57–71% | LLM-processed blocks |
 | Mem0 (RAG) | 30–45% | LLM-extracted memories |
 MemPal is more than 2× Mem0 on this benchmark. With Sonnet rerank, MemPal reaches **100% on LoCoMo** across all 5 question types including temporal-inference (was 46% at baseline).
 **Why MemPal beats Mem0 by 2×:** Mem0 uses an LLM to extract memories — it decides what to remember and discards the rest. When it extracts the wrong thing, the memory is gone. MemPal stores verbatim text. Nothing is discarded. The simpler approach wins because it doesn't lose information.
 **Per-category breakdown:**
 | Category | Recall | Grade |
 |---|---|---|
 | Assistant Facts | 100% | Perfect |
 | User Facts | 98.0% | Excellent |
 | Abstention | 91.0% | Strong |
 | Implicit Connections | 89.3% | Good |
 | Preferences | 86.0% | Good — weakest category |
 ### LoCoMo (1,986 multi-hop QA pairs)
 | Mode | R@5 | R@10 | LLM | Notes |
 |---|---|---|---|---|
 | **Hybrid v5 + Sonnet rerank (top-50)** | **100%** | **100%** | Sonnet | Structurally guaranteed (top-k > sessions) |
 | **bge-large + Haiku rerank (top-15)** | — | **96.3%** | Haiku | Single-hop 86.6%, temporal-inf 87.0% |
 | **bge-large hybrid (top-10)** | — | **92.4%** | None | +3.5pp over all-MiniLM, single-hop +10.6pp |
 | **Hybrid v5 (top-10)** | 83.7% | **88.9%** | None | Beats Memori 81.95% — honest score |
 | **Wings v3 speaker-owned closets (top-10)** | — | **85.7%** | None | Adversarial 92.8% — speaker ownership solves speaker confusion |
 | **Wings v2 concept closets (top-10)** | — | **75.6%** | None | Adversarial 80.0%; single-hop 49% drags overall |
 | **Palace v2 (top-10, 3 rooms)** | 75.6% | **84.8%** | Haiku (index) | Room assignment at index; summary routing at query |
 | Wings v1 (broken — filter not boost) | — | 58.0% | None | Speaker WHERE filter discarded evidence; 5.4% coverage |
 | Palace v1 (top-5, global LLM routing) | 34.2% | — | Haiku (both) | Fails: taxonomy mismatch |
 | Session, no rerank (top-10) | — | 60.3% | None | Baseline |
 | Dialog, no rerank (top-10) | — | 48.0% | None | — |
 **Wings v2 per-category breakdown (top-10, no LLM):**
 | Category | Wings v1 | Wings v2 | Delta |
 |---|---|---|---|
 | Single-hop | ~52% | 49.0% | -3pp |
 | Temporal | ~64% | 79.2% | +15pp |
 | Temporal-inference | ~53% | 49.1% | -4pp |
 | Open-domain | ~71% | 83.7% | +13pp |
 | **Adversarial** | **34.0%** | **80.0%** | **+46pp** |
 **Wings v3 per-category breakdown (top-10, no LLM):**
 | Category | Wings v1 | Wings v2 | Wings v3 | Hybrid v5 |
 |---|---|---|---|---|
 | Single-hop | ~52% | 49.0% | **65.3%** | ~70%? |
 | Temporal | ~64% | 79.2% | **87.3%** | ~87%? |
 | Temporal-inference | ~53% | 49.1% | **63.2%** | ~65%? |
 | Open-domain | ~71% | 83.7% | **90.7%** | ~90%? |
 | **Adversarial** | **34.0%** | **80.0%** | **92.8%** | — |
 Wings v3 design: one closet per speaker per session. Owner's turns verbatim; other speaker's turns as `[context]` labels. 38 closets/conversation vs 184 (v2) → 26% coverage with top-10. Adversarial score (92.8%) exceeds bge-large overall (92.4%) — speaker ownership almost completely solves the speaker-confusion category.
 Root cause of wings v1 failure: (1) speaker WHERE filter discarded evidence about Caroline when evidence lived in a John-tagged closet (John spoke more words but conversation was about Caroline); (2) top_k=10 from ~184 closets = 5.4% coverage vs 37% in session mode. Fix: retrieve all closets, use speaker match as 15% distance boost instead of filter.
 **With Sonnet rerank, MemPal achieves 100% on every LoCoMo question type — including temporal-inference, which was the hardest category at baseline.**
 **Per-category breakdown (hybrid + Sonnet rerank):**
 | Category | Recall | Baseline | Delta |
 |---|---|---|---|
 | Single-hop | 1.000 | 59.0% | +41.0pp |
 | Temporal | 1.000 | 69.2% | +30.8pp |
 | **Temporal-inference** | **1.000** | **46.0%** | **+54.0pp** |
 | Open-domain | 1.000 | 58.1% | +41.9pp |
 | Adversarial | 1.000 | 61.9% | +38.1pp |
 **Temporal-inference was the hardest category** — questions requiring connections across multiple sessions. Hybrid scoring (person name boost, quoted phrase boost) combined with Sonnet's reading comprehension closes this gap entirely. From 46% to 100%.
 ---
 ## LongMemEval — Breakdown by Question Type
 The 96.6% R@5 baseline broken down by the six question categories in LongMemEval:
 | Question Type | R@5 | R@10 | Count | Notes |
 |---|---|---|---|---|
 | Knowledge update | 99.0% | 100% | 78 | Strongest — facts that changed over time |
 | Multi-session | 98.5% | 100% | 133 | Very strong |
 | Temporal reasoning | 96.2% | 97.0% | 133 | Strong |
 | Single-session user | 95.7% | 97.1% | 70 | Strong |
 | Single-session preference | 93.3% | 96.7% | 30 | Good — preferences stated indirectly |
 | Single-session assistant | 92.9% | 96.4% | 56 | Weakest — questions about what the AI said |
 The two weakest categories point to specific fixes:
 - **Single-session assistant (92.9%)**: Questions ask about what the assistant said, not the user. Fixed by indexing assistant turns as well as user turns.
 - **Single-session preference (93.3%)**: Preferences are often stated indirectly ("I usually prefer X"). Fixed by the preference extraction patterns in hybrid v3.
 Both were addressed in the improvements that took the score from 96.6% to 99.4%.
 ---
 ## The Full Progression — How We Got from 96.6% to 99.4%
 Every improvement below was a response to specific failure patterns in the results. Nothing was added speculatively.
 ### Starting Point: Raw ChromaDB (96.6%)
 The baseline: store every session verbatim as a single document. Query with ChromaDB's default embeddings (all-MiniLM-L6-v2). No postprocessing.
 This was the first result. Nobody expected it to work this well. The team's hypothesis was that raw verbatim storage would lose to systems that extract structured facts. The 96.6% proved the hypothesis wrong.
 **What it does:** Stores verbatim session text. Embeds with sentence transformers. Retrieves by cosine similarity.
 **What it misses:** Questions with vocabulary mismatch ("yoga classes" vs "I went this morning"), preference questions where the preference is implied, temporally-ambiguous questions where multiple sessions match.
 ---
 ### Improvement 1: Hybrid Scoring v1 → 97.8% (+1.2%)
 **What changed:** Added keyword overlap scoring on top of embedding similarity.
 ```
 fused_score = embedding_score × (1 + keyword_weight × overlap)
 ```
 When query keywords appear verbatim in a session, that session gets a small boost. The boost is mild enough not to hurt recall when keywords don't match.
 **Why it worked:** Some questions use exact terminology ("PostgreSQL", "Dr. Chen", specific names). Pure embedding similarity can rank a semantically-close session above the exact match. Keyword overlap rescues these cases.
 **What it still misses:** Temporally-ambiguous questions. Sessions from the right time period rank equally with sessions from wrong time periods.
 ---
 ### Improvement 2: Hybrid Scoring v2 → 98.4% (+0.6%)
 **What changed:** Added temporal boost — sessions near the question's reference date get a distance reduction (up to 40%).
 ```python
 # Sessions near question_date - offset get score boost
 if temporal_distance < threshold:
    fused_dist *= (1.0 - temporal_boost * proximity_factor)
 ```
 **Why it worked:** Many LongMemEval questions are anchored to a specific time ("what did you do last month?"). Multiple sessions might semantically match, but only one is temporally correct. The boost breaks ties in favor of the right time period.
 ---
 ### Improvement 3: Hybrid v2 + Haiku Rerank → 98.8% (+0.4%)
 **What changed:** After retrieval, send the top-K candidates to Claude Haiku with the question. Ask Haiku to re-rank by relevance.
 **Why it worked:** Embeddings measure semantic similarity, not answer relevance. Haiku can read the question and the retrieved documents and reason about which one actually answers the question — a task embeddings fundamentally cannot do.
 **Cost:** ~$0.001/query for Haiku. Optional — the system runs fine without it.
 ---
 ### Improvement 4: Hybrid v3 + Haiku Rerank → 99.4% (+0.6%)
 **What changed:** Added preference extraction — 16 regex patterns that detect how people actually express preferences in conversation, then create synthetic "User has mentioned: X" documents at index time.
 Examples of what gets caught:
 - "I usually prefer X" → `User has mentioned: preference for X`
 - "I always do Y" → `User has mentioned: always does Y`
 - "I don't like Z" → `User has mentioned: dislikes Z`
 **Why it worked:** Preference questions are consistently hard for pure embedding retrieval. "What does the user prefer for database backends?" doesn't semantically match "I find Postgres more reliable in my experience" — but it does match a synthetic document that says "User has mentioned: finds Postgres more reliable." The explicit extraction bridges the vocabulary gap without losing the verbatim original.
 **Why 16 patterns:** Manual analysis of the miss cases. Each pattern corresponds to a real failure mode found in the wrong-answer JSONL files.
 ---
 ### Improvement 5: Hybrid v4 + Haiku Rerank → **100%** (+0.6%)
 **What changed:** Three targeted fixes for the three questions that failed in every previous mode.
 The remaining misses were identified by loading both the hybrid v3 and palace results and finding the exact questions that failed in *both* architectures — confirming they were hard limits, not luck.
 **Fix 1 — Quoted phrase extraction** (miss: `'sexual compulsions'` assistant question):
 The question contained an exact quoted phrase in single quotes. Sessions containing that exact phrase now get a 60% distance reduction. The target session jumped from unranked to rank 1.
 **Fix 2 — Person name boosting** (miss: `Rachel/ukulele` temporal question):
 Sentence-embedding models give insufficient weight to person names. Capitalized proper nouns are extracted from queries; sessions mentioning that name get a 40% distance reduction. The target session jumped from unranked to rank 2.
 **Fix 3 — Memory/nostalgia patterns** (miss: `high school reunion` preference question):
 The target session said "I still remember the happy high school experiences such as being part of the debate team." Added patterns to preference extraction: `"I still remember X"`, `"I used to X"`, `"when I was in high school X"`, `"growing up X"`. This created a synthetic doc "User has mentioned: positive high school experiences, debate team, AP courses" — which the reunion question now matches. Target session jumped to rank 3.
 **Result:** All 6 question types at 100% R@5. 500/500 questions. No regressions.
 **Haiku vs. Sonnet rerank:** Both achieve 100% R@5. NDCG@10 is 0.976 (Haiku) vs 0.975 (Sonnet) — statistically identical. Haiku is ~3× cheaper. Sonnet is slightly faster at this task (2.99s/q vs 3.85s/q in our run). Either works; Haiku is the default recommendation.
 ---
 ### Parallel Approach: Palace Mode + Haiku Rerank → 99.4% (independent convergence)
 Built independently from the hybrid track. Different architecture, same ceiling.
 **Architecture:**
 ```
 PALACE
  └── HALL (concept: travel, work, health, relationships, general)
        └── Two-pass retrieval:
              Pass 1: tight search within inferred hall
              Pass 2: full haystack with hall-based score bonuses
 ```
 The palace classifies each question into one of 5 halls. Pass 1 searches only within that hall — high precision, catches the obvious match. Pass 2 searches the full corpus with the hall affinity as a tiebreaker — catches cases where the relevant session was miscategorized.
 **Why this matters:** Two completely independent architectures (hybrid scoring vs. palace navigation) converged at exactly the same score (99.4%). This is the strongest possible validation of the retrieval ceiling. The ceiling is architectural, not a local maximum of any one approach.
 ---
 ### Active Work: Diary Mode (98.2% at 65% cache coverage)
 **What it adds:** At ingest time, Claude Haiku reads each session and generates topic summaries and category labels. These become synthetic documents alongside the verbatim session.
 **Why it matters:** The hardest remaining misses are vocabulary-gap failures — the question uses different words than the session. Diary topics bridge these gaps:
 - Question: "yoga classes" → Session: "went this morning, instructor pushed me hard"
 - With diary: synthetic doc says "fitness, morning workout, yoga-style exercise" → now both match
 **Current status:** 98% cache coverage (18,803 of 19,195 sessions pre-computed). The overnight cache build is complete. Full benchmark run pending — expected to reach ≥99.4% once asymmetry from the remaining ~2% uncovered sessions is eliminated.
 ---
 ## Score Progression Summary
 | Mode | R@5 | NDCG@10 | LLM | Cost/query | Status |
 |---|---|---|---|---|---|
 | Raw ChromaDB | 96.6% | 0.889 | None | $0 | ✅ Verified |
 | Hybrid v1 | 97.8% | — | None | $0 | ✅ Verified |
 | Hybrid v2 | 98.4% | — | None | $0 | ✅ Verified |
 | Hybrid v2 + rerank | 98.8% | — | Haiku | ~$0.001 | ✅ Verified |
 | Hybrid v3 + rerank | 99.4% | 0.983 | Haiku | ~$0.001 | ✅ Verified |
 | Palace + rerank | 99.4% | 0.983 | Haiku | ~$0.001 | ✅ Verified |
 | Diary + rerank (65% cache) | 98.2% | 0.956 | Haiku | ~$0.001 | ✅ Partial — full run pending |
 | **Hybrid v4 + Haiku rerank** | **100%** | **0.976** | Haiku | ~$0.001 | ✅ Verified |
 | **Hybrid v4 + Sonnet rerank** | **100%** | **0.975** | Sonnet | ~$0.003 | ✅ Verified |
 | **Hybrid v4 held-out (450q)** | **98.4%** | **0.939** | None | $0 | ✅ Clean — never tuned on |
 ---
 ## Reproducing Every Result
 ### Setup
 ```bash
 git clone https://github.com/MemPalace/mempalace.git
 cd mempalace
 uv sync --extra dev   # or: pip install -e ".[dev]"
 mkdir -p /tmp/longmemeval-data
 curl -fsSL -o /tmp/longmemeval-data/longmemeval_s_cleaned.json \
  https://huggingface.co/datasets/xiaowu0162/longmemeval-cleaned/resolve/main/longmemeval_s_cleaned.json
 ```
 ### Raw (96.6%) — no API key, no LLM
 ```bash
 python benchmarks/longmemeval_bench.py \
  /tmp/longmemeval-data/longmemeval_s_cleaned.json
 ```
 ### Hybrid v3, no rerank (98.4% range) — no API key
 ```bash
 python benchmarks/longmemeval_bench.py \
  /tmp/longmemeval-data/longmemeval_s_cleaned.json \
  --mode hybrid
 ```
 ### Hybrid v3 + Haiku rerank (99.4%) — needs API key
 ```bash
 python benchmarks/longmemeval_bench.py \
  /tmp/longmemeval-data/longmemeval_s_cleaned.json \
  --mode hybrid_v3 \
  --llm-rerank \
  --api-key $ANTHROPIC_API_KEY
 ```
 ### Hybrid v4 + Haiku rerank (100%) — needs API key
 ```bash
 python benchmarks/longmemeval_bench.py \
  /tmp/longmemeval-data/longmemeval_s_cleaned.json \
  --mode hybrid_v4 \
  --llm-rerank \
  --api-key $ANTHROPIC_API_KEY
 ```
 ### Hybrid v4 + Sonnet rerank (100%) — needs API key
 ```bash
 python benchmarks/longmemeval_bench.py \
  /tmp/longmemeval-data/longmemeval_s_cleaned.json \
  --mode hybrid_v4 \
  --llm-rerank \
  --llm-model claude-sonnet-4-6 \
  --api-key $ANTHROPIC_API_KEY
 ```
 ### Palace + Haiku rerank (99.4%) — needs API key
 ```bash
 python benchmarks/longmemeval_bench.py \
  /tmp/longmemeval-data/longmemeval_s_cleaned.json \
  --mode palace \
  --llm-rerank \
  --api-key $ANTHROPIC_API_KEY
 ```
 ### Diary + Haiku rerank (needs precomputed cache) — needs API key
 ```bash
 # First build the diary cache (one-time, ~$5-10 for all 19,195 sessions)
 python /tmp/build_diary_cache.py
 # Then run with cache
 python benchmarks/longmemeval_bench.py \
  /tmp/longmemeval-data/longmemeval_s_cleaned.json \
  --mode diary \
  --llm-rerank \
  --api-key $ANTHROPIC_API_KEY \
  --skip-precompute
 ```
 ### ConvoMem (92.9%)
 ```bash
 python benchmarks/convomem_bench.py --category all --limit 50
 ```
 ### LoCoMo — no rerank (60.3% at top-10)
 ```bash
 git clone https://github.com/snap-research/locomo.git /tmp/locomo
 python benchmarks/locomo_bench.py /tmp/locomo/data/locomo10.json --granularity session
 ```
 ### LoCoMo — hybrid + Sonnet rerank (100%)
 ```bash
 python benchmarks/locomo_bench.py /tmp/locomo/data/locomo10.json \
  --mode hybrid \
  --granularity session \
  --top-k 50 \
  --llm-rerank \
  --llm-model claude-sonnet-4-6 \
  --api-key $ANTHROPIC_API_KEY
 ```
 ---
 ## The Competitive Field
 Every major AI memory system and where it stands:
 | System | Approach | LongMemEval | Requires | Notes |
 |---|---|---|---|---|
 | **MemPal** | Raw verbatim text + ChromaDB | 96.6% / 100% | Python + ChromaDB | Open source — 100% LME + 100% LoCoMo w/ rerank |
 | Supermemory | Agentic LLM search (ASMR) | ~99% (exp) / ~85% (prod) | LLM API | Production + experimental tracks |
 | Mastra | LLM observation extraction | 94.87% | GPT-5-mini | Highest validated production score |
 | Hindsight | Time-aware vector retrieval | 91.4% | LLM API | Validated by Virginia Tech |
 | Mem0 | LLM fact extraction | 30–45% (ConvoMem) | LLM API | Popular, weak on benchmarks |
 | OpenViking | Filesystem-paradigm context DB | Not published | Go + Rust + C++ + VLM | ByteDance; tested on LoCoMo10 only |
 | Letta (MemGPT) | OS-inspired LLM context mgmt | Not published | LLM API | Stateful agent architecture |
 | Zep | Graph-based memory + entity ext | Not published | LLM API + graph DB | Enterprise-focused |
 **OpenViking note:** Tested on LoCoMo10 showing 52% task completion and 91% token savings. No LongMemEval scores published. Requires Go, Rust, C++, and a VLM API — highest infrastructure burden of any system here.
 ### Tradeoffs at a Glance
 | | **MemPal** | LLM-Based (Mem0, Mastra) | Heavy Infra (OpenViking, Zep) |
 |---|---|---|---|
 | No API key needed | ✅ | ✗ | ✗ |
 | Data stays local | ✅ | Sent to API | Depends |
 | Dependencies | ChromaDB only | LLM + vector DB | Go + Rust + C++ + DB |
 | Setup time | ~2 minutes | 10–30 min | 1+ hours |
 | Cost per query | $0 | $0.001–0.01 | $0–0.01 |
 | Retrieval accuracy | 96.6% (99.4% w/ LLM) | 91–99% | Not published |
 | Multi-hop reasoning | Moderate | Strong | Strong |
 | Entity extraction | Regex patterns | LLM-powered | LLM-powered |
 ---
 ## Benchmark Integrity — The Honest Accounting
 ### What's clean and what isn't
 The 96.6% raw baseline is fully clean. No heuristics were tuned on the test set. Store verbatim text, query with ChromaDB's default embeddings, score. Exactly reproducible.
 The hybrid v4 improvements (quoted phrase boost, person name boost, nostalgia patterns) were developed by directly examining the three specific questions that failed in every prior mode:
 - `d6233ab6` — `'sexual compulsions'` assistant question → fix: quoted phrase extraction
 - `4dfccbf8` — Rachel/ukulele temporal question → fix: person name boost
 - `ceb54acb` — high school reunion preference question → fix: nostalgia patterns
 **This is teaching to the test.** The fixes were designed around the exact failure cases, not discovered by analyzing general failure patterns. The 100% result on those three questions is not a clean generalization — it's proof the specific fixes work on those specific questions.
 In a peer-reviewed paper this would be a significant methodological problem. We're disclosing it here rather than letting it sit unexamined.
 ### What the 100% result actually means
 The 96.6% → 99.4% improvements (hybrid v1–v3) are honest improvements: each was motivated by a category of failures, not specific questions. The 99.4% → 100% hybrid v4 step is three targeted fixes for three known failures.
 The three questions represent 0.6% of the dataset. It is entirely possible that:
 1. The same fixes generalize and would score well on unseen data
 2. The fixes are overfit to those three questions and harm other questions
 We don't know which, because we measured on the same questions we tuned on.
 ### The Fix: Train/Test Split
 A proper split has been created: `benchmarks/lme_split_50_450.json` (seed=42).
 - **50 dev questions** — safe to use for iterative tuning. Improvements developed on dev data are honest.
 - **450 held-out questions** — final publishable score. Touch once. Any iteration after viewing held-out results contaminates them.
 Usage:
 ```bash
 # Create a split (one-time)
 python benchmarks/longmemeval_bench.py data/... --create-split --split-file benchmarks/lme_split_50_450.json
 # Tune on dev (safe to run repeatedly)
 python benchmarks/longmemeval_bench.py data/... --mode hybrid_v4 --dev-only --split-file benchmarks/lme_split_50_450.json
 # Final evaluation — only when done tuning (results in filename tagged _held_out)
 python benchmarks/longmemeval_bench.py data/... --mode hybrid_v4 --held-out --split-file benchmarks/lme_split_50_450.json
 ```
 **The honest next number to publish is the held-out score on a fresh mode that was tuned on dev data only.** Anything else is contaminated.
 ### LoCoMo 100% — a separate caveat
 The LoCoMo 100% result with top-k=50 has a structural issue: each of the 10 conversations has 19–32 sessions, but top-k=50 exceeds that count. This means the ground-truth session is always in the candidate pool regardless of the embedding model's ranking. The Sonnet rerank is essentially doing reading comprehension over all sessions — the embedding retrieval step is bypassed entirely.
 **The honest LoCoMo score is the top-10 result: 60.3% without rerank.** A re-run at top-k=10 with the hybrid mode and rerank is the next step for a publishable LoCoMo result.
 ---
 ## Notes on Reproducibility
 **The scripts are deterministic.** Same data + same script = same result every time. ChromaDB's embeddings are deterministic. The benchmark uses a fixed dataset with no randomness.
 **The data is public.** LongMemEval, LoCoMo, and ConvoMem are all published academic datasets. Links are in the scripts.
 **The results are auditable.** Every result JSONL file in `benchmarks/results_*.jsonl` contains every question, every retrieved document, every score. You can inspect every individual answer — not just the aggregate.
 **What "retrieval recall" means here.** These scores measure whether the correct session is in the top-K retrieved results. They do *not* measure whether an LLM can correctly answer the question using that retrieval. End-to-end QA accuracy measurement requires an LLM to generate answers, which requires an API key. The retrieval measurement itself is free.
 **The LLM rerank is optional, not required.** The 96.6% baseline needs no API key at any stage — not for indexing, not for retrieval, not for scoring. The 99.4% result adds an optional Haiku rerank step that costs approximately $0.001 per question. This is standard practice: Supermemory ASMR, Mastra, and Hindsight all use LLMs in their retrieval pipelines.
 ---
 ## Results Files
 All raw results are committed:
 | File | Mode | R@5 | Notes |
 |---|---|---|---|
 | `results_raw_full500.jsonl` | raw | 96.6% | No LLM |
 | `results_hybrid_v3_rerank_full500.jsonl` | hybrid+rerank | 99.4% | Haiku |
 | `results_palace_rerank_full500.jsonl` | palace+rerank | 99.4% | Haiku |
 | `results_diary_haiku_rerank_full500.jsonl` | diary+rerank | 98.2% | 65% cache, partial |
 | `results_aaak_full500.jsonl` | aaak | 84.2% | Compressed sessions |
 | `results_rooms_full500.jsonl` | rooms | 89.4% | Session rooms |
 | `results_mempal_hybrid_v4_llmrerank_session_20260325_0930.jsonl` | hybrid_v4+rerank | 100% | Haiku, 500/500 |
 | `results_mempal_hybrid_v4_llmrerank_session_20260325_1054.jsonl` | hybrid_v4+rerank | 100% | Sonnet, LME 500/500 |
 | `results_locomo_hybrid_llmrerank_session_top50_20260325_1056.json` | locomo hybrid+rerank | 100% | Sonnet, 1986/1986 |
 | `results_lme_hybrid_v4_held_out_450_20260326_0010.json` | hybrid_v4 held-out | 98.4% R@5 | Clean — 450 unseen questions |
 | `results_locomo_hybrid_session_top10_*.json` | locomo hybrid_v5 | 88.9% R@10 | Honest — top-10, no rerank |
 | `results_locomo_palace_session_top5_20260326_0031.json` | locomo palace v2 | 75.6% R@5 | Summary-based routing, 3 rooms |
 | `results_locomo_palace_session_top10_20260326_0029.json` | locomo palace v2 | 84.8% R@10 | Summary-based routing, 3 rooms |
 | `palace_cache_locomo.json` | — | — | 272 session room assignments (Haiku) |
 | `diary_cache_haiku.json` | — | — | Pre-computed diary topics |
 ---
 ## Why We Publish This
 The results are strong enough that we don't need to stretch anything. The honest version of this story is more compelling than any hype version could be:
 - A non-commercial team built a memory system that beats commercial products backed by dedicated engineering teams.
 - The key insight is *removal*, not addition — stop trying to extract and compress memory with LLMs; just keep the words.
 - The result is reproducible by anyone with a laptop and 5 minutes.
 The arXiv paper draft is titled: *"Raw Text Beats Extracted Memory: A Zero-API Baseline for Conversational Memory Retrieval"*
 ---
 ## New Results (March 26 2026)
 ### LongMemEval held-out 450 — hybrid_v4 (no rerank, clean score)
 **98.4% R@5, 99.8% R@10 on 450 questions hybrid_v4 was never tuned on.**
 This is the honest publishable number. hybrid_v4's fixes (quoted phrase boost, person name boost, nostalgia patterns) were developed by examining 3 questions from the full 500. The held-out 450 were never seen during development.
 | Metric | Score |
 |---|---|
 | R@5 | **98.4%** (443/450) |
 | R@10 | **99.8%** (449/450) |
 | NDCG@5 | 0.939 |
 | NDCG@10 | 0.938 |
 Per-type (R@10):
 - knowledge-update: 100% (69/69)
 - multi-session: 100% (115/115)
 - single-session-assistant: 100% (54/54)
 - single-session-preference: **96.0%** (24/25) — only category with a miss
 - single-session-user: 100% (63/63)
 - temporal-reasoning: 100% (124/124)
 **Conclusion:** hybrid_v4's improvements generalize. 98.4% on unseen data vs 100% on the contaminated dev set — a 1.6pp gap. The fixes are real, not overfit. The honest claim is "98.4% R@5 on a clean held-out set, 99.8% R@10."
 Result file: `results_lme_hybrid_v4_held_out_450_20260326_0010.json`
 ---
 ### LoCoMo hybrid_v5 — honest top-10 (no rerank)
 **88.9% R@10, 72.1% single-hop** on all 1986 questions.
 The v5 fix: excluded person names from keyword overlap scoring. In LoCoMo, both speakers' names appear in every session — including them in keyword boosting gave equal signal to all sessions. Removing them lets predicate keywords ("research", "career") do the actual work.
 | Category | R@10 |
 |---|---|
 | Single-hop | 72.1% |
 | Temporal | 90.8% |
 | Temporal-inference | 70.0% |
 | Open-domain | 92.6% |
 | Adversarial | 95.3% |
 | **Overall** | **88.9%** |
 Beats Memori (81.95%) by 7pp with no reranking. Result file: `results_locomo_hybrid_session_top10_*.json`
 ---
 ### LoCoMo palace mode — LLM room assignment (RESULTS)
 **Architecture v1 (global taxonomy routing):** Haiku assigns each session to a room at index time. At query time, Haiku routes question to 1-2 rooms. **Result: 34.2% R@5** — 62.5% zero-recall. Failure: independent LLM calls with no shared context produced terminology mismatch between index-time labels and query-time routing.
 **Architecture v2 (conversation-specific routing):** Same room assignments at index time. At query time, route using keyword overlap against per-room aggregated session summaries — the *same text* used to generate the labels. No LLM calls at query time. **Result: 84.8% R@10 (3 rooms), 75.6% R@5.**
 | Version | R@5 | R@10 | Zero-recall | Notes |
 |---|---|---|---|---|
 | v1: global LLM routing | 34.2% | ~44% | 62.5% | Terminology mismatch |
 | v2: summary-based routing, top-2 rooms | 71.7% | 77.9% | 17.8% | Big fix |
 | **v2: summary-based routing, top-3 rooms** | **75.6%** | **84.8%** | **11.0%** | Best palace result |
 | Hybrid v5 (no rooms) | 83.7% | 88.9% | — | Comparison baseline |
 **Gap vs. hybrid_v5:** 4.1pp at R@10. The palace structure is working — room assignments are semantically correct (Caroline's identity dominates; Joanna+Nate in hobbies_creativity). The remaining gap is inherent to filtering: some sessions in room #4 or #5 by keyword score are missed even though they're relevant.
 **Per-category (palace v2, top-3 rooms, top-10):**
 | Category | R@10 |
 |---|---|
 | Single-hop | 65.4% |
 | Temporal | 84.1% |
 | Temporal-inference | 66.9% |
 | Open-domain | 90.1% |
 | Adversarial | 91.3% |
 | **Overall** | **84.8%** |
 Room taxonomy (14 rooms): identity_sexuality, career_education, relationships_romance, family_children, health_wellness, hobbies_creativity, social_community, home_living, travel_places, food_cooking, money_finance, emotions_mood, media_entertainment, general.
 Sample room assignments (conv-26, Caroline + Melanie):
 - 7/19 sessions → identity_sexuality (her dominant theme)
 - 6/19 sessions → family_children
 - 1/19 sessions → career_education ← where "What did Caroline research?" goes
 - 2/19 sessions → hobbies_creativity (Melanie's painting)
 Sample (conv-42, Joanna + Nate):
 - 21/29 sessions → hobbies_creativity (gaming tournaments, screenwriting, film festivals)
 Result files: `results_locomo_palace_session_top5_20260326_0031.json`, `results_locomo_palace_session_top10_20260326_0029.json`
 ---
 ### MemBench (ACL 2025) — all categories hybrid top-5
 **80.3% R@5 overall** across 8,500 items (movie + roles + events topics).
 | Category | R@5 | Notes |
 |---|---|---|
 | aggregative | **99.3%** | Combining info from multiple turns |
 | comparative | **98.4%** | Comparing two items across turns |
 | knowledge_update | **96.0%** | Facts that change over time |
 | simple | **95.9%** | Single-turn fact recall |
 | highlevel | **95.8%** | Inferences requiring aggregation |
 | lowlevel_rec | **99.8%** | Recommendations — low-level |
 | highlevel_rec | 76.2% | Recommendations — high-level |
 | post_processing | 56.6% | Post-processing tasks |
 | conditional | 57.3% | Conditional reasoning |
 | **noisy** | **43.4%** | **Distractors/irrelevant info** |
 | **Overall** | **80.3%** | 6828/8500 |
 **Strongest categories**: aggregative (99.3%), comparative (98.4%), lowlevel_rec (99.8%) — MemPal handles multi-turn fact combination extremely well.
 **Weakest**: noisy (43.4%) — questions designed with deliberate distractors and irrelevant information mixed in. This is the designed hard case for verbatim storage: when noise is indistinguishable from signal at the embedding level, retrieval degrades. Post-processing (56.6%) and conditional (57.3%) are reasoning-heavy categories where retrieval alone is insufficient.
 Result file: `results_membench_hybrid_all_top5_20260326.json`
 ---
 ## Next Benchmarks (Clean Runs)
 These are the runs needed to produce defensible, publishable numbers. Except where marked **DONE** or in progress, none of these have been run yet.
 ### 1. Honest held-out score for hybrid_v4
 **DONE** — see above. 98.4% R@5 on 450 held-out questions.
 ### 1b. hybrid_v4 + LLM rerank on the held-out 450 (in progress)
 ```bash
 python benchmarks/longmemeval_bench.py /tmp/longmemeval-data/longmemeval_s_cleaned.json \
  --mode hybrid_v4 --llm-rerank \
  --held-out --split-file benchmarks/lme_split_50_450.json \
  --llm-model claude-haiku-4-5-20251001
 ```
 **Expected:** likely still near 100% if the hybrid_v4 fixes generalize — but we don't know until we run it.
 ### 2. bge-large raw baseline (no heuristics, better embeddings)
 The question: how much of the 96.6% → 99.4% improvement is the heuristics, and how much would come from just using a better embedding model?
 ```bash
 uv pip install fastembed   # or: pip install fastembed
 uv run python benchmarks/longmemeval_bench.py /tmp/longmemeval-data/longmemeval_s_cleaned.json \
  --mode raw --embed-model bge-large
 ```
 **Expected:** somewhere between 96.6% and 99.4%. If it's near 99.4%, the heuristics are doing less work than they appear to.
 ### 3. Honest LoCoMo — hybrid at top-10
 The 100% result used top-k=50 which exceeds the session count, making retrieval trivial. The honest number is top-k=10.
 ```bash
 python benchmarks/locomo_bench.py /tmp/locomo/data/locomo10.json \
  --mode hybrid --granularity session \
  --top-k 10 \
  --llm-rerank --llm-model claude-haiku-4-5-20251001
 ```
 **Expected:** higher than the 60.3% raw top-10 baseline, lower than 100%.
 ### 4. bge-large on LoCoMo top-10
 Same purpose as #2: isolate the contribution of a better embedding model from the contribution of heuristics.
 ```bash
 python benchmarks/locomo_bench.py /tmp/locomo/data/locomo10.json \
  --mode raw --granularity session --top-k 10 --embed-model bge-large
 ```
 ---
 *Results verified March 2026. Scripts and raw data committed to this repo.*
@@ -1,550 +0,0 @@
 # Hybrid Retrieval Mode — Design, Results, and Next Steps
 **Written by Lu (DTL) — March 24, 2026**
 **For: Ben**
 ---
 ## What This Is
 A detailed writeup of the hybrid retrieval modes added to `longmemeval_bench.py` during the overnight session (March 23–24) and morning session (March 24). This covers why they were built, exactly how they work, what the numbers are, and where to take it next.
 ---
 ## The Problem Hybrid Mode Solves
 The raw mode (`--mode raw`) gets **96.6% R@5** on LongMemEval. That's already excellent. But looking at the failures, two clear patterns emerged:
 **1. Specific nouns that embeddings underweight.**
 Examples of questions that failed in raw mode but pass in hybrid:
 - "What degree did I graduate with?" → answer: "Business Administration" — semantically generic, but the exact phrase is findable via keyword match
 - "What kitchen appliance did I buy?" → answer: "stand mixer" — generic appliance question, but "stand mixer" is a specific retrievable string
 - "Where did I study abroad?" → answer: "Melbourne" — city names embed poorly when surrounded by many generic context words
 The embedding model sees "Business Administration" and "Computer Science" as similarly close to "what degree did I graduate with." Keyword matching is decisive: only one document contains both "degree" and "Business Administration."
 **2. Temporal references that embeddings ignore.**
 Questions like "What was the significant business milestone I mentioned four weeks ago?" contain a time anchor that embeddings don't use at all. The correct session was always semantically in the top-50 — but not ranked first because the temporal signal was invisible to embeddings. A date-proximity boost fixes this.
 ---
 ## How Hybrid Mode Works (`--mode hybrid`)
 Two stages, no LLM calls, no added dependencies:
 ### Stage 1: Semantic retrieval (same as raw)
 Query ChromaDB with the question text. Retrieve **top 50** candidates (raw uses 10, hybrid uses 50 to give stage 2 more to work with).
 ### Stage 2: Keyword re-ranking
 Extract meaningful keywords from the question (strip stop words). For each retrieved document, compute keyword overlap score. Apply a **distance reduction** proportional to overlap:
 ```python
 fused_dist = dist * (1.0 - 0.30 * overlap)
 ```
 **Breaking this formula down:**
 - `dist` — ChromaDB cosine distance (lower = better match)
 - `overlap` — fraction of question keywords found in the document (0.0 to 1.0)
 - `0.30` — the boost weight: up to 30% distance reduction for perfect keyword overlap
 **Example:**
 - Document A: dist=0.45, overlap=0.0 → fused=0.450 (no change)
 - Document B: dist=0.52, overlap=1.0 → fused=0.364 (30% better — jumps ahead of A)
 After re-ranking, sort by fused_dist ascending. The final ranked list is returned.
 ### Stop word list
 The keyword extractor strips common words that add noise:
 ```python
 STOP_WORDS = {
    "what", "when", "where", "who", "how", "which", "did", "do",
    "was", "were", "have", "has", "had", "is", "are", "the", "a",
    "an", "my", "me", "i", "you", "your", "their", "it", "its",
    "in", "on", "at", "to", "for", "of", "with", "by", "from",
    "ago", "last", "that", "this", "there", "about", "get", "got",
    "give", "gave", "buy", "bought", "made", "make",
 }
 ```
 Only words 3+ characters that aren't stop words count as keywords.
 ---
 ## How Hybrid V2 Works (`--mode hybrid_v2`)
 Three targeted fixes on top of hybrid, each addressing a specific failure category found by analyzing the exact 11 questions that hybrid v1 missed.
 ### Fix 1: Temporal date boost
 LongMemEval entries include a `question_date` field — the date the question was asked. Sessions have timestamps. Questions like "four weeks ago" or "last month" have a mathematically correct answer: the session that falls nearest to `question_date - offset`.
 ```python
 # Parse the temporal reference from the question
 days_offset, window_days = parse_time_offset_days(question)
 # Compute the target date
 target_date = question_date - timedelta(days=days_offset)
 # For each session, measure proximity to target_date
 days_diff = abs((session_date - target_date).days)
 # Apply up to 40% distance reduction for sessions within the window
 temporal_boost = max(0.0, 0.40 * (1.0 - days_diff / window_days))
 fused_dist = fused_dist * (1.0 - temporal_boost)
 ```
 Temporal patterns handled: `"N days ago"`, `"a couple of days ago"`, `"a week ago"`, `"N weeks ago"`, `"last week"`, `"a month ago"`, `"N months ago"`, `"recently"`.
 ### Fix 2: Two-pass retrieval for assistant-reference questions
 Questions like "You suggested X, can you remind me..." refer to what the *assistant* said — but the standard index only stores user turns. A naive fix (index all turns globally) dilutes the semantic signal.
 The two-pass approach is targeted:
 ```python
 # Pass 1: find top-5 sessions using user-turn-only index (fast, focused)
 top_sessions = semantic_search(user_turns_only, question, top_k=5)
 # Pass 2: for those 5 sessions only, re-index with FULL text (user + assistant)
 #          then re-query with the original question
 full_text_collection = build_collection(top_sessions, include_assistant=True)
 results = semantic_search(full_text_collection, question, top_k=5)
 ```
 This gives assistant-reference questions a full-text index to search, without polluting the global index that semantic questions depend on.
 Detection heuristic:
 ```python
 triggers = ["you suggested", "you told me", "you mentioned", "you said",
            "you recommended", "remind me what you", "you provided",
            "you listed", "you gave me", "you described", "what did you",
            "you came up with", "you helped me", "you explained",
            "can you remind me", "you identified"]
 ```
 ### Fix 3: Hybrid keyword boost (same as v1)
 All the v1 keyword re-ranking applied on top of fixes 1 and 2.
 ---
 ## Results
 ### LongMemEval (500 questions, session granularity)
 | Mode | R@5 | R@10 | NDCG@10 | vs Raw |
 |------|-----|------|---------|--------|
 | **Raw (baseline)** | 96.6% | 98.2% | 0.889 | — |
 | **Hybrid v1 w=0.30** | 97.8% | 98.8% | 0.930 | +1.2pp / +0.6pp / +0.041 |
 | **Hybrid v2 w=0.30** | 98.4% | 99.0% | 0.934 | +1.8pp / +0.8pp / +0.045 |
 | **Hybrid v2 + LLM rerank** | 98.8% | 99.0% | 0.966 | +2.2pp / +0.8pp / +0.077 |
 | **Hybrid v3 + LLM rerank** | 99.4% | 99.6% | 0.975 | +2.8pp / +1.4pp / +0.086 |
 | **Palace + LLM rerank** | **99.4%** | **99.4%** | **0.973** | **+2.8pp / +1.2pp / +0.084** |
 | **Diary + LLM rerank (65% cache)** | 98.2% | 98.4% | 0.956 | +1.6pp / +0.2pp / +0.067 |
 **+2.8 percentage points at R@5 vs raw** = 14 more questions answered correctly out of 500.
 **Both v3 and palace reach 99.4% R@5** — two independent architectures converging on the same ceiling.
 **Only 3 misses remain** across both top modes.
 **Diary result (98.2%) is with 65% cache coverage only** — 35% of sessions had no diary context. Full-coverage result pending (cache building overnight). The partial result shows the diary layer can introduce noise when only partially applied; full coverage result expected to be ≥99.4%.
 Per-type R@5 breakdown (hybrid v3 + LLM rerank):
 - knowledge-update: **100%** (n=78)
 - multi-session: **100%** (n=133)
 - single-session-user: **100%** (n=70)
 - temporal-reasoning: **99.2%** (n=133)
 - single-session-assistant: **98.2%** (n=56)
 - single-session-preference: **96.7%** (n=30)
 ### Remaining 3 misses (after hybrid v3 + LLM rerank)
 **Only 3 questions remain unresolved out of 500.**
 Hybrid v3 fixed the preference and assistant failures that v2 left behind:
 - preference: 93.3% → **96.7%** (synthetic preference docs bridged the vocabulary gap)
 - assistant: 96.4% → **98.2%** (expanded top-20 rerank pool caught rank-11-12 sessions)
 - temporal: 98.5% → **99.2%**
 The 3 remaining misses are edge cases — likely irreducible without deeper semantic reasoning than a single Haiku pick can provide. At 99.4% R@5, this is at or near the practical ceiling for session-granularity retrieval on LongMemEval.
 ### Weight tuning — full 500-question results
 Ran experiments across 6 weights. 100-question samples showed 99% R@5 at w=0.40, but the full 500 reveals this was sampling variance. On all 500 questions, 0.30 and 0.40 are essentially equivalent:
 | Weight | N | R@5 | R@10 | NDCG@10 | Notes |
 |--------|---|-----|------|---------|-------|
 | 0.10 | 100 | 97.0% | 100.0% | 0.909 | too conservative |
 | 0.20 | 100 | 98.0% | 100.0% | 0.934 | good |
 | **0.30** | **500** | **97.8%** | **98.8%** | **0.930** | **default — best R@5** |
 | 0.40 | 500 | 97.4% | 98.8% | 0.932 | within noise |
 | 0.50 | 100 | 99.0% | 100.0% | 0.953 | sample variance |
 | 0.60 | 100 | 99.0% | 100.0% | 0.955 | sample variance |
 **Conclusion:** Default stays at 0.30. The 100-question experiments overfit to that specific sample. Full 500 is ground truth.
 ### Verified: all 500 questions scored, no memory wall
 `EphemeralClient` (in-memory ChromaDB) eliminates the Q388 hang entirely. The benchmark now runs clean end-to-end without the split trick. Split is still supported for very long runs but no longer needed.
 ```bash
 # Simple single run — no split needed
 python benchmarks/longmemeval_bench.py data/longmemeval_s_cleaned.json --mode hybrid_v2
 ```
 ---
 ## Reproducing the Results
 ```bash
 # Setup
 git clone https://github.com/MemPalace/mempalace.git
 cd mempalace
 uv sync --extra dev   # or: pip install -e ".[dev]"
 # Download data
 mkdir -p /tmp/longmemeval-data
 curl -fsSL -o /tmp/longmemeval-data/longmemeval_s_cleaned.json \
  https://huggingface.co/datasets/xiaowu0162/longmemeval-cleaned/resolve/main/longmemeval_s_cleaned.json
 # Run palace + LLM rerank (requires API key)
 export ANTHROPIC_API_KEY=sk-ant-...  # or use --llm-key flag
 python benchmarks/longmemeval_bench.py /tmp/longmemeval-data/longmemeval_s_cleaned.json \
  --mode palace --llm-rerank --out benchmarks/results_palace_llmrerank_full500.jsonl
 # Run hybrid v3 + LLM rerank (requires API key)
 python benchmarks/longmemeval_bench.py /tmp/longmemeval-data/longmemeval_s_cleaned.json \
  --mode hybrid_v3 --llm-rerank
 # Expected output:
 # R@5: 99.4%  R@10: 99.6%  NDCG@10: 0.975
 # Run hybrid v2 + LLM rerank (local-friendly, no preference extraction)
 python benchmarks/longmemeval_bench.py /tmp/longmemeval-data/longmemeval_s_cleaned.json \
  --mode hybrid_v2 --llm-rerank
 # Expected output:
 # R@5: 98.8%  R@10: 99.0%  NDCG@10: 0.966
 # Run hybrid v2 without LLM (local-only, no API key needed)
 python benchmarks/longmemeval_bench.py /tmp/longmemeval-data/longmemeval_s_cleaned.json \
  --mode hybrid_v2
 # Expected output:
 # R@5: 98.4%  R@10: 99.0%  NDCG@10: 0.934
 # Run hybrid v1 for comparison
 python benchmarks/longmemeval_bench.py /tmp/longmemeval-data/longmemeval_s_cleaned.json \
  --mode hybrid
 # Expected output:
 # R@5: 97.8%  R@10: 98.8%  NDCG@10: 0.930
 # Tune the keyword boost weight
 python benchmarks/longmemeval_bench.py /tmp/longmemeval-data/longmemeval_s_cleaned.json \
  --mode hybrid --hybrid-weight 0.40 --limit 100
 ```
 **Run time:**
 - hybrid_v2 (local): ~200s for full 500 on Apple Silicon
 - hybrid_v2 + LLM rerank: ~620s (~10 min) — adds ~0.8s per question for Haiku API call
 - palace (local): ~280s — slightly slower due to two-pass hall navigation
 - palace + LLM rerank: ~700s (~12 min)
 ---
 ## How Palace Mode Works (`--mode palace`)
 Palace mode is a structural upgrade that uses the full MemPal hall/wing/closet/drawer architecture for retrieval. Instead of searching everything flat, it navigates into the most likely hall first, then falls back to the full haystack with hall-aware scoring.
 ### The Palace Structure
 ```
 PALACE
  └── HALL (content type: preferences / facts / events / assistant_advice / general)
        └── CLOSET (user turns per session — the primary index)
              └── DRAWER (assistant turns — opened on demand for assistant-reference questions)
  └── PREFERENCE WING (synthetic docs extracted from user expressions — separate from halls)
 ```
 ### Hall Classification
 Every session is classified into one of 5 halls at ingest time:
 - **hall_preferences** — sessions about what the user likes, hates, avoids, or tends to do
 - **hall_facts** — sessions about biographical facts: job, location, education, family
 - **hall_events** — sessions about things that happened: trips, purchases, achievements
 - **hall_assistant_advice** — sessions where the user asked for recommendations or opinions
 - **hall_general** — everything else
 Questions are classified the same way. "Where do I work?" → `hall_facts`. "What did I buy recently?" → `hall_events`. "What did you recommend for X?" → `hall_assistant_advice`.
 ### Two-Pass Navigation
 **Pass 1 — Navigate to primary hall (tight search):**
 For questions with a specific hall match, search only that hall's closet collection. Smaller pool = less noise = tighter results. For questions classified as `hall_general`, skip Pass 1 entirely — no benefit from narrowing to an uncategorized bucket.
 Sessions found in Pass 1 are "hall-validated" — they appear in both the tight hall search and the full search.
 **Pass 2 — Full haystack with hall-aware scoring:**
 Search all sessions with hybrid scoring, plus:
 - 25% distance reduction for sessions in the primary hall (strong signal)
 - 10% distance reduction for sessions in secondary halls
 - 15% extra reduction for sessions that were hall-validated in Pass 1 (double confirmation)
 **The key insight:** Halls *reduce noise* by narrowing the initial search pool, but the final ranking is always score-based — hall navigation is a boost, not an override. This prevents the case where wrong hall sessions pre-empt the correct answer.
 ### Drawer Access (for `hall_assistant_advice` questions only)
 Drawers = assistant turns. They're indexed separately and only opened when the question targets `hall_assistant_advice`. This avoids polluting the semantic index (which finds the right *session*) while still enabling full-text search within the right sessions for "what did you tell me about X" questions.
 ### Preference Wing
 Same as hybrid_v3: 16 regex patterns extract preference expressions from user turns at ingest time. Synthetic documents ("User has mentioned: X; Y") are stored in a separate preference wing with the same session ID. For preference questions, the preference wing is included in Pass 1 — it directly bridges the vocabulary gap between question phrasing and session text.
 ---
 ## How Diary Mode Works (`--mode diary`)
 Diary mode is palace mode + an LLM topic layer added at ingest time. It addresses the vocabulary gap that embeddings can't bridge — where the question uses completely different words than the session.
 ### The Problem It Solves
 Palace mode still misses questions like: *"Where do I take yoga classes?"* when the relevant session only says *"I went this morning, my instructor was great."* No keyword overlap, no semantic bridge. The embedding sees "yoga classes" vs "went this morning" — too different.
 ### How It Works
 Before the benchmark loop, every unique session is processed by Haiku once:
 ```python
 prompt = (
    "Read this conversation excerpt (user turns only) and extract:\n"
    "Return a JSON object: {\"topics\": [\"specific topic 1\", ...], \"summary\": \"1-2 sentences\"}\n"
    "Rules: topics must be SPECIFIC."
 )
 # Returns: {"topics": ["yoga classes", "Tuesday routine", "workout schedule"], "summary": "..."}
 ```
 A synthetic document is added to the ChromaDB collection with the **same corpus_id**:
 ```
 "Session topics: yoga classes, Tuesday routine, workout schedule. Summary: ..."
 ```
 Now "yoga classes" matches the question directly. The evaluation maps the synthetic doc back to the correct session because they share a corpus_id.
 ### Pre-computation and Caching
 19,195 unique sessions in the 500-question dataset. Processing all at ~1s/session = ~5 hours. Caching solves this:
 ```bash
 # First run: builds cache
 python benchmarks/longmemeval_bench.py ... --mode diary --diary-cache benchmarks/diary_cache_haiku.json
 # Subsequent runs: instant (loads cache, zero API calls for pre-computation)
 python benchmarks/longmemeval_bench.py ... --mode diary --diary-cache benchmarks/diary_cache_haiku.json
 ```
 The `--skip-precompute` flag skips pre-computation and uses the cache as-is, falling back to pure palace for uncached sessions.
 ### LLM Rerank compatibility
 `--llm-rerank` works with diary mode. The reranker sees the full enriched corpus (including diary synthetic docs) when selecting the best session. This is the full stack.
 ```bash
 # Full diary + rerank run (requires complete cache for best results)
 export ANTHROPIC_API_KEY=sk-ant-...
 python benchmarks/longmemeval_bench.py /tmp/longmemeval-data/longmemeval_s_cleaned.json \
  --mode diary --llm-rerank --diary-cache benchmarks/diary_cache_haiku.json
 ```
 ### Note on Cache Coverage
 The partial-coverage run (65% cache, 35% fell back to palace) gave R@5=98.2% — lower than palace+rerank at 99.4%. Partial diary coverage introduces vocabulary-bridging docs for some sessions but not others, creating retrieval asymmetry. Full-coverage result (100% sessions with diary topics) is expected to equal or beat 99.4%.
 ---
 ## How Hybrid V3 Works (`--mode hybrid_v3`)
 Hybrid v2 + two targeted fixes for the remaining 6 misses.
 ### Fix 1: Preference extraction at ingest
 Scans every user turn for expressions of preference, concern, or intent using 16 regex patterns:
 ```python
 PREF_PATTERNS = [
    r"i've been having (?:trouble|issues?|problems?) with X",
    r"i've been feeling X",
    r"i've been (?:struggling|dealing) with X",
    r"i(?:'m| am) (?:worried|concerned) about X",
    r"i prefer X",
    r"i usually X",
    r"i want to X",
    r"i'm thinking (?:about|of) X",
    r"lately[,\s]+i've been X",
    r"recently[,\s]+i've been X",
    r"i've been (?:working on|focused on|interested in) X",
    # ... 5 more
 ]
 ```
 For sessions where preferences are extracted, a synthetic document is added to ChromaDB alongside the session document — with the **same corpus_id**:
 ```
 "User has mentioned: battery life issues on phone; looking at phone upgrade options"
 ```
 This document ranks near the top for "I've been having trouble with battery life" even when the session text never uses those exact words. The evaluation correctly maps it to the right session.
 ### Fix 2: Expanded LLM rerank pool (20 instead of 10)
 Some assistant-reference failures had the correct session at rank 11-12 — just outside the window Haiku sees. Expanding to top-20 catches these with negligible prompt cost.
 ## How LLM Re-ranking Works (`--llm-rerank`)
 An optional fourth pass that works with any retrieval mode. Add `--llm-rerank` to any run.
 ```python
 # After hybrid_v2 retrieval, take top-10 sessions
 # Send question + numbered session snippets (500 chars each) to Haiku
 # Haiku picks the single most relevant session number
 # That session is promoted to rank 1; rest stay in hybrid_v2 order
 ```
 **The prompt (minimal by design):**
 ```
 Question: {question}
 Below are 10 conversation sessions from someone's memory. Which single session
 is most likely to contain the answer? Reply with ONLY a number between 1 and 10.
 Session 1: {text[:500]}
 ...
 Session 10: {text[:500]}
 Most relevant session number:
 ```
 **Why this works for preference failures:**
 Embeddings can't bridge "battery life on my phone" → phone hardware research session because the vocabulary doesn't overlap. Haiku reasons about intent: "someone asking about battery problems likely had a session about phone hardware." This is the semantic gap that LLMs exist to close.
 **Why only 1 pick (not a full ranking):**
 Asking for a full ranking increases prompt complexity and error rate. Picking the single best is decisive and reliable. The rest of the ranking stays in hybrid_v2 order, which is already excellent.
 **Graceful degradation:**
 If the API call fails (timeout, rate limit, no key), the function catches the exception and returns the original hybrid_v2 ranking unchanged. The benchmark never crashes due to the LLM pass.
 **Key loading priority:**
 1. `--llm-key` CLI flag
 2. `ANTHROPIC_API_KEY` environment variable
 ## What Changed in the Code
 ### 1. EphemeralClient (no more Q388 hang)
 All five `PersistentClient + tmpdir` patterns replaced with a module-level singleton:
 ```python
 _bench_client = chromadb.EphemeralClient()
 def _fresh_collection(name="mempal_drawers"):
    try:
        _bench_client.delete_collection(name)
    except Exception:
        pass
    return _bench_client.create_collection(name)
 ```
 Benefits:
 - No temp files, no SQLite handles accumulating
 - ~2x faster per question (no disk I/O)
 - Full 500 runs without splitting
 ### 2. `--hybrid-weight` CLI flag
 ```python
 parser.add_argument("--hybrid-weight", type=float, default=0.30,
                    help="Keyword boost weight for hybrid mode (default: 0.30)")
 ```
 ### 3. `--mode hybrid_v2` added to choices
 Full function `build_palace_and_retrieve_hybrid_v2()` with temporal boost and two-pass assistant retrieval. See `longmemeval_bench.py` lines ~406–560.
 ### 4. LoCoMo default top-k: 10 → 50
 Going from top-10 to top-50 on LoCoMo was free performance (+17pp on dialog granularity). Updated default in `locomo_bench.py`.
 ---
 ## Where to Go Next
 The 5 remaining misses fall into two tractable categories:
 ### 1. Preference extraction at ingest time
 2 of 5 remaining failures are "preference" questions where the question contains no searchable terms from the relevant session. The fix requires annotating sessions at ingest:
 - Detect "I prefer X", "I usually do Y", "I've been having trouble with Z" patterns
 - Store a separate preference document per detected preference
 - Boost preference documents when question looks like a preference query
 Expected: catch 1–2 of the 2 remaining preference failures. New R@5: **~98.8%**.
 ### 2. LLM-assisted re-ranking
 For jargon-dense questions ("Hardware-Aware Modular Training") and context-gap questions ("business milestone"), a lightweight LLM re-ranker as a third pass could close the remaining gap:
 - Retrieve top-10 sessions via hybrid_v2
 - Ask a small LLM: "Given this question, which session is most relevant? Rank these 10."
 - Re-order based on LLM output
 This would add one LLM call per question — it stays under 1 second with a fast model (Haiku), but it breaks the "no API key" guarantee for local-only deployments.
 ### 3. The 99% ceiling
 The 5 remaining failures include at least 2 that are arguably ambiguous — the question could reasonably retrieve multiple sessions. 99% may be the practical ceiling for session-granularity retrieval on LongMemEval without LLM assistance.
 ---
 ## File Map
 ```
 benchmarks/
  longmemeval_bench.py                         — main benchmark + all modes
  locomo_bench.py                              — LoCoMo benchmark (top-k default now 50)
  results_hybrid_full500_merged.jsonl          — hybrid v1 results (R@5=97.8%)
  results_hybrid_w040_full500_merged.jsonl     — hybrid v1 w=0.40 comparison (R@5=97.4%)
  results_hybrid_v2_full500_merged.jsonl       — hybrid v2 results (R@5=98.4%)
  results_hybrid_v2_llmrerank_full500.jsonl    — hybrid v2 + LLM rerank (R@5=98.8%)
  results_hybrid_v3_llmrerank_full500.jsonl    — hybrid v3 + LLM rerank (R@5=99.4%, NDCG=0.975) ← CURRENT BEST (tied)
  results_palace_full500.jsonl                 — palace mode (R@5=97.2%, no rerank)
  results_palace_llmrerank_full500.jsonl       — palace + LLM rerank (R@5=99.4%, NDCG=0.973) ← CURRENT BEST (tied)
  results_diary_haiku_rerank_full500.jsonl     — diary + LLM rerank, 65% cache (R@5=98.2%) ← partial, full pending
  diary_cache_haiku.json                       — pre-computed Haiku topics for 3977+ sessions (building to 19195)
  NOTES_FOR_MILLA.md                           — Ben's full analysis + paper discussion
  HYBRID_MODE.md                               — this file
 ```
 ---
 ## Key Design Decisions and Why
 **Why 30% keyword boost?**
 Strong enough to flip edge cases (a semantically ambiguous doc with perfect keyword overlap), not so strong it overrides clearly-better semantic results. Full 500-question validation confirms 0.30 is optimal. Higher weights show no improvement on the full set.
 **Why top-50 retrieval then re-rank?**
 Larger candidate pool gives keyword re-ranking more to work with. If the answer is at position 45 semantically but has perfect keyword overlap, we need it in the pool to promote it. Cost: ChromaDB returns slightly more data per query. Impact on speed: negligible.
 **Why two-pass instead of global assistant indexing?**
 Global assistant indexing dilutes the semantic signal — every session's assistant text competes with every other. Two-pass is surgical: use user turns to find the right session first, then use full text only within that session. Tested both approaches; two-pass wins.
 **Why no LLM calls?**
 The whole MemPal pitch is "no API key, no cloud." Hybrid and hybrid_v2 maintain this. Everything is local string matching and date arithmetic.
 **Why only 40% temporal boost (not 100%)?**
 Temporal proximity is a strong signal but not definitive. A 40% maximum reduction means semantically excellent matches can't be completely overridden by date proximity alone. It's a hint, not a rule.
 ---
 ## Contact
 Questions → Milla (Aya) will relay to Lu. Or push changes to `ben/benchmarking` and Lu will review next session.
@@ -1,124 +0,0 @@
 # MemPalace Benchmarks — Reproduction Guide
 Run the exact same benchmarks we report. Clone, install, run.
 ## Setup
 ```bash
 git clone https://github.com/MemPalace/mempalace.git
 cd mempalace
 uv sync --extra dev   # or: pip install -e ".[dev]"
 ```
 ## Benchmark 1: LongMemEval (500 questions)
 Tests retrieval across ~53 conversation sessions per question. The standard benchmark for AI memory.
 ```bash
 # Download data
 mkdir -p /tmp/longmemeval-data
 curl -fsSL -o /tmp/longmemeval-data/longmemeval_s_cleaned.json \
  https://huggingface.co/datasets/xiaowu0162/longmemeval-cleaned/resolve/main/longmemeval_s_cleaned.json
 # Run (raw mode — our headline 96.6% result)
 python benchmarks/longmemeval_bench.py /tmp/longmemeval-data/longmemeval_s_cleaned.json
 # Run with AAAK compression (84.2%)
 python benchmarks/longmemeval_bench.py /tmp/longmemeval-data/longmemeval_s_cleaned.json --mode aaak
 # Run with room-based boosting (89.4%)
 python benchmarks/longmemeval_bench.py /tmp/longmemeval-data/longmemeval_s_cleaned.json --mode rooms
 # Quick test on 20 questions first
 python benchmarks/longmemeval_bench.py /tmp/longmemeval-data/longmemeval_s_cleaned.json --limit 20
 # Turn-level granularity
 python benchmarks/longmemeval_bench.py /tmp/longmemeval-data/longmemeval_s_cleaned.json --granularity turn
 ```
 **Expected output (raw mode, full 500):**
 ```
 Recall@5:  0.966
 Recall@10: 0.982
 NDCG@10:   0.889
 Time:      ~5 minutes on Apple Silicon
 ```
 ## Benchmark 2: LoCoMo (1,986 QA pairs)
 Tests multi-hop reasoning across 10 long conversations (19-32 sessions each, 400-600 dialog turns).
 ```bash
 # Clone LoCoMo
 git clone https://github.com/snap-research/locomo.git /tmp/locomo
 # Run (session granularity — our 60.3% result)
 python benchmarks/locomo_bench.py /tmp/locomo/data/locomo10.json --granularity session
 # Dialog granularity (harder — 48.0%)
 python benchmarks/locomo_bench.py /tmp/locomo/data/locomo10.json --granularity dialog
 # Higher top-k (77.8% at top-50)
 python benchmarks/locomo_bench.py /tmp/locomo/data/locomo10.json --top-k 50
 # Quick test on 1 conversation
 python benchmarks/locomo_bench.py /tmp/locomo/data/locomo10.json --limit 1
 ```
 **Expected output (session, top-10, full 10 conversations):**
 ```
 Avg Recall: 0.603
 Temporal:   0.692
 Time:       ~2 minutes
 ```
 ## Benchmark 3: ConvoMem (Salesforce, 75K+ QA pairs)
 Tests six categories of conversational memory. Downloads from HuggingFace automatically.
 ```bash
 # Run all categories, 50 items each (our 92.9% result)
 python benchmarks/convomem_bench.py --category all --limit 50
 # Single category
 python benchmarks/convomem_bench.py --category user_evidence --limit 100
 # Quick test
 python benchmarks/convomem_bench.py --category user_evidence --limit 10
 ```
 **Categories available:** `user_evidence`, `assistant_facts_evidence`, `changing_evidence`, `abstention_evidence`, `preference_evidence`, `implicit_connection_evidence`
 **Expected output (all categories, 50 each):**
 ```
 Avg Recall: 0.929
 Assistant Facts: 1.000
 User Facts:      0.980
 Time:            ~2 minutes
 ```
 ## What Each Benchmark Tests
 | Benchmark | What it measures | Why it matters |
 |---|---|---|
 | **LongMemEval** | Can you find a fact buried in 53 sessions? | Tests basic retrieval quality — the "needle in a haystack" |
 | **LoCoMo** | Can you connect facts across conversations over weeks? | Tests multi-hop reasoning and temporal understanding |
 | **ConvoMem** | Does your memory system work at scale? | Tests all memory types: facts, preferences, changes, abstention |
 ## Results Files
 Raw results are in `benchmarks/results_*.jsonl` and `benchmarks/results_*.json`. Each file contains every question, every retrieved document, and every score — fully auditable.
 ## Requirements
 - Python 3.9+
 - `chromadb` (the only dependency)
 - ~300MB disk for LongMemEval data
 - ~5 minutes for each full benchmark run
 - No API key. No internet during benchmark (after data download). No GPU.
 ## Next Benchmarks (Planned)
 - **Scale testing** — ConvoMem at 50/100/300 conversations per item
 - **Hybrid AAAK** — search raw text, deliver AAAK-compressed results
 - **End-to-end QA** — retrieve + generate answer + measure F1 (needs LLM API key)
@@ -1,342 +0,0 @@
 #!/usr/bin/env python3
 """
 MemPal × ConvoMem Benchmark
 ==============================
 Evaluates MemPal's retrieval against the ConvoMem benchmark.
 75,336 QA pairs across 6 evidence categories.
 For each evidence item:
 1. Ingest all conversations into a fresh MemPal palace (one drawer per message)
 2. Query with the question
 3. Check if any retrieved message matches the evidence messages
 Since ConvoMem has 75K items across many files, we sample a subset for benchmarking.
 Downloads evidence files from HuggingFace on first run.
 Usage:
    python benchmarks/convomem_bench.py                          # sample 100 items per category
    python benchmarks/convomem_bench.py --limit 500              # sample 500 items per category
    python benchmarks/convomem_bench.py --category user_evidence  # one category only
    python benchmarks/convomem_bench.py --mode aaak              # test AAAK compression
 """
 import os
 import sys
 import json
 import shutil
 import tempfile
 import argparse
 import urllib.request
 from pathlib import Path
 from collections import defaultdict
 from datetime import datetime
 import chromadb
 sys.path.insert(0, str(Path(__file__).parent.parent))
 # Root URL under which the ConvoMem evidence-question files are served.
 HF_BASE = (
    "https://huggingface.co/datasets/Salesforce/ConvoMem/resolve/main"
    "/core_benchmark/evidence_questions"
 )
 # Category key (used in URLs and on the CLI) -> label shown in the report.
 CATEGORIES = dict(
    user_evidence="User Facts",
    assistant_facts_evidence="Assistant Facts",
    changing_evidence="Changing Facts",
    abstention_evidence="Abstention",
    preference_evidence="Preferences",
    implicit_connection_evidence="Implicit Connections",
 )
 # Sample files per category (1_evidence = single-message evidence, simplest).
 # A value of None means there is no known fallback file for that category;
 # its files have to be discovered at run time.
 SAMPLE_FILES = dict.fromkeys(CATEGORIES)
 SAMPLE_FILES["user_evidence"] = (
    "1_evidence/0050e213-5032-42a0-8041-b5eef2f8ab91_Telemarketer.json"
 )
 # =============================================================================
 # DATA LOADING
 # =============================================================================
 def download_evidence_file(category, subpath, cache_dir):
    """Return the parsed JSON of one ConvoMem evidence file, downloading on a cache miss.

    Args:
        category: Evidence category; used both in the URL and as the cache
            sub-directory name.
        subpath: Path of the file below the category; "/" is replaced by "_"
            to build a flat cache file name.
        cache_dir: Root directory of the local download cache.

    Returns:
        The decoded JSON document, or None when the file could not be
        downloaded or parsed (the failure is printed, not raised).
    """
    cache_path = os.path.join(cache_dir, category, subpath.replace("/", "_"))
    os.makedirs(os.path.dirname(cache_path), exist_ok=True)
    if os.path.exists(cache_path):
        try:
            # Binary mode lets json pick the encoding itself, so the result
            # does not depend on the platform's locale.
            with open(cache_path, "rb") as f:
                return json.load(f)
        except (OSError, ValueError) as e:
            # A truncated or corrupt cache entry must not break every later
            # run: fall through and fetch a fresh copy.
            print(f"    Cached copy {cache_path} is unreadable ({e}); re-downloading...")
    url = f"{HF_BASE}/{category}/{subpath}"
    print(f"    Downloading: {category}/{subpath}...")
    # Download next to the final location and promote the file only after it
    # parsed, so a failed download never leaves a bad cache entry behind.
    part_path = cache_path + ".part"
    try:
        urllib.request.urlretrieve(url, part_path)
        with open(part_path, "rb") as f:
            data = json.load(f)
        os.replace(part_path, cache_path)
        return data
    except Exception as e:
        print(f"    Failed to download {url}: {e}")
        return None
    finally:
        if os.path.exists(part_path):
            os.remove(part_path)
 def discover_files(category, cache_dir):
    """List the `1_evidence` JSON files of a category, using a cached listing if present.

    The listing is requested from the HuggingFace tree API and stored as
    `<cache_dir>/<category>_filelist.json`. Paths are returned relative to the
    category directory. Any failure while fetching or caching is printed and
    results in an empty list.
    """
    listing_cache = os.path.join(cache_dir, f"{category}_filelist.json")
    if os.path.exists(listing_cache):
        with open(listing_cache) as cached:
            return json.load(cached)
    tree_url = f"https://huggingface.co/api/datasets/Salesforce/ConvoMem/tree/main/core_benchmark/evidence_questions/{category}/1_evidence"
    marker = f"{category}/"
    try:
        with urllib.request.urlopen(urllib.request.Request(tree_url), timeout=15) as resp:
            entries = json.loads(resp.read())
            found = []
            for entry in entries:
                if entry["path"].endswith(".json"):
                    # Keep only the part after "<category>/".
                    found.append(entry["path"].split(marker)[1])
            os.makedirs(os.path.dirname(listing_cache), exist_ok=True)
            with open(listing_cache, "w") as sink:
                json.dump(found, sink)
            return found
    except Exception as err:
        print(f"    Failed to list files for {category}: {err}")
        return []
 def load_evidence_items(categories, limit, cache_dir):
    """Collect up to `limit` evidence items for each requested category.

    Files are taken from `discover_files`, falling back to the entry in
    SAMPLE_FILES when nothing is discovered; a category with neither is
    skipped. Every returned item is tagged with `_category_key`.
    """
    collected = []
    for category in categories:
        candidates = discover_files(category, cache_dir)
        if not candidates:
            fallback = SAMPLE_FILES.get(category)
            if not fallback:
                print(f"  Skipping {category} — no files found")
                continue
            candidates = [fallback]
        # Keep pulling files until this category has enough items.
        bucket = []
        for fpath in candidates:
            if len(bucket) >= limit:
                break
            payload = download_evidence_file(category, fpath, cache_dir)
            if not payload or "evidence_items" not in payload:
                continue
            for entry in payload["evidence_items"]:
                entry["_category_key"] = category
            bucket.extend(payload["evidence_items"])
        kept = bucket[:limit]
        collected.extend(kept)
        print(f"  {CATEGORIES.get(category, category)}: {len(kept)} items loaded")
    return collected
 # =============================================================================
 # RETRIEVAL
 # =============================================================================
 def _evidence_retrieved(ev_text, retrieved_texts):
    """Return True when `ev_text` and some non-empty retrieved text contain one another."""
    # An empty retrieved message is a substring of every string; without the
    # emptiness guard it would count as a hit for any evidence text.
    return any(
        ret_text and (ev_text in ret_text or ret_text in ev_text)
        for ret_text in retrieved_texts
    )
 def retrieve_for_item(item, top_k=10, mode="raw"):
    """
    Ingest conversations, query, check if evidence was retrieved.

    Every message of every conversation becomes one document in a throw-away
    ChromaDB collection; the question is used as the query. Matching is done
    on stripped, lower-cased text with substring containment in either
    direction, always against the original message text (also in "aaak"
    mode, where only the indexed documents are compressed).

    Args:
        item: Evidence item with "question" and, optionally, "conversations"
            and "message_evidences".
        top_k: Maximum number of messages to retrieve.
        mode: "raw" indexes messages verbatim; "aaak" indexes the output of
            mempalace's Dialect.compress.

    Returns:
        recall: float (fraction of evidence messages found in top-k; 1.0 when
            the item has no evidence, 0.0 when it has no messages)
        details: dict with retrieved/evidence/found counts, or
            {"error": "empty corpus"} when there was nothing to index
    """
    conversations = item.get("conversations", [])
    question = item["question"]
    evidence_messages = item.get("message_evidences", [])
    evidence_texts = set(e["text"].strip().lower() for e in evidence_messages)
    # Build corpus: one doc per message
    corpus = []
    corpus_speakers = []
    for conv in conversations:
        for msg in conv.get("messages", []):
            corpus.append(msg["text"])
            corpus_speakers.append(msg["speaker"])
    if not corpus:
        return 0.0, {"error": "empty corpus"}
    tmpdir = tempfile.mkdtemp(prefix="mempal_convomem_")
    palace_path = os.path.join(tmpdir, "palace")
    try:
        client = chromadb.PersistentClient(path=palace_path)
        collection = client.create_collection("mempal_drawers")
        # Optionally compress
        if mode == "aaak":
            from mempalace.dialect import Dialect
            dialect = Dialect()
            docs = [dialect.compress(doc) for doc in corpus]
        else:
            docs = corpus
        collection.add(
            documents=docs,
            ids=[f"msg_{i}" for i in range(len(corpus))],
            metadatas=[{"speaker": s, "idx": i} for i, s in enumerate(corpus_speakers)],
        )
        results = collection.query(
            query_texts=[question],
            n_results=min(top_k, len(corpus)),
            include=["documents", "metadatas"],
        )
        # Map hits back to the original messages through the stored index.
        retrieved_indices = [m["idx"] for m in results["metadatas"][0]]
        retrieved_texts = [corpus[i].strip().lower() for i in retrieved_indices]
        found = sum(
            1 for ev_text in evidence_texts if _evidence_retrieved(ev_text, retrieved_texts)
        )
        recall = found / len(evidence_texts) if evidence_texts else 1.0
        return recall, {
            "retrieved_count": len(retrieved_indices),
            "evidence_count": len(evidence_texts),
            "found": found,
        }
    finally:
        # The palace only lives for this one item.
        shutil.rmtree(tmpdir, ignore_errors=True)
 # =============================================================================
 # BENCHMARK RUNNER
 # =============================================================================
 def run_benchmark(categories, limit_per_cat, top_k, mode, cache_dir, out_file):
    """Run the ConvoMem retrieval benchmark.

    Loads up to `limit_per_cat` evidence items for each category, scores each
    with `retrieve_for_item`, prints overall / per-category recall and the
    share of perfect and zero scores, and optionally writes the per-item log
    as JSON.

    Args:
        categories: Category keys to evaluate.
        limit_per_cat: Maximum number of items per category.
        top_k: Number of messages retrieved per question.
        mode: Retrieval mode forwarded to `retrieve_for_item`.
        cache_dir: Directory used to cache downloaded benchmark data.
        out_file: Path of the JSON results file; falsy to skip writing.
    """
    print(f"\n{'=' * 60}")
    print("  MemPal × ConvoMem Benchmark")
    print(f"{'=' * 60}")
    print(f"  Categories:  {len(categories)}")
    print(f"  Limit/cat:   {limit_per_cat}")
    print(f"  Top-k:       {top_k}")
    print(f"  Mode:        {mode}")
    print(f"{'─' * 60}")
    print("\n  Loading data from HuggingFace...\n")
    items = load_evidence_items(categories, limit_per_cat, cache_dir)
    print(f"\n  Total items: {len(items)}")
    print(f"{'─' * 60}\n")
    all_recall = []
    per_category = defaultdict(list)
    results_log = []
    start_time = datetime.now()
    for i, item in enumerate(items):
        question = item["question"]
        answer = item.get("answer", "")
        cat_key = item.get("_category_key", "unknown")
        recall, details = retrieve_for_item(item, top_k=top_k, mode=mode)
        all_recall.append(recall)
        per_category[cat_key].append(recall)
        results_log.append(
            {
                "question": question,
                "answer": answer,
                "category": cat_key,
                "recall": recall,
                "details": details,
            }
        )
        status = "HIT" if recall >= 1.0 else ("part" if recall > 0 else "miss")
        # Progress line every 20 items and once more for the last item.
        if (i + 1) % 20 == 0 or i == len(items) - 1:
            print(
                f"  [{i + 1:4}/{len(items)}] avg_recall={sum(all_recall) / len(all_recall):.3f}  last={status}"
            )
    elapsed = (datetime.now() - start_time).total_seconds()
    total = len(all_recall)
    avg_recall = sum(all_recall) / total if total else 0
    print(f"\n{'=' * 60}")
    print(f"  RESULTS — MemPal ({mode} mode, top-{top_k})")
    print(f"{'=' * 60}")
    print(f"  Time:        {elapsed:.1f}s ({elapsed / max(len(items), 1):.2f}s per item)")
    print(f"  Items:       {len(items)}")
    print(f"  Avg Recall:  {avg_recall:.3f}")
    print("\n  PER-CATEGORY RECALL:")
    for cat_key in sorted(per_category.keys()):
        vals = per_category[cat_key]
        avg = sum(vals) / len(vals)
        name = CATEGORIES.get(cat_key, cat_key)
        perfect = sum(1 for v in vals if v >= 1.0)
        print(f"    {name:25} R={avg:.3f}  perfect={perfect}/{len(vals)}")
    perfect_total = sum(1 for r in all_recall if r >= 1.0)
    zero_total = sum(1 for r in all_recall if r == 0)
    # With no items (e.g. every download failed) report 0% rather than
    # dividing by zero.
    perfect_pct = perfect_total / total * 100 if total else 0.0
    zero_pct = zero_total / total * 100 if total else 0.0
    print("\n  DISTRIBUTION:")
    print(f"    Perfect (1.0):  {perfect_total:4} ({perfect_pct:.1f}%)")
    print(f"    Zero (0.0):     {zero_total:4} ({zero_pct:.1f}%)")
    print(f"\n{'=' * 60}\n")
    if out_file:
        # Create the target directory so a finished run is never lost to a
        # missing folder (the default path is relative to the working dir).
        out_dir = os.path.dirname(out_file)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)
        with open(out_file, "w") as f:
            json.dump(results_log, f, indent=2)
        print(f"  Results saved to: {out_file}")
 # =============================================================================
 # CLI
 # =============================================================================
 if __name__ == "__main__":
    # Command-line entry point: parse the options, pick the categories and a
    # results path, then hand over to run_benchmark.
    parser = argparse.ArgumentParser(description="MemPal × ConvoMem Benchmark")
    parser.add_argument("--limit", type=int, default=100, help="Items per category (default: 100)")
    parser.add_argument("--top-k", type=int, default=10, help="Top-k retrieval (default: 10)")
    parser.add_argument(
        "--category",
        choices=[*CATEGORIES, "all"],
        default="all",
        help="Category to test (default: all)",
    )
    parser.add_argument("--mode", choices=["raw", "aaak"], default="raw", help="Retrieval mode")
    parser.add_argument("--cache-dir", default="/tmp/convomem_cache", help="Cache directory")
    parser.add_argument("--out", default=None, help="Output JSON file")
    args = parser.parse_args()
    categories = list(CATEGORIES.keys()) if args.category == "all" else [args.category]
    if not args.out:
        # Default results file: named after mode, top-k and the start minute.
        stamp = datetime.now().strftime('%Y%m%d_%H%M')
        args.out = f"benchmarks/results_convomem_{args.mode}_top{args.top_k}_{stamp}.json"
    run_benchmark(categories, args.limit, args.top_k, args.mode, args.cache_dir, args.out)
@@ -1,508 +0,0 @@
 {
  "dev": [
    "cc06de0d",
    "f9e8c073",
    "b320f3f8",
    "a89d7624",
    "311778f1",
    "gpt4_59c863d7",
    "bbf86515",
    "099778bb",
    "e831120c",
    "dcfa8644",
    "8fb83627",
    "e66b632c",
    "gpt4_7fce9456",
    "55241a1f",
    "352ab8bd",
    "f4f1d8a4",
    "830ce83f",
    "2311e44b",
    "09ba9854",
    "gpt4_a1b77f9c",
    "07741c45",
    "gpt4_70e84552",
    "b46e15ee",
    "6071bd76",
    "6f9b354f",
    "1d4da289",
    "gpt4_8279ba02",
    "6456829e_abs",
    "0db4c65d",
    "d6062bb9",
    "60bf93ed_abs",
    "d3ab962e",
    "87f22b4a",
    "e01b8e2f",
    "gpt4_7ddcf75f",
    "8ebdbe50",
    "26bdc477",
    "29f2956b_abs",
    "2311e44b_abs",
    "75f70248",
    "852ce960",
    "f0e564bc",
    "fca70973",
    "3c1045c8",
    "18bc8abd",
    "afdc33df",
    "54026fce",
    "b9cfe692",
    "6456829e",
    "e6041065"
  ],
  "held_out": [
    "gpt4_15e38248",
    "gpt4_2ba83207",
    "2133c1b5_abs",
    "gpt4_8279ba03",
    "76d63226",
    "1192316e",
    "gpt4_fa19884d",
    "gpt4_372c3eed_abs",
    "1a8a66a6",
    "gpt4_fe651585",
    "e25c3b8d",
    "945e3d21",
    "86b68151",
    "1c0ddc50",
    "1e043500",
    "d682f1a2",
    "gpt4_b5700ca0",
    "91b15a6e",
    "ce6d2d27",
    "f523d9fe",
    "7024f17c",
    "8752c811",
    "gpt4_f420262d",
    "d01c6aa8",
    "4b24c848",
    "7e974930",
    "3fdac837",
    "gpt4_b4a80587",
    "c18a7dc8",
    "80ec1f4f_abs",
    "7527f7e2",
    "6ade9755",
    "89941a94",
    "gpt4_1d80365e",
    "2133c1b5",
    "06db6396",
    "gpt4_88806d6e",
    "88432d0a",
    "3ba21379",
    "0862e8bf",
    "aae3761f",
    "5025383b",
    "gpt4_e061b84f",
    "73d42213",
    "4bc144e2",
    "gpt4_5501fe77",
    "00ca467f",
    "dfde3500",
    "01493427",
    "b6025781",
    "a96c20ee_abs",
    "982b5123_abs",
    "gpt4_fa19884c",
    "gpt4_1a1dc16d",
    "28dc39ac",
    "gpt4_2d58bcd6",
    "51c32626",
    "c4ea545c",
    "1da05512",
    "gpt4_385a5000",
    "577d4d32",
    "72e3ee87",
    "f4f1d8a4_abs",
    "9d25d4e0",
    "b29f3365",
    "b759caee",
    "10e09553",
    "1d4e3b97",
    "d52b4f67",
    "gpt4_e072b769",
    "58ef2f1c",
    "6e984301",
    "41275add",
    "gpt4_59149c77",
    "2ebe6c90",
    "1cea1afa",
    "gpt4_1e4a8aec",
    "6c49646a",
    "8a2466db",
    "gpt4_65aabe59",
    "gpt4_93159ced",
    "51a45a95",
    "af8d2e46",
    "561fabcd",
    "370a8ff4",
    "gpt4_d84a3211",
    "gpt4_7a0daae1",
    "2a1811e2",
    "gpt4_78cf46a3",
    "1568498a",
    "6b7dfb22",
    "6ae235be",
    "bc8a6e93_abs",
    "681a1674",
    "06878be2",
    "1a1907b4",
    "0e4e4c46",
    "gpt4_85da3956",
    "gpt4_f420262c",
    "2bf43736",
    "bc149d6b",
    "09d032c9",
    "5c40ec5b",
    "eac54adc",
    "993da5e2",
    "71a3fd6b",
    "gpt4_0b2f1d21",
    "ad7109d1",
    "4c36ccef",
    "c8c3f81d",
    "edced276_abs",
    "0bc8ad92",
    "gpt4_468eb064",
    "2ebe6c92",
    "cc6d1ec1",
    "4dfccbf8",
    "95228167",
    "ba358f49",
    "45dc21b6",
    "db467c8c",
    "720133ac",
    "67e0d0f2",
    "cc5ded98",
    "726462e0",
    "4100d0a0",
    "3a704032",
    "gpt4_7ca326fa",
    "ec81a493",
    "618f13b2",
    "58470ed2",
    "gpt4_4fc4f797",
    "60036106",
    "157a136e",
    "6222b6eb",
    "69fee5aa",
    "19b5f2b3_abs",
    "gpt4_d12ceb0e",
    "51b23612",
    "2318644b",
    "3fe836c9",
    "gpt4_7de946e7",
    "71017277",
    "f0853d11",
    "dc439ea3",
    "gpt4_2f91af09",
    "9a707b81",
    "bc8a6e93",
    "c14c00dd",
    "8979f9ec",
    "cf22b7bf",
    "gpt4_ec93e27f",
    "gpt4_468eb063",
    "41698283",
    "1de5cff2",
    "21d02d0d",
    "c7cf7dfd",
    "gpt4_ab202e7f",
    "dccbc061",
    "078150f1",
    "e3038f8c",
    "gpt4_c27434e8_abs",
    "2698e78f",
    "031748ae_abs",
    "gpt4_59149c78",
    "c8f1aeed",
    "184da446",
    "gpt4_b5700ca9",
    "89527b6b",
    "0977f2af",
    "853b0a1d",
    "a346bb18",
    "3249768e",
    "gpt4_2f8be40d",
    "gpt4_93159ced_abs",
    "eeda8a6d",
    "7a8d0b71",
    "95bcc1c8",
    "gpt4_2487a7cb",
    "85fa3a3f",
    "7e00a6cb",
    "e3fc4d6e",
    "59524333",
    "37f165cf",
    "0ddfec37",
    "60bf93ed",
    "d7c942c3",
    "80ec1f4f",
    "ceb54acb",
    "9aaed6a3",
    "gpt4_4929293a",
    "ed4ddc30",
    "545bd2b5",
    "2788b940",
    "ef9cf60a",
    "gpt4_7f6b06db",
    "0ea62687",
    "3d86fd0a",
    "3e321797",
    "d24813b1",
    "38146c39",
    "efc3f7c2",
    "7401057b",
    "5809eb10",
    "28bcfaac",
    "1903aded",
    "gpt4_194be4b3",
    "gpt4_e414231f",
    "0ddfec37_abs",
    "c2ac3c61",
    "gpt4_4ef30696",
    "1f2b8d4f",
    "0f05491a",
    "8550ddae",
    "8077ef71",
    "b86304ba",
    "e61a7584",
    "8cf51dda",
    "gpt4_2f584639",
    "08e075c7",
    "5d3d2817",
    "7405e8b1",
    "a3045048",
    "gpt4_731e37d7",
    "c8090214_abs",
    "36580ce8",
    "ba358f49_abs",
    "gpt4_d6585ce8",
    "e56a43b9",
    "2c63a862",
    "gpt4_5438fa52",
    "07b6f563",
    "gpt4_31ff4165",
    "0bb5a684",
    "71315a70",
    "gpt4_cd90e484",
    "gpt4_8c8961ae",
    "gpt4_fe651585_abs",
    "36b9f61e",
    "gpt4_b0863698",
    "gpt4_1d4ab0c9",
    "15745da0_abs",
    "0862e8bf_abs",
    "bcbe585f",
    "a2f3aa27",
    "gpt4_6dc9b45b",
    "ccb36322",
    "f685340e",
    "9ea5eabc",
    "gpt4_372c3eed",
    "37d43f65",
    "bf659f65",
    "b0479f84",
    "gpt4_213fd887",
    "e4e14d04",
    "f8c5f88b",
    "gpt4_18c2b244",
    "a11281a2",
    "gpt4_2655b836",
    "e47becba",
    "gpt4_74aed68e",
    "gpt4_af6db32f",
    "6cb6f249",
    "77eafa52",
    "gpt4_93f6379c",
    "e8a79c70",
    "7a87bd0c",
    "gpt4_6ed717ea",
    "d6233ab6",
    "c19f7a0b",
    "gpt4_61e13b3c",
    "d23cf73b",
    "gpt4_1e4a8aeb",
    "ba61f0b9",
    "118b2229",
    "488d3006",
    "c4a1ceb8",
    "8e91e7d9",
    "42ec0761",
    "65240037",
    "fea54f57",
    "c8090214",
    "b01defab",
    "6aeb4375_abs",
    "faba32e5",
    "c5e8278d",
    "gpt4_e414231e",
    "eeda8a6d_abs",
    "gpt4_8e165409",
    "af082822",
    "22d2cb42",
    "92a0aa75",
    "1c549ce4",
    "25e5aa4f",
    "gpt4_68e94288",
    "4baee567",
    "18dcd5a5",
    "dad224aa",
    "gpt4_f2262a51",
    "29f2956b",
    "21436231",
    "19b5f2b3",
    "gpt4_1916e0ea",
    "gpt4_45189cb4",
    "0a995998",
    "b6019101",
    "9bbe84a2",
    "61f8c8f8",
    "9a707b82",
    "8cf4d046",
    "eac54add",
    "75832dbd",
    "gpt4_98f46fc6",
    "d596882b",
    "88432d0a_abs",
    "16c90bf4",
    "f685340e_abs",
    "b5ef892d",
    "gpt4_f49edff3",
    "gpt4_483dd43c",
    "bb7c3b45",
    "gpt4_7abb270c",
    "gpt4_9a159967",
    "07741c44",
    "4d6b87c8",
    "6aeb4375",
    "gpt4_d6585ce9",
    "60472f9c",
    "caf9ead2",
    "32260d93",
    "60159905",
    "0a34ad58",
    "a40e080f",
    "10d9b85a",
    "a06e4cfe",
    "4f54b7c9",
    "6613b389",
    "70b3e69b",
    "gpt4_7bc6cf22",
    "gpt4_0a05b494",
    "778164c6",
    "195a1a1b",
    "8464fc84",
    "b46e15ed",
    "603deb26",
    "eaca4986",
    "2698e78f_abs",
    "gpt4_21adecb5",
    "2e6d26dc",
    "5831f84d",
    "08f4fc43",
    "3f1e9474",
    "c9f37c46",
    "gpt4_2f56ae70",
    "1b9b7252",
    "35a27287",
    "gpt4_d31cdae3",
    "129d1232",
    "4adc0475",
    "27016adc",
    "46a3abf7",
    "9ee3ecd6",
    "982b5123",
    "09ba9854_abs",
    "0e5e2d1a",
    "e9327a54",
    "86f00804",
    "e982271f",
    "7161e7e2",
    "57f827a0",
    "6a27ffc2",
    "edced276",
    "gpt4_d9af6064",
    "75499fd8",
    "60d45044",
    "gpt4_70e84552_abs",
    "2ce6a0f2",
    "gpt4_4929293b",
    "a1cc6108",
    "gpt4_5dcc0aab",
    "a3838d2b",
    "c7dc5443",
    "505af2f5",
    "gpt4_68e94287",
    "15745da0",
    "0100672e",
    "a82c026e",
    "5e1b23de",
    "71017276",
    "89941a93",
    "6b168ec8",
    "affe2881",
    "0edc2aef",
    "gpt4_2312f94c",
    "a4996e51",
    "c6853660",
    "ef66a6e5",
    "8a137a7f",
    "a96c20ee",
    "fca762bc",
    "ac031881",
    "d905b33f",
    "e493bb7c",
    "a9f6b44c",
    "dd2973ad",
    "8aef76bc",
    "f35224e0",
    "8b9d4367",
    "gpt4_c27434e8",
    "gpt4_a56e767c",
    "eace081b",
    "5a4f22c0",
    "58bf7951",
    "c4f10528",
    "50635ada",
    "06f04340",
    "0bc8ad93",
    "e5ba910e_abs",
    "5a7937c8",
    "a3332713",
    "4388e9dd",
    "8c18457d",
    "gpt4_2c50253f",
    "6a1eabeb",
    "b3c15d39",
    "gpt4_e061b84g",
    "3b6f954b",
    "gpt4_76048e76",
    "4dfccbf7",
    "2b8f3739",
    "d851d5ba",
    "4fd1909e",
    "94f70d80",
    "66f24dbb",
    "a08a253f",
    "6e984302",
    "001be529",
    "gpt4_a2d1d1f6",
    "cc539528",
    "e48988bc",
    "gpt4_4cd9eba1",
    "8e9d538c",
    "a1eacc2a",
    "6d550036",
    "gpt4_e05b82a6",
    "81507db6",
    "caf03d32",
    "031748ae",
    "c960da58",
    "1faac195",
    "gpt4_4edbafa2"
  ],
  "seed": 42,
  "dev_size": 50
 }
@@ -1,470 +0,0 @@
 #!/usr/bin/env python3
 """
 MemPal × MemBench Benchmark
 ============================
 MemBench (ACL 2025): https://aclanthology.org/2025.findings-acl.989/
 Data: https://github.com/import-myself/Membench
 MemBench tests memory across multi-turn conversations in multiple categories:
  - highlevel: inferences requiring aggregation across turns ("what kind of X do I prefer?")
  - lowlevel: single-turn fact recall ("what X did I mention?")
  - knowledge_update: facts that change over time
  - comparative: comparing two items mentioned across turns
  - conditional: conditional reasoning over remembered facts
  - noisy: distractors / irrelevant info mixed in
  - aggregative: combining info from multiple turns
  - RecMultiSession: recommendations across multiple topic sessions
 Each item has:
  - message_list[0]: list of turns [{user, assistant, time, place}]
  - QA: {question, answer, choices (A/B/C/D), ground_truth, target_step_id}
 We measure RETRIEVAL RECALL: is the answer-relevant turn in the top-K retrieved?
 We also score ACCURACY: does the top-retrieved turn's context match ground_truth?
 Usage:
    python benchmarks/membench_bench.py /tmp/membench/MemData/FirstAgent
    python benchmarks/membench_bench.py /tmp/membench/MemData/FirstAgent --category highlevel
    python benchmarks/membench_bench.py /tmp/membench/MemData/FirstAgent --limit 50
 """
 import sys
 import json
 import re
 import argparse
 from pathlib import Path
 from datetime import datetime
 from collections import defaultdict
 import chromadb
 sys.path.insert(0, str(Path(__file__).parent.parent))
 # ── Shared ephemeral ChromaDB client ──────────────────────────────────────────
 _bench_client = chromadb.EphemeralClient()
 def _fresh_collection(name="membench_drawers"):
    """Drop any collection called `name` on the shared client and return a new one."""
    try:
        _bench_client.delete_collection(name)
    except Exception:
        # Best effort: a failed delete (e.g. nothing to drop yet) is ignored
        # and creation is attempted regardless.
        pass
    fresh = _bench_client.create_collection(name)
    return fresh
 # ── Stop words (same as locomo_bench) ─────────────────────────────────────────
 # Lower-case words that _kw drops when extracting keywords.
 STOP_WORDS = {
    # question words
    "what", "when", "where", "who", "how", "which",
    # auxiliaries
    "did", "do", "was", "were", "have", "has", "had", "is", "are",
    # articles and pronouns
    "the", "a", "an", "my", "me", "i", "you", "your", "their", "it", "its",
    # prepositions
    "in", "on", "at", "to", "for", "of", "with", "by", "from",
    # time and reference words
    "ago", "last", "that", "this", "there", "about",
    # generic verbs
    "get", "got", "give", "gave", "buy", "bought", "made", "make", "said",
    # modals
    "would", "could", "should", "might", "can", "will", "shall",
    # preference and opinion words
    "kind", "type", "like", "prefer", "enjoy", "think", "feel",
 }
 # Capitalised words that _person_names must not report as names.
 NOT_NAMES = {
    # question words
    "What", "When", "Where", "Who", "How", "Which",
    # auxiliaries
    "Did", "Do", "Was", "Were", "Have", "Has", "Had", "Is", "Are",
    # article and pronouns
    "The", "My", "Our", "I", "It", "Its",
    # demonstratives
    "This", "That", "These", "Those",
 }
 def _kw(text):
    words = re.findall(r"\b[a-z]{3,}\b", text.lower())
    return [w for w in words if w not in STOP_WORDS]
 def _kw_overlap(query_kws, doc_text):
    if not query_kws:
        return 0.0
    doc_lower = doc_text.lower()
    hits = sum(1 for kw in query_kws if kw in doc_lower)
    return hits / len(query_kws)
 def _person_names(text):
    words = re.findall(r"\b[A-Z][a-z]{2,15}\b", text)
    return list(set(w for w in words if w not in NOT_NAMES))
 # ── MemBench data loading ─────────────────────────────────────────────────────
 # Category name -> data file inside the MemBench directory. Every file is
 # simply named "<category>.json".
 CATEGORY_FILES = {
    category: f"{category}.json"
    for category in (
        "simple",
        "highlevel",
        "knowledge_update",
        "comparative",
        "conditional",
        "noisy",
        "aggregative",
        "highlevel_rec",
        "lowlevel_rec",
        "RecMultiSession",
        "post_processing",
    )
 }
 def load_membench(data_dir: str, categories=None, topic="movie", limit=0):
    """
    Load MemBench questions from the FirstAgent directory.

    Args:
        data_dir: Directory holding the per-category JSON files.
        categories: Category names to load (keys of CATEGORY_FILES); None
            loads every known category. Unknown names and categories whose
            file is missing are skipped silently.
        topic: Top-level key to keep in topic-keyed files. The keys "roles"
            and "events" are always kept. A falsy value keeps every key.
        limit: Maximum number of items to return; 0 or less means no limit.

    Returns list of dicts:
        {category, topic, tid, turns, question, choices, ground_truth,
         answer_text, target_step_ids}
    Entries without a message list or without a QA section are dropped.
    """
    data_dir = Path(data_dir)
    if categories is None:
        categories = list(CATEGORY_FILES.keys())
    items = []
    for cat in categories:
        # Enough items collected already: do not parse further files.
        if limit > 0 and len(items) >= limit:
            break
        fname = CATEGORY_FILES.get(cat)
        if not fname:
            continue
        fpath = data_dir / fname
        if not fpath.exists():
            continue
        # Read as bytes so json detects the encoding itself; decoding then
        # does not depend on the platform's default locale.
        with open(fpath, "rb") as f:
            raw = json.load(f)
        # Files have two formats:
        #   topic-keyed: {"movie": [...], "food": [...], "book": [...]}
        #   role-keyed:  {"roles": [...], "events": [...]}
        # For topic-keyed, filter by topic arg. For role-keyed, use key as the "topic".
        for t, topic_items in raw.items():
            if topic and t not in (topic, "roles", "events"):
                continue
            for item in topic_items:
                turns = item.get("message_list", [])  # pass full message_list (all sessions)
                qa = item.get("QA", {})
                if not turns or not qa:
                    continue
                items.append(
                    {
                        "category": cat,
                        "topic": t,
                        "tid": item.get("tid", 0),
                        "turns": turns,
                        "question": qa.get("question", ""),
                        "choices": qa.get("choices", {}),
                        "ground_truth": qa.get("ground_truth", ""),
                        "answer_text": qa.get("answer", ""),
                        "target_step_ids": qa.get("target_step_id", []),
                    }
                )
    if limit > 0:
        items = items[:limit]
    return items
 # ── Indexing ──────────────────────────────────────────────────────────────────
 def _turn_text(turn: dict) -> str:
    """Extract text from a turn regardless of field naming convention."""
    user = turn.get("user") or turn.get("user_message", "")
    asst = turn.get("assistant") or turn.get("assistant_message", "")
    time = turn.get("time", "")
    text = f"[User] {user} [Assistant] {asst}"
    if time:
        text = f"[{time}] " + text
    return text
 def index_turns(collection, message_list, item_key: str):
    """
    Add every turn of every session to `collection` in a single call.

    message_list can be:
      - Flat list of turns: [turn, turn, ...]  (treated as one session)
      - List of sessions: [[turn, turn], [turn, turn], ...]
    Entries that are not lists (sessions) or dicts (turns) are ignored.
    Document ids are "<item_key>_g<n>", n being the running count of indexed
    turns. The "sid" metadata is the turn's numeric 'sid' (or 'mid') when
    present, else that running count.
    Returns number of turns indexed.
    """
    # A flat list of turn dicts is handled as one single session.
    if message_list and isinstance(message_list[0], dict):
        sessions = [message_list]
    else:
        sessions = message_list
    records = []  # (doc_id, text, metadata) per indexed turn
    for s_idx, session in enumerate(sessions):
        if not isinstance(session, list):
            continue
        for t_idx, turn in enumerate(session):
            if not isinstance(turn, dict):
                continue
            position = len(records)
            raw_sid = turn.get("sid", turn.get("mid"))
            numeric_sid = int(raw_sid) if isinstance(raw_sid, (int, float)) else position
            records.append(
                (
                    f"{item_key}_g{position}",
                    _turn_text(turn),
                    {
                        "item_key": item_key,
                        "sid": numeric_sid,
                        "s_idx": s_idx,
                        "t_idx": t_idx,
                        "global_idx": position,
                    },
                )
            )
    if records:
        collection.add(
            documents=[text for _, text, _ in records],
            ids=[doc_id for doc_id, _, _ in records],
            metadatas=[meta for _, _, meta in records],
        )
    return len(records)
 # ── Scoring ───────────────────────────────────────────────────────────────────
 def run_membench(
    data_dir, categories=None, topic="movie", top_k=5, limit=0, mode="raw", out_file=None
 ):
    """Run the MemBench retrieval evaluation and report recall@k.

    For each benchmark item a fresh collection is created, all of the item's
    turns are indexed, the item's question is used as the query, and the item
    counts as a hit when any target turn id appears in the top-k results.

    Args:
        data_dir: Directory handed to ``load_membench``.
        categories: Optional list of category names (``None`` = all).
        topic: Topic filter handed to ``load_membench``.
        top_k: Number of retrieved turns considered when scoring a hit.
        limit: Maximum number of items (``0`` = no limit).
        mode: ``"raw"`` keeps the collection's own ranking.  ``"hybrid"``
            fetches up to ``3 * top_k`` candidates and re-ranks them with
            ``distance * (1 - 0.5 * keyword_overlap)`` before cutting to
            ``top_k``.
        out_file: Optional path; per-item results are dumped there as JSON.
            Missing parent directories are created.

    Returns:
        The list of per-item result dicts, or ``None`` when no items were
        loaded.

    Items for which nothing could be indexed or retrieved are skipped and are
    excluded from every denominator, so the overall figure agrees with the
    per-category breakdown.
    """
    items = load_membench(data_dir, categories=categories, topic=topic, limit=limit)
    if not items:
        print(f"No items found in {data_dir}")
        return
    print(f"\n{'=' * 58}")
    print("  MemPal × MemBench")
    print(f"{'=' * 58}")
    print(f"  Data dir:    {data_dir}")
    print(f"  Categories:  {', '.join(categories or ['all'])}")
    print(f"  Topic:       {topic or 'all'}")
    print(f"  Items:       {len(items)}")
    print(f"  Top-k:       {top_k}")
    print(f"  Mode:        {mode}")
    print(f"{'─' * 58}\n")
    results = []
    by_cat = defaultdict(lambda: {"hit_at_k": 0, "total": 0})
    total_hit = 0
    for idx, item in enumerate(items, 1):
        item_key = f"{item['category']}_{item['topic']}_{idx}"  # idx ensures unique key
        collection = _fresh_collection()
        # Index all turns from all sessions
        n_indexed = index_turns(collection, item["turns"], item_key)
        if n_indexed < 1:
            continue
        question = item["question"]
        # Hybrid mode over-fetches so the re-ranker has candidates to promote.
        n_retrieve = min(top_k * 3 if mode == "hybrid" else top_k, n_indexed)
        if n_retrieve < 1:
            continue
        # Retrieve
        res = collection.query(
            query_texts=[question],
            n_results=n_retrieve,
            include=["distances", "metadatas", "documents"],
        )
        retrieved_sids = [m["sid"] for m in res["metadatas"][0]]
        retrieved_global = [m["global_idx"] for m in res["metadatas"][0]]
        retrieved_docs = res["documents"][0]
        raw_distances = res["distances"][0]
        # Hybrid re-scoring: predicate keywords (person names excluded)
        if mode == "hybrid":
            names = _person_names(question)
            name_words = {n.lower() for n in names}
            all_kws = _kw(question)
            predicate_kws = [w for w in all_kws if w not in name_words]
            scored = []
            for dist, sid, gidx, doc in zip(
                raw_distances, retrieved_sids, retrieved_global, retrieved_docs
            ):
                pred_overlap = _kw_overlap(predicate_kws, doc)
                # Lower is better: keyword overlap shrinks the distance by up to half.
                fused = dist * (1.0 - 0.50 * pred_overlap)
                scored.append((fused, sid, gidx, doc))
            scored.sort(key=lambda x: x[0])
            retrieved_sids = [x[1] for x in scored[:top_k]]
            retrieved_global = [x[2] for x in scored[:top_k]]
        else:
            retrieved_sids = retrieved_sids[:top_k]
            retrieved_global = retrieved_global[:top_k]
        # Check if any target turn is retrieved.
        # target_step_id format varies: [sid, ?] or [global_idx, ?]
        # Try matching against both sid and global_idx.
        target_sids = set()
        for step in item["target_step_ids"]:
            if isinstance(step, list) and len(step) >= 1:
                target_sids.add(step[0])  # first element is the turn sid/global index
        hit = bool(target_sids & set(retrieved_sids)) or bool(target_sids & set(retrieved_global))
        if hit:
            total_hit += 1
            by_cat[item["category"]]["hit_at_k"] += 1
        by_cat[item["category"]]["total"] += 1
        results.append(
            {
                "category": item["category"],
                "topic": item["topic"],
                "tid": item["tid"],
                "question": question,
                "ground_truth": item["ground_truth"],
                "answer_text": item["answer_text"],
                "target_sids": list(target_sids),
                "retrieved_sids": retrieved_sids,
                "retrieved_global": retrieved_global,
                "hit_at_k": hit,
            }
        )
        if idx % 50 == 0:
            # Divide by the items actually scored so far, not by idx:
            # skipped items must not silently count as misses.
            running_pct = total_hit / len(results) * 100
            print(f"  [{idx:4}/{len(items)}]  running R@{top_k}: {running_pct:.1f}%")
    # Final results
    evaluated = len(results)
    skipped = len(items) - evaluated
    overall = total_hit / evaluated * 100 if evaluated else 0
    print(f"\n{'=' * 58}")
    print(f"  RESULTS — MemPal on MemBench ({mode} mode, top-{top_k})")
    print(f"{'=' * 58}")
    print(f"\n  Overall R@{top_k}: {overall:.1f}%  ({total_hit}/{evaluated})\n")
    if skipped:
        print(f"  Skipped (nothing to index or retrieve): {skipped}\n")
    print("  By category:")
    for cat, v in sorted(by_cat.items()):
        pct = v["hit_at_k"] / v["total"] * 100 if v["total"] else 0
        print(f"    {cat:20} {pct:5.1f}%  ({v['hit_at_k']}/{v['total']})")
    print(f"\n{'=' * 58}\n")
    if out_file:
        import os

        # Create the target directory first so a long run cannot fail at the
        # very last step just because e.g. ``benchmarks/`` does not exist.
        out_dir = os.path.dirname(out_file)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)
        with open(out_file, "w") as f:
            json.dump(results, f, indent=2)
        print(f"  Results saved to: {out_file}")
    return results
 # ── CLI ───────────────────────────────────────────────────────────────────────
 if __name__ == "__main__":
    # Command-line entry point: parse the options, derive a default output
    # path when none was given, then hand everything to run_membench().
    cli = argparse.ArgumentParser(description="MemPal × MemBench Benchmark")
    cli.add_argument("data_dir", help="Path to MemBench FirstAgent directory")
    cli.add_argument(
        "--category",
        choices=list(CATEGORY_FILES.keys()),
        default=None,
        help="Run a single category (default: all)",
    )
    cli.add_argument(
        "--topic",
        default="movie",
        help="Topic filter: movie, food, book (default: movie)",
    )
    cli.add_argument("--top-k", type=int, default=5, help="Retrieval top-k (default: 5)")
    cli.add_argument("--limit", type=int, default=0, help="Limit items (0 = all)")
    cli.add_argument(
        "--mode",
        default="hybrid",
        choices=["raw", "hybrid"],
        help="Retrieval mode (default: hybrid)",
    )
    cli.add_argument("--out", default=None, help="Output JSON file (default: auto-named)")
    opts = cli.parse_args()

    out_path = opts.out
    if not out_path:
        # Auto-name: mode, category (or "all"), topic, top-k and a minute-resolution timestamp.
        category_tag = f"_{opts.category}" if opts.category else "_all"
        stamp = datetime.now().strftime("%Y%m%d_%H%M")
        out_path = (
            f"benchmarks/results_membench_{opts.mode}{category_tag}_{opts.topic}"
            f"_top{opts.top_k}_{stamp}.json"
        )

    selected_categories = [opts.category] if opts.category else None
    run_membench(
        opts.data_dir,
        categories=selected_categories,
        topic=opts.topic,
        top_k=opts.top_k,
        limit=opts.limit,
        mode=opts.mode,
        out_file=out_path,
    )
@@ -1,301 +0,0 @@
 """Mining throughput benchmark: per-chunk vs batched upsert, CPU vs GPU.
 Compares the legacy per-chunk ``add_drawer`` loop against the batched
 ``collection.upsert`` path introduced in the "batched upsert + GPU" PR.
 Runs both paths on an identical seeded synthetic corpus, reports
 wall-clock time + drawers/sec, and prints a markdown table suitable
 for pasting into a PR description.
 Usage
 -----
    # CPU (whatever onnxruntime is installed — CPU if you don't have
    # onnxruntime-gpu):
    uv run python benchmarks/mine_bench.py
    # GPU (NVIDIA):
    uv venv /tmp/gpu && source /tmp/gpu/bin/activate
    uv pip install -e '.[gpu]' 'nvidia-cudnn-cu12>=9,<10' \\
        'nvidia-cuda-runtime-cu12' 'nvidia-cublas-cu12'
    export LD_LIBRARY_PATH=$(python -c "import nvidia.cudnn, os; \\
        print(os.path.dirname(nvidia.cudnn.__file__)+'/lib')"):$LD_LIBRARY_PATH
    MEMPALACE_EMBEDDING_DEVICE=cuda python benchmarks/mine_bench.py
 Flags
 -----
    --device cpu|cuda|coreml|dml|auto   Override MEMPALACE_EMBEDDING_DEVICE
    --scenarios small,medium,large      Which scenarios to run
    --seed 42                           RNG seed for reproducibility
 """
 from __future__ import annotations
 import argparse
 import hashlib
 import os
 import random
 import shutil
 import string
 import sys
 import tempfile
 import time
 from datetime import datetime
 from pathlib import Path
 def build_corpus(dest: Path, n_files: int, paragraphs_per_file: int, seed: int) -> None:
    """Write a seeded synthetic project under ``dest``.

    Creates ``dest`` (and parents) if needed, then writes ``n_files`` files
    named ``doc_000.md``, ``doc_001.md``, ... each holding
    ``paragraphs_per_file`` blank-line-separated paragraphs of 12 random
    lowercase words (3-10 letters each), plus a ``mempalace.yaml`` declaring
    the ``bench`` wing with a single ``general`` room.  The same ``seed``
    always yields the same text.
    """
    rng = random.Random(seed)
    dest.mkdir(parents=True, exist_ok=True)

    def _paragraph() -> str:
        # Draw the word length first, then the letters, once per word.
        tokens = []
        for _ in range(12):
            length = rng.randint(3, 10)
            tokens.append("".join(rng.choices(string.ascii_lowercase, k=length)))
        return " ".join(tokens)

    for file_no in range(n_files):
        body = "\n\n".join([_paragraph() for _ in range(paragraphs_per_file)])
        dest.joinpath(f"doc_{file_no:03d}.md").write_text(body)

    config_lines = [
        "wing: bench\n",
        "rooms:\n",
        "  - name: general\n",
        "    description: all\n",
        "    keywords: [general]\n",
    ]
    dest.joinpath("mempalace.yaml").write_text("".join(config_lines))
 def _process_file_unbatched(filepath, project_path, collection, wing, rooms, agent, closets_col):
    """Legacy per-chunk upsert path (pre-batching).

    Reproduces the exact loop shape the miner used before this PR so the
    comparison is apples-to-apples; only the upsert granularity differs.
    Do not "tidy" this function: it is the benchmark baseline.

    Args:
        filepath: File to mine; must support ``read_text`` (a ``Path``).
        project_path: Project root, forwarded to ``miner.detect_room``.
        collection: Drawer collection that receives one ``add_drawer`` call
            per chunk.
        wing: Wing name stored with every drawer and closet.
        rooms: Room definitions forwarded to ``miner.detect_room``.
        agent: Agent label forwarded to ``miner.add_drawer``.
        closets_col: Closet collection; closet lines are only written when
            this is truthy and at least one drawer was added.

    Returns:
        ``(drawers_added, room)``.  ``(0, "general")`` is returned when the
        file is already mined, cannot be read, or is shorter than
        ``miner.MIN_CHUNK_SIZE`` after stripping; ``(0, room)`` when the file
        turns out to be already mined once the lock is held.
    """
    from mempalace import miner
    from mempalace.palace import (
        build_closet_lines,
        file_already_mined,
        mine_lock,
        purge_file_closets,
        upsert_closet_lines,
    )
    source_file = str(filepath)
    # Cheap pre-check before reading the file or taking the lock.
    if file_already_mined(collection, source_file, check_mtime=True):
        return 0, "general"
    try:
        content = filepath.read_text(encoding="utf-8", errors="replace")
    except OSError:
        # Unreadable files are skipped rather than aborting the benchmark run.
        return 0, "general"
    content = content.strip()
    if len(content) < miner.MIN_CHUNK_SIZE:
        return 0, "general"
    room = miner.detect_room(filepath, content, rooms, project_path)
    chunks = miner.chunk_text(content, source_file)
    with mine_lock(source_file):
        # Re-check under the lock — presumably guards against another miner
        # having processed the file in the meantime (confirm against mine_lock).
        if file_already_mined(collection, source_file, check_mtime=True):
            return 0, room
        # Best-effort removal of drawers from a previous mining of this file;
        # a failing delete is deliberately ignored.
        try:
            collection.delete(where={"source_file": source_file})
        except Exception:
            pass
        drawers_added = 0
        # The legacy behaviour under test: one add_drawer call per chunk.
        for chunk in chunks:
            miner.add_drawer(
                collection=collection,
                wing=wing,
                room=room,
                content=chunk["content"],
                source_file=source_file,
                chunk_index=chunk["chunk_index"],
                agent=agent,
            )
            drawers_added += 1
        if closets_col and drawers_added > 0:
            # Drawer ids are recomputed here from source_file + chunk_index.
            # NOTE(review): presumably this mirrors the id scheme used inside
            # miner.add_drawer — confirm, since nothing here enforces it.
            drawer_ids = [
                f"drawer_{wing}_{room}_"
                f"{hashlib.sha256((source_file + str(c['chunk_index'])).encode()).hexdigest()[:24]}"
                for c in chunks
            ]
            closet_lines = build_closet_lines(source_file, drawer_ids, content, wing, room)
            closet_id_base = (
                f"closet_{wing}_{room}_"
                f"{hashlib.sha256(source_file.encode()).hexdigest()[:24]}"
            )
            closet_meta = {
                "wing": wing,
                "room": room,
                "source_file": source_file,
                "drawer_count": drawers_added,
                "filed_at": datetime.now().isoformat(),
                "normalize_version": miner.NORMALIZE_VERSION,
            }
            # Replace, rather than append to, any closet lines already filed for this file.
            purge_file_closets(closets_col, source_file)
            upsert_closet_lines(closets_col, closet_id_base, closet_lines, closet_meta)
    return drawers_added, room
 def mine_once(project_dir: str, palace_path: str, batched: bool) -> tuple[int, float]:
    """Mine ``project_dir`` into the palace at ``palace_path`` and time it.

    ``batched=True`` routes every file through ``miner.process_file`` (the
    new path); ``batched=False`` uses ``_process_file_unbatched`` (the old
    per-chunk path).  Config loading, file scanning and collection opening
    happen before the timer starts.

    Returns ``(total_drawers, elapsed_seconds)``.
    """
    from mempalace import miner
    from mempalace.miner import load_config, scan_project
    from mempalace.palace import get_closets_collection, get_collection

    project_path = Path(project_dir).resolve()
    config = load_config(project_dir)
    wing = config["wing"]
    rooms = config.get("rooms", [])
    files = scan_project(project_dir)
    collection = get_collection(palace_path)
    closets = get_closets_collection(palace_path)

    # Pick the per-file routine once so the timed loop is identical for both modes.
    if batched:

        def _mine_file(filepath):
            return miner.process_file(
                filepath=filepath,
                project_path=project_path,
                collection=collection,
                wing=wing,
                rooms=rooms,
                agent="bench",
                dry_run=False,
                closets_col=closets,
            )

    else:

        def _mine_file(filepath):
            return _process_file_unbatched(
                filepath, project_path, collection, wing, rooms, "bench", closets
            )

    drawer_total = 0
    started = time.perf_counter()
    for filepath in files:
        added, _room = _mine_file(filepath)
        drawer_total += added
    elapsed = time.perf_counter() - started
    return drawer_total, elapsed
 def _reset_backend_caches() -> None:
    """Empty the default backend's ``_clients`` and ``_freshness`` caches.

    Called before each timed run so neither code path benefits from state
    left behind in-process by the previous one.
    """
    from mempalace.palace import _DEFAULT_BACKEND as backend

    for cache_attr in ("_clients", "_freshness"):
        getattr(backend, cache_attr).clear()
 def run_scenario(label: str, n_files: int, paragraphs_per_file: int, seed: int) -> dict:
    """Benchmark one corpus size with both mining paths and summarise it.

    The unbatched path runs first, then the batched one.  Each gets its own
    temporary directory (removed afterwards, even on failure), a freshly
    generated corpus from the same ``seed``, and cleared backend caches.

    Returns a dict with the label, corpus shape, the batched run's drawer
    count, time and rate for both paths, and the unbatched/batched speedup.
    """
    print(f"\n=== {label}: {n_files} files × {paragraphs_per_file} paragraphs ===")

    def _measure(mode: str) -> tuple:
        # One isolated run: (drawers, seconds, drawers_per_second).
        workdir = Path(tempfile.mkdtemp(prefix=f"mp_{mode}_"))
        try:
            project_dir = workdir / "proj"
            palace_dir = workdir / "palace"
            build_corpus(project_dir, n_files, paragraphs_per_file, seed=seed)
            _reset_backend_caches()
            drawers, dt = mine_once(
                str(project_dir), str(palace_dir), batched=(mode == "batched")
            )
            rate = drawers / dt if dt > 0 else 0.0
            print(f"  {mode:10} {drawers:5} drawers in {dt:6.2f}s  →  {rate:7.1f} drawers/sec")
            return drawers, dt, rate
        finally:
            shutil.rmtree(workdir, ignore_errors=True)

    timings = {mode: _measure(mode) for mode in ("unbatched", "batched")}
    _unused, slow_time, slow_rate = timings["unbatched"]
    fast_drawers, fast_time, fast_rate = timings["batched"]

    speedup = slow_time / fast_time if fast_time > 0 else 0.0
    print(f"  speedup:   {speedup:.2f}× ({slow_time:.2f}s → {fast_time:.2f}s)")

    summary = {
        "label": label,
        "n_files": n_files,
        "paragraphs": paragraphs_per_file,
        "drawers": fast_drawers,
        "unbatched_time": slow_time,
        "unbatched_rate": slow_rate,
        "batched_time": fast_time,
        "batched_rate": fast_rate,
        "speedup": speedup,
    }
    return summary
 # Benchmark presets, keyed by the name accepted by --scenarios.
 # Each value is (label shown in the report, number of files, paragraphs per file).
 SCENARIOS = {
    "small":  ("Small files (~50 paragraphs)",  10, 50),
    "medium": ("Medium files (~200 paragraphs)", 20, 200),
    "large":  ("Large files (~500 paragraphs)",  10, 500),
 }
 def _env_summary(device_label: str) -> list[str]:
    """Short hardware + version lines included with the printed table."""
    import platform
    try:
        import chromadb
        chromadb_v = chromadb.__version__
    except Exception:
        chromadb_v = "?"
    try:
        import onnxruntime as ort
        ort_v = ort.__version__
        providers = ",".join(p.replace("ExecutionProvider", "") for p in ort.get_available_providers())
    except Exception:
        ort_v = "?"
        providers = "?"
    return [
        f"device: **{device_label}** (onnxruntime {ort_v}, providers={providers})",
        f"chromadb {chromadb_v} · python {sys.version.split()[0]} · {platform.platform()}",
    ]
 def main() -> None:
    """CLI entry point: run the selected scenarios and print a markdown report.

    Flags:
        --device     Sets ``MEMPALACE_EMBEDDING_DEVICE`` before the embedding
                     module is imported.
        --scenarios  Comma-separated keys of ``SCENARIOS`` (default: all three).
        --seed       Corpus seed, passed to every scenario.

    Exits with status 2 if any requested scenario name is unknown.  Names are
    validated before the model warm-up and before any scenario runs, so a typo
    is reported immediately.
    """
    # __doc__ is None when docstrings are stripped (python -OO); fall back to
    # an empty description instead of crashing on None.split().
    parser = argparse.ArgumentParser(description=(__doc__ or "").split("\n\n", 1)[0])
    parser.add_argument(
        "--device",
        default=None,
        help="Override MEMPALACE_EMBEDDING_DEVICE (cpu|cuda|coreml|dml|auto)",
    )
    parser.add_argument(
        "--scenarios",
        default="small,medium,large",
        help="Comma-separated scenario names (default: all)",
    )
    parser.add_argument("--seed", type=int, default=42)
    args = parser.parse_args()

    picked = [s.strip() for s in args.scenarios.split(",") if s.strip()]
    # Fail fast: reject unknown names before paying for warm-up or earlier scenarios.
    for key in picked:
        if key not in SCENARIOS:
            print(f"Unknown scenario {key!r}; choices: {sorted(SCENARIOS)}", file=sys.stderr)
            sys.exit(2)

    # Must be set before the embedding module is imported below.
    if args.device:
        os.environ["MEMPALACE_EMBEDDING_DEVICE"] = args.device
    from mempalace.embedding import describe_device, get_embedding_function

    device_label = describe_device()
    print(f"Warming up ONNX model on device={device_label}...")
    # Run one throwaway embedding so model loading is not billed to the first scenario.
    ef = get_embedding_function()
    ef(["warmup sentence one", "warmup sentence two"])

    results = []
    for key in picked:
        label, n_files, paras = SCENARIOS[key]
        results.append(run_scenario(label, n_files, paras, args.seed))

    print("\n\n## Mining benchmark\n")
    for line in _env_summary(device_label):
        # Two trailing spaces force a markdown line break.
        print(line + "  ")
    print()
    print("| Scenario | Files | Drawers | Per-chunk (old) | Batched (new) | Speedup |")
    print("| --- | ---: | ---: | ---: | ---: | ---: |")
    for r in results:
        print(
            f"| {r['label']} | {r['n_files']} | {r['drawers']} | "
            f"{r['unbatched_time']:.2f}s · {r['unbatched_rate']:.0f} drw/s | "
            f"{r['batched_time']:.2f}s · {r['batched_rate']:.0f} drw/s | "
            f"**{r['speedup']:.2f}×** |"
        )
 if __name__ == "__main__":
    main()
@@ -0,0 +1,61 @@
 # MemPalace Caddy reverse-proxy config.
 # -----------------------------------------------------------------------------
 # Listens on :8443 with a self-signed (Caddy-internal) cert. Enforces a
 # bearer-token check on every request and proxies authenticated traffic to
 # the mempalace container.
 #
 # Routes (two upstreams on the mempalace container):
 #   /sse, /messages*  -> mempalace:8765  (mcp-proxy SSE for MCP tool calls)
 #   /ingest*          -> mempalace:8766  (in-process HTTP ingest endpoint)
 #   /healthz          -> mempalace:8766  (no auth, for liveness probes)
 #
 # Token comes from the MEMPAL_TOKEN env var (set in deploy/unraid/.env).
 # -----------------------------------------------------------------------------
 {
 	# Disable the admin API — never expose it from a container that's
 	# reachable from clients.
 	admin off
 	# Ship access logs to stderr so `docker logs caddy` is useful.
 	log {
 		output stderr
 		format console
 	}
 }
 :8443 {
 	tls internal
 	# Liveness probe — no auth so Docker / external monitors can hit it
 	# without holding the bearer token.
 	handle /healthz {
 		reverse_proxy mempalace:8766
 	}
 	# Auth gate. matcher passes only when the Authorization header is
 	# exactly `Bearer ${MEMPAL_TOKEN}`. Header matching is exact-match.
 	@authorized header Authorization "Bearer {$MEMPAL_TOKEN}"
 	# MCP-over-SSE paths. A directive accepts only ONE inline matcher token,
 	# so matching several paths needs a named `path` matcher — a second
 	# inline path would be parsed as an upstream address.
 	@mcp path /sse* /messages*
 	# MCP-over-SSE: the MCP transport sends events on /sse and accepts
 	# JSON-RPC POSTs on /messages (path varies by mcp-proxy version, so
 	# proxy the whole prefix tree).
 	handle @authorized {
 		# SSE responses are streamed — disable buffering and force HTTP/1.1
 		# upstream to keep the event stream open.
 		reverse_proxy @mcp mempalace:8765 {
 			flush_interval -1
 			transport http {
 				versions 1.1
 			}
 		}
 		reverse_proxy /ingest* mempalace:8766
 	}
 	# Default: anything not matched above (or unauthenticated traffic) is
 	# rejected. Returning 401 instead of 403 is correct here — clients with
 	# no/invalid token can re-attempt with credentials.
 	respond 401 {
 		body "Unauthorized"
 		close
 	}
 }
@@ -0,0 +1,512 @@
 # MemPalace on Unraid — server-mode deployment
 This directory contains everything needed to run MemPalace as a shared
 memory server on an Unraid box and connect multiple AI tools (Claude
 Code, Codex, Antigravity, or any MCP-compatible client) to a single
 persistent palace.
 If you only use one machine, you don't need any of this — install
 mempalace locally per the main [README](../../README.md) and you're
 done. This guide is for users running the same AI tools across multiple
 machines who want one shared memory.
 ---
 ## What you get
 ```
                     home LAN
   ┌───────────────────────────────────┐
   │        Unraid (always on)         │
   │   ┌────────────────────────────┐  │
   │   │ caddy :8443 (TLS + auth)   │  │
   │   │   ├─ /sse     → mcp-proxy  │  │
   │   │   └─ /ingest  → ingest API │  │
   │   │ mempalace (single process) │  │
   │   │   ├─ mcp-proxy :8765       │  │
   │   │   └─ ingest   :8766        │  │
   │   └────────────────────────────┘  │
   │   /mnt/user/appdata/mempalace/    │
   │     ├─ palace/  ChromaDB          │
   │     ├─ kg/      knowledge graph   │
   │     └─ inbox/   uploaded sessions │
   └───────────────────────────────────┘
              │           │           │
        ┌─────┴─┐    ┌────┴──┐    ┌───┴──────┐
        │ box A │    │ box B │    │ box C    │
        │ Claude│    │ Codex │    │ Antigrav │
        └───────┘    └───────┘    └──────────┘
 ```
 * **One palace, many clients.** Search and write target the same
  ChromaDB index regardless of which machine you're on.
 * **Auto-save hooks work across machines.** Each client's session
  transcripts get pushed to the server on `Stop` and `PreCompact`
  events; the server-side miner runs the existing `mine_convos`
  pipeline (entity detection, room assignment, dedup, idempotency).
 * **Single shared secret.** One bearer token gates both MCP and
  transcript ingest at the Caddy edge.
 What this is **not**: a multi-tenant cloud product. There's one palace,
 one token, no per-user isolation. It's designed for a single user with
 multiple machines.
 ---
 ## Files in this directory
 | File | Purpose |
 |---|---|
 | `docker-compose.yml` | Two-container stack: `mempalace` + `caddy` sidecar. |
 | `Caddyfile` | Caddy config: bearer-token auth, self-signed TLS, SSE-aware reverse proxy. |
 | `mempalace-server.xml` | dockerMan template for a single-container, **no-auth, LAN-trust-only** install (compose path is the recommended one). |
 | `README.md` | This file. |
 The `Dockerfile` and `.dockerignore` live at the repo root — the compose
 build context is `../..` so it can reach them.
 ---
 ## Prerequisites
 * Unraid 6.12+ with Docker enabled (default).
 * The **Compose Manager** plugin from Community Apps. Required for the
  recommended (auth-enabled) path. The dockerMan template path doesn't
  need it but has no auth.
 * `/mnt/user/appdata` set up (default on every Unraid).
 * Port `8443` free on the Unraid host (or change the host-side mapping in `docker-compose.yml`).
 You do **not** need Tailscale, WireGuard, a domain name, a public IP,
 SWAG, or NPM. The stack is self-contained.
 ---
 ## Install (recommended: compose with auth)
 ### 1. Get the repo onto Unraid
 SSH to Unraid, pick a path on a regular share (not `/boot`, not
 `/mnt/cache` directly), and clone or copy the repo:
 ```bash
 mkdir -p /mnt/user/system/build
 cd /mnt/user/system/build
 git clone <your-fork-or-rsync-source> mempalace
 cd mempalace/deploy/unraid
 ```
 ### 2. Mint a bearer token
 ```bash
 TOKEN=$(openssl rand -hex 32)
 echo "MEMPAL_TOKEN=$TOKEN" > .env
 chmod 600 .env
 echo "Token: $TOKEN"   # save to a password manager — you'll set this on each client
 ```
 `MEMPAL_TOKEN` is read from `.env` by `docker compose`. The same token
 is forwarded to:
 * Caddy, which checks `Authorization: Bearer <token>` on every request.
 * The in-container ingest server as `MEMPALACE_INGEST_TOKEN` for
  defense-in-depth.
 ### 3. Create the appdata directories
 ```bash
 mkdir -p /mnt/user/appdata/mempalace \
         /mnt/user/appdata/mempalace-caddy/data \
         /mnt/user/appdata/mempalace-caddy/config
 chown -R 99:100 /mnt/user/appdata/mempalace
 chown -R 99:100 /mnt/user/appdata/mempalace-caddy
 ```
 The Caddy data dir holds Caddy's auto-generated root CA — back it up
 so re-deploys keep the same cert (clients won't have to re-trust it).
 ### 4. Build and start
 ```bash
 docker compose up -d --build
 ```
 First build downloads Python 3.13-slim and pip-installs `mempalace` +
 `mcp-proxy` (~3–5 min on a Celeron, faster on real hardware).
 ### 5. Verify
 ```bash
 # unauth'd liveness probe
 curl -k https://<unraid-ip>:8443/healthz
 # → {"status":"ok","version":"3.3.x"}
 # bearer-checked endpoint should 401 without the token
 curl -ki https://<unraid-ip>:8443/ingest/transcript
 # HTTP/2 401
 # ...and accept a request with it
 curl -k -H "Authorization: Bearer $TOKEN" https://<unraid-ip>:8443/healthz
 # → 200 OK
 ```
 If you see all of the above, the server is up and the auth gate is
 working.
 ### 6. (Optional) Trust Caddy's root CA on each client
 Caddy's `tls internal` directive auto-generates a self-signed root CA
 on first start. Clients must either trust that CA or skip TLS
 verification (`-k` for curl, `MEMPAL_REMOTE_INSECURE=1` for hooks,
 disabled SSL verify for the MCP client).
 To trust it once and stop seeing TLS warnings:
 ```bash
 # On Unraid:
 cat /mnt/user/appdata/mempalace-caddy/data/caddy/pki/authorities/local/root.crt
 ```
 Copy that PEM block to each Windows client and import into the
 **Trusted Root Certification Authorities** store via `certmgr.msc`,
 or via PowerShell:
 ```powershell
 Import-Certificate -FilePath C:\path\to\root.crt -CertStoreLocation Cert:\LocalMachine\Root
 ```
 ---
 ## Connect AI tools
 You'll need [`mcp-proxy`](https://github.com/sparfenyuk/mcp-proxy) on
 each client machine:
 ```bash
 uv tool install mcp-proxy
 # or:
 pip install mcp-proxy
 ```
 Set environment variables persistently. **PowerShell** (Windows):
 ```powershell
 [Environment]::SetEnvironmentVariable("MEMPAL_REMOTE_URL",   "https://<unraid-ip>:8443", "User")
 [Environment]::SetEnvironmentVariable("MEMPAL_REMOTE_TOKEN", "<the-token>",              "User")
 # Drop this once you've trusted Caddy's root CA:
 [Environment]::SetEnvironmentVariable("MEMPAL_REMOTE_INSECURE", "1", "User")
 ```
 **Bash/Zsh** (macOS/Linux): add the same three exports to
 `~/.zshrc` / `~/.bashrc`.
 ### Claude Code
 Add to `~/.claude.json` (user-scoped) or `.mcp.json` in the project:
 ```json
 {
  "mcpServers": {
    "mempalace": {
      "command": "mcp-proxy",
      "args": [
        "https://<unraid-ip>:8443/sse",
        "--headers", "Authorization", "Bearer <the-token>"
      ],
      "env": {
        "PYTHONHTTPSVERIFY": "0"
      }
    }
  }
 }
 ```
 Drop the `env` block once Caddy's root CA is trusted on the client.
 ### Codex CLI
 Add to `~/.codex/config.toml`:
 ```toml
 [mcp_servers.mempalace]
 command = "mcp-proxy"
 args = [
  "https://<unraid-ip>:8443/sse",
  "--headers", "Authorization", "Bearer <the-token>",
 ]
 [mcp_servers.mempalace.env]
 PYTHONHTTPSVERIFY = "0"
 ```
 ### Antigravity
 Antigravity uses the Windsurf-derived MCP layout. Open the IDE's
 MCP settings UI (Settings → AI → MCP Servers) and add:
 ```json
 {
  "mempalace": {
    "command": "mcp-proxy",
    "args": [
      "https://<unraid-ip>:8443/sse",
      "--headers", "Authorization", "Bearer <the-token>"
    ]
  }
 }
 ```
 Or edit `~/.antigravity/mcp.json` directly with the same shape.
 ### Verify each client
 In any of the three tools, start a session and call:
 > "Use mempalace_status to show palace stats."
 Expected: a JSON blob with `total_drawers`, wing/room breakdown, etc.
 A 401 means the token is wrong; a connection error means the
 URL/cert is wrong.
 ---
 ## Set up auto-save hooks
 The `_remote.sh` hook variants in `../../hooks/` push transcripts to
 the server instead of running `mempalace mine` locally. They share the
 same env-var contract as the MCP client config above.
 ### Claude Code
 Make the scripts executable:
 ```bash
 chmod +x hooks/mempal_save_hook_remote.sh \
         hooks/mempal_precompact_hook_remote.sh
 ```
 Add to `.claude/settings.local.json`:
 ```json
 {
  "hooks": {
    "Stop": [{
      "matcher": "*",
      "hooks": [{
        "type": "command",
        "command": "/abs/path/to/hooks/mempal_save_hook_remote.sh",
        "timeout": 30
      }]
    }],
    "PreCompact": [{
      "hooks": [{
        "type": "command",
        "command": "/abs/path/to/hooks/mempal_precompact_hook_remote.sh",
        "timeout": 60
      }]
    }]
  }
 }
 ```
 ### Codex CLI
 Add to `.codex/hooks.json` with the same shape — the scripts are
 hook-host-agnostic.
 ### What the hooks do
 | Hook | Trigger | Behavior |
 |---|---|---|
 | `mempal_save_hook_remote.sh` | Every 15 user messages (configurable via `SAVE_INTERVAL` env var) | Backgrounded `curl` POSTs the active transcript to `/ingest/transcript`. Returns immediately so the AI doesn't stall. Idempotent — failed retries are safe. |
 | `mempal_precompact_hook_remote.sh` | Right before context compaction | Synchronous `curl` POST. Blocks until the upload completes (or the hook timeout fires) so memory is durable before context shrinks. |
 Both write logs to `~/.mempalace/hook_state/hook.log`. Tail it during
 setup to confirm uploads are landing.
 ### Optional env vars
 | Variable | Default | Purpose |
 |---|---|---|
 | `MEMPAL_REMOTE_URL` | *(required)* | Server base URL, e.g. `https://unraid.local:8443`. |
 | `MEMPAL_REMOTE_TOKEN` | *(required)* | Bearer token. |
 | `MEMPAL_REMOTE_INSECURE` | unset | Set to `1` to skip TLS verification. Use only with `tls internal`. |
 | `MEMPAL_REMOTE_WING` | unset | Force a specific wing for this client's transcripts. Default: server derives wing from session id. |
 | `SAVE_INTERVAL` | `15` | Messages between save-hook fires. |
 ---
 ## Backfilling history
 The hooks only capture sessions going forward. To mine **past**
 transcripts into the remote palace, on each client run:
 ```bash
 curl -k -X POST \
  -H "Authorization: Bearer $MEMPAL_REMOTE_TOKEN" \
  -H "X-Session-Id: backfill-$(hostname)-$(date +%s)" \
  -H "X-Wing: backfill" \
  --data-binary @/path/to/some-session.jsonl \
  "$MEMPAL_REMOTE_URL/ingest/transcript"
 ```
 For a whole directory of past sessions, loop:
 ```bash
 for f in ~/.claude/projects/**/*.jsonl; do
  curl -k -X POST \
    -H "Authorization: Bearer $MEMPAL_REMOTE_TOKEN" \
    -H "X-Session-Id: $(basename "$f" .jsonl)" \
    --data-binary @"$f" \
    "$MEMPAL_REMOTE_URL/ingest/transcript"
 done
 ```
 The server-side miner is idempotent — re-uploading the same transcript
 won't double-file.
 ---
 ## Backups
 Everything that matters lives in `/mnt/user/appdata/mempalace/`:
 * `palace/` — ChromaDB vector index + SQLite metadata
 * `kg/` — knowledge-graph SQLite
 * `inbox/` — uploaded transcripts (kept for re-mining if needed)
 Add it to your **CA Backup / Appdata Backup** schedule. Losing this
 directory loses all memory.
 The Caddy data dir (`/mnt/user/appdata/mempalace-caddy/data/`) is also
 worth backing up — it contains the auto-generated root CA. Without it,
 re-deploys regenerate the CA and clients have to re-trust it.
 ---
 ## dockerMan template (no-auth, LAN-trust-only)
 If you don't want auth and trust your LAN absolutely (no other people,
 no untrusted IoT, no guests), the `mempalace-server.xml` template gives
 you a single-container, dockerMan-compatible install:
 ```bash
 # Build the image:
 cd /mnt/user/system/build/mempalace
 docker build -t mempalace-server:latest .
 # Install the template:
 cp deploy/unraid/mempalace-server.xml \
   /boot/config/plugins/dockerMan/templates-user/my-MemPalace.xml
 ```
 Then in the Unraid WebUI: Docker → Add Container → "Select a template" →
 **MemPalace** → Apply.
 This path skips Caddy entirely. The MCP SSE endpoint is published bare
 on `:8765`, no TLS, no auth. Anyone on the LAN can read and write the
 palace. **Only use this if you understand and accept that.**
 ---
 ## Troubleshooting
 ### `mcp-proxy` connects but tool calls hang
 Caddy is buffering SSE responses. Verify `flush_interval -1` is set in
 the Caddyfile and that Caddy version is 2.7+ (the compose pulls
 `caddy:2-alpine` which is current).
 ### 401 from every request
 The token in the client's MCP config doesn't match the server's
 `MEMPAL_TOKEN`. Print both to confirm:
 ```bash
 # On Unraid:
 grep MEMPAL_TOKEN /mnt/user/system/build/mempalace/deploy/unraid/.env
 # On client (PowerShell):
 [Environment]::GetEnvironmentVariable("MEMPAL_REMOTE_TOKEN", "User")
 ```
 ### `MineAlreadyRunning` errors in hook logs
 Two clients hit the ingest endpoint simultaneously. The server-side
 miner serializes via `mine_lock` and rejects the second one. The hook
 is idempotent — the next save catches up. If you see this constantly,
 raise `SAVE_INTERVAL` on the chattier client.
 ### Caddy logs `tls: handshake failure`
 Client doesn't trust the self-signed cert. Either trust the root CA
 (see step 6 in install) or set `MEMPAL_REMOTE_INSECURE=1` /
 `PYTHONHTTPSVERIFY=0` on that client.
 ### Container can't start: "address already in use"
 Port 8443 is taken (commonly by Unraid's WebUI HTTPS or another
 service). Edit `docker-compose.yml` and change the host-side mapping:
 ```yaml
    ports:
      - "9443:8443"   # change 9443 to whatever's free
 ```
 Update `MEMPAL_REMOTE_URL` on every client to match.
 ### Embedding model download stalls on first request
 The ~80 MB MiniLM ONNX model downloads from HuggingFace on first
 use. Slow connections can time out the initial mining call. Pre-warm
 it manually:
 ```bash
 docker exec mempalace python -c \
  "from chromadb.utils.embedding_functions import ONNXMiniLM_L6_V2; ONNXMiniLM_L6_V2()(['warmup'])"
 ```
 Subsequent uses load from `/data/.cache/chroma/` — ~50 ms.
 ### Logs
 ```bash
 docker logs mempalace          # MCP server, ingest server
 docker logs mempalace-caddy    # auth gate, TLS, access logs
 tail -f ~/.mempalace/hook_state/hook.log   # client-side hook activity
 ```
 ---
 ## Updating
 When this repo updates upstream:
 ```bash
 cd /mnt/user/system/build/mempalace
 git pull
 cd deploy/unraid
 docker compose up -d --build
 ```
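 Compose only rebuilds the `mempalace` service (the image hash
 changes). Caddy uses the floating `caddy:2-alpine` tag, which
 `up --build` does not refresh once the image is present locally — run
 `docker compose pull caddy` first if you also want the newest 2.x Caddy.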
 Your palace data and Caddy CA persist across rebuilds because they're
 on volumes outside the container.
 ---
 ## Going further
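 * **Replace self-signed TLS with Let's Encrypt** — point a real domain at
  Unraid (DDNS or otherwise), publish port 80 to the Caddy container for
  the ACME challenge, change the site address in `Caddyfile` from `:8443`
  to `your.domain:8443` (Caddy needs a hostname to request a certificate
  for), and change `tls internal` to `tls your@email`. Caddy handles the
  rest.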
 * **Put behind SWAG / Nginx Proxy Manager** — drop the Caddy sidecar,
  keep `mempalace` exposing 8765/8766 internally only, and add the
  routes to your existing reverse proxy. Bearer-token auth and SSE
  pass-through must be configured manually.
 * **Per-machine wings** — set `MEMPAL_REMOTE_WING=<machinename>` on
  each client so transcripts file under separate wings; cross-wing
  search still works via the palace graph.
@@ -0,0 +1,82 @@
 # MemPalace Unraid Compose
 # -----------------------------------------------------------------------------
 # Two-container stack: mempalace (MCP-over-SSE on 8765 + HTTP ingest on 8766,
 # both reachable only on the internal compose network, never published on
 # the host) plus a Caddy sidecar that terminates TLS, enforces a bearer
 # token, and reverse-proxies both endpoints on :8443.
 #
 # Use this with the Unraid Compose Manager plugin. Build context is the
 # repo root (../..); on Unraid, sync the repo to /mnt/user/<somewhere>/mempalace
 # and from this directory run:
 #
 #     # 1. Generate a token (do this once, keep it secret):
 #     openssl rand -hex 32 > .env.token
 #     echo "MEMPAL_TOKEN=$(cat .env.token)" > .env
 #     rm .env.token
 #
 #     # 2. Build and start:
 #     docker compose up -d --build
 #
 # Endpoints (after start):
 #     https://<unraid-ip>:8443/sse           — MCP for AI clients
 #     https://<unraid-ip>:8443/ingest/...    — transcript uploads from hooks
 #     https://<unraid-ip>:8443/healthz       — liveness, no auth
 #
 # Caddy uses a self-signed cert (`tls internal`); clients must accept it,
 # typically via a `--insecure`-style flag or by trusting the Caddy root CA.
 # -----------------------------------------------------------------------------
 services:
  mempalace:
    # MemPalace server image, built from the Dockerfile at the build context
    # root (../..).
    build:
      context: ../..
      dockerfile: Dockerfile
    image: mempalace-server:latest
    container_name: mempalace
    restart: unless-stopped
    # Not published on the host — only Caddy reaches these ports over the
    # internal compose network. This is the auth boundary.
    expose:
      - "8765"
      - "8766"
    volumes:
      - /mnt/user/appdata/mempalace:/data
    environment:
      MEMPALACE_PALACE_PATH: /data/palace
      MEMPALACE_INGEST_PORT: "8766"
      MEMPALACE_INGEST_HOST: "0.0.0.0"
      # Defense-in-depth — Caddy is the primary gate, but if it's bypassed
      # (e.g. someone exec'd into the container's network), the ingest
      # server still requires the token.
      # The `:?` form makes compose abort with this message when MEMPAL_TOKEN
      # is unset or empty, instead of silently passing a blank token.
      MEMPALACE_INGEST_TOKEN: "${MEMPAL_TOKEN:?MEMPAL_TOKEN must be set in .env}"
      # Languages for entity detection (comma-separated):
      # MEMPALACE_ENTITY_LANGUAGES: en
    user: "99:100"
    networks:
      - mempal
    # Override the image CMD: bind mcp-proxy to all interfaces inside the
    # container network so Caddy can reach it. The ingest server thread
    # spawns from MEMPALACE_INGEST_PORT.
    command: >
      mcp-proxy --sse-host 0.0.0.0 --sse-port 8765
      --pass-environment -- mempalace-mcp
  caddy:
    # Sidecar that publishes :8443 on the host and fronts the mempalace
    # service over the shared `mempal` network.
    image: caddy:2-alpine
    container_name: mempalace-caddy
    restart: unless-stopped
    depends_on:
      - mempalace
    ports:
      - "8443:8443"
    volumes:
      - ./Caddyfile:/etc/caddy/Caddyfile:ro
      - /mnt/user/appdata/mempalace-caddy/data:/data
      - /mnt/user/appdata/mempalace-caddy/config:/config
    environment:
      # The `:?` form makes compose abort with this message when MEMPAL_TOKEN
      # is unset or empty, so the gate is never started with a blank token.
      # NOTE(review): presumably consumed by the Caddyfile's bearer check —
      # confirm against the Caddyfile.
      MEMPAL_TOKEN: "${MEMPAL_TOKEN:?MEMPAL_TOKEN must be set in .env}"
    networks:
      - mempal
 networks:
  mempal:
    driver: bridge
@@ -0,0 +1,99 @@
 <?xml version="1.0"?>
 <Container version="2">
  <Name>MemPalace</Name>
  <Repository>mempalace-server:latest</Repository>
  <Registry>https://github.com/MemPalace/mempalace</Registry>
  <Network>bridge</Network>
  <MyIP/>
  <Shell>sh</Shell>
  <Privileged>false</Privileged>
  <Support>https://github.com/MemPalace/mempalace/issues</Support>
  <Project>https://github.com/MemPalace/mempalace</Project>
  <Overview>
    Local-first AI memory server. Stores conversations and project content
    verbatim in a searchable palace, exposed to MCP-compatible AI tools
    (Claude Code, Codex, Antigravity, etc.) over Server-Sent Events on
    port 8765.
    The image is built locally — see Dockerfile in the repo root. From the
    Unraid CLI:
        cd /mnt/user/&lt;path&gt;/mempalace
        docker build -t mempalace-server:latest .
    Then add this template via Add Container -- Template.
    Mount /mnt/user/appdata/mempalace to /data for persistent storage of
    the ChromaDB index, SQLite knowledge graph, and embedding-model cache.
    SECURITY: this container exposes the MCP endpoint without authentication.
    Bind it to a trusted network (LAN-only or Tailscale) or place it behind
    SWAG / Nginx Proxy Manager with bearer-token or basic auth.
    Endpoint: http://[UNRAID-IP]:8765/sse
  </Overview>
  <Category>Productivity: Tools: Other:</Category>
  <WebUI/>
  <TemplateURL/>
  <Icon>https://raw.githubusercontent.com/MemPalace/mempalace/develop/assets/mempalace_logo.png</Icon>
  <ExtraParams>--user 99:100</ExtraParams>
  <PostArgs/>
  <CPUset/>
  <DateInstalled/>
  <DonateText/>
  <DonateLink/>
  <Description>
    Persistent AI memory across machines. Connect Claude Code, Codex,
    Antigravity, or any MCP-compatible client to a single shared palace.
  </Description>
  <Config Name="MCP SSE port"
          Target="8765"
          Default="8765"
          Mode="tcp"
          Description="Port the MCP-over-SSE endpoint listens on. Clients connect to http://[UNRAID-IP]:[PORT]/sse."
          Type="Port"
          Display="always"
          Required="true"
          Mask="false">8765</Config>
  <Config Name="Appdata"
          Target="/data"
          Default="/mnt/user/appdata/mempalace"
          Mode="rw"
          Description="Persistent storage for the palace (ChromaDB), knowledge graph (SQLite), embedding-model cache, and config."
          Type="Path"
          Display="always"
          Required="true"
          Mask="false">/mnt/user/appdata/mempalace</Config>
  <Config Name="Palace path (inside container)"
          Target="MEMPALACE_PALACE_PATH"
          Default="/data/palace"
          Mode=""
          Description="Subdirectory inside /data where ChromaDB files live. Change only if migrating from a different layout."
          Type="Variable"
          Display="advanced"
          Required="false"
          Mask="false">/data/palace</Config>
  <Config Name="Embedding device"
          Target="MEMPALACE_EMBEDDING_DEVICE"
          Default=""
          Mode=""
          Description="ONNX execution provider: cpu | cuda | dml | coreml. Leave blank for auto. CUDA requires the NVIDIA Driver plugin and GPU passthrough; the image must be rebuilt with the [gpu] extra installed."
          Type="Variable"
          Display="advanced"
          Required="false"
          Mask="false"></Config>
  <Config Name="Entity-detection languages"
          Target="MEMPALACE_ENTITY_LANGUAGES"
          Default="en"
          Mode=""
          Description="Comma-separated language codes for entity detection (e.g. en,es,de)."
          Type="Variable"
          Display="advanced"
          Required="false"
          Mask="false">en</Config>
 </Container>
@@ -1,144 +0,0 @@
 # MemPalace — History, Corrections, and Public Notices
 This file is the canonical record of post-launch corrections, public notices,
 and retractions that affect MemPalace's public claims. Newest first.
 ---
 ## 2026-04-14 — Benchmark table rewrite (issue [#875](https://github.com/MemPalace/mempalace/issues/875))
 A community audit identified a category error in the public benchmark tables
 on `README.md` and `mempalaceofficial.com`: MemPalace's retrieval recall
 numbers (R@5, R@10) were listed in the same columns as competitors'
 end-to-end QA accuracy numbers. They are different metrics and are not
 comparable — a system can have 100% retrieval recall and 40% QA accuracy.
 The audit also found that the retracted "+34% palace boost" claim (see the
 April 7 note below) was still present in multiple surfaces despite that
 retraction, and that two competitor numbers (`Mem0 ~85%`, `Zep ~85%`) had no
 published source and did not match the metrics those projects actually
 publish.
 What changed in this PR:
 - The headline number on all surfaces is now **96.6% R@5 on LongMemEval in
  raw mode**, independently reproduced on Linux x86_64 against the tagged
  v3.3.0 release on 2026-04-14. Result JSONLs are committed under
  `benchmarks/results_*.jsonl` (see PR description for the scorecard).
 - The **"100% with Haiku rerank"** claim has been removed from all public
  comparison tables. It reproduces on our machines and with a different LLM
  family (minimax-m2.7 via Ollama Cloud: 99.2% R@5 / 100.0% R@10 on the full
  500-question LongMemEval set) — but the 99.4% → 100% step was developed
  by inspecting three specific wrong answers (`benchmarks/BENCHMARKS.md` has
  called this "teaching to the test" since February). It belongs in the
  methodology document, not in a headline.
 - The **honest held-out number** for the hybrid pipeline — 98.4% R@5 on 450
  questions that `hybrid_v4` was never tuned on, deterministic seed — is now
  the comparable figure when an LLM rerank is involved.
 - The **retracted "+34% palace boost"** has been removed from
  `README.md`, `website/concepts/the-palace.md`,
  `website/guide/searching.md`, and `website/reference/contributing.md`.
  Wing and room filters remain useful — they're standard metadata filters —
  but they are not presented as a novel retrieval improvement.
 - **Competitor comparison tables** mixing retrieval recall with QA accuracy
  have been removed from `README.md` and `website/reference/benchmarks.md`.
  Where MemPalace can be fairly compared on the same metric, we link to the
  cited source. Otherwise we report our own numbers and let readers draw
  their own conclusions.
 - **Reproduction instructions** in `benchmarks/BENCHMARKS.md` and
  `benchmarks/README.md` were pointing at a defunct branch
  (`aya-thekeeper/mempal`); they now point at `MemPalace/mempalace`.
 - The **LoCoMo 100% R@10 with top-50 rerank** row has been removed from
  public comparison surfaces. With per-conversation session counts of 19–32
  and `top_k=50`, the retrieval stage returns every session in the
  conversation by construction, so the number measures an LLM's
  reading comprehension over the whole conversation, not retrieval.
 Thanks to [@dial481](https://github.com/MemPalace/mempalace/issues/875) for
 the detailed audit and to [@rohitg00](https://github.com/rohitg00) for the
 parallel write-up in Discussion #747.
 ---
 ## 2026-04-11 — Impostor domains and malware
 Several community members (issues #267, #326, #506) reported fake MemPalace
 websites distributing malware. The only official surfaces for this project
 are:
 - This GitHub repository: [github.com/MemPalace/mempalace](https://github.com/MemPalace/mempalace)
 - The PyPI package: [pypi.org/project/mempalace](https://pypi.org/project/mempalace/)
 - The docs site: [mempalaceofficial.com](https://mempalaceofficial.com)
 Any other domain — `mempalace.tech` being the one most commonly reported —
 is not ours. Never run install scripts from unofficial sites.
 Thanks to our community members for flagging the problem.
 ---
 ## 2026-04-07 — A Note from Milla & Ben
 > The community caught real problems in this README within hours of launch
 > and we want to address them directly.
 >
 > **What we got wrong:**
 >
 > - **The AAAK token example was incorrect.** We used a rough heuristic
 >   (`len(text)//3`) for token counts instead of an actual tokenizer. Real
 >   counts via OpenAI's tokenizer: the English example is 66 tokens, the
 >   AAAK example is 73. AAAK does not save tokens at small scales — it's
 >   designed for *repeated entities at scale*, and the README example was a
 >   bad demonstration of that. We're rewriting it.
 >
 > - **"30x lossless compression" was overstated.** AAAK is a lossy
 >   abbreviation system (entity codes, sentence truncation). Independent
 >   benchmarks show AAAK mode scores **84.2% R@5 vs raw mode's 96.6%** on
 >   LongMemEval — a 12.4 point regression. The honest framing is: AAAK is
 >   an experimental compression layer that trades fidelity for token
 >   density, and **the 96.6% headline number is from RAW mode, not AAAK**.
 >
 > - **"+34% palace boost" was misleading.** That number compares unfiltered
 >   search to wing+room metadata filtering. Metadata filtering is a
 >   standard feature of the underlying vector store, not a novel retrieval
 >   mechanism. Real and useful, but not a moat.
 >
 > - **"Contradiction detection"** exists as a separate utility
 >   (`fact_checker.py`) but is not currently wired into the knowledge graph
 >   operations as the README implied.
 >
 > - **"100% with Haiku rerank"** is real (we have the result files) but
 >   the rerank pipeline is not in the public benchmark scripts. We're
 >   adding it.
 >
 > **What's still true and reproducible:**
 >
 > - **96.6% R@5 on LongMemEval in raw mode**, on 500 questions, zero API
 >   calls — independently reproduced on M2 Ultra in under 5 minutes by
 >   [@gizmax](https://github.com/MemPalace/mempalace/issues/39).
 > - Local, free, no subscription, no cloud, no data leaving your machine.
 > - The architecture (wings, rooms, closets, drawers) is real and useful,
 >   even if it's not a magical retrieval boost.
 >
 > **What we're doing:**
 >
 > 1. Rewriting the AAAK example with real tokenizer counts and a scenario
 >    where AAAK actually demonstrates compression
 > 2. Adding `mode raw / aaak / rooms` clearly to the benchmark
 >    documentation so the trade-offs are visible
 > 3. Wiring `fact_checker.py` into the KG ops so the contradiction
 >    detection claim becomes true
 > 4. Pinning the vector store dependency to a tested range (issue #100),
 >    fixing the shell injection in hooks (#110), and addressing the macOS
 >    ARM64 segfault (#74)
 >
 > **Thank you to everyone who poked holes in this.** Brutal honest
 > criticism is exactly what makes open source work, and it's what we asked
 > for. Special thanks to
 > [@panuhorsmalahti](https://github.com/MemPalace/mempalace/issues/43),
 > [@lhl](https://github.com/MemPalace/mempalace/issues/27),
 > [@gizmax](https://github.com/MemPalace/mempalace/issues/39), and everyone
 > who filed an issue or a PR in the first 48 hours. We're listening, we're
 > fixing, and we'd rather be right than impressive.
 >
 > — *Milla Jovovich & Ben Sigman*
@@ -1,768 +0,0 @@
 # RFC 002 — Source Adapter Plugin Specification
 - **Status:** Draft
 - **Tracking issue:** [#989](https://github.com/MemPalace/mempalace/issues/989)
 - **Related:** [#274](https://github.com/MemPalace/mempalace/issues/274), [#23](https://github.com/MemPalace/mempalace/pull/23), [#169](https://github.com/MemPalace/mempalace/pull/169), [#232](https://github.com/MemPalace/mempalace/pull/232), [#567](https://github.com/MemPalace/mempalace/pull/567), [#98](https://github.com/MemPalace/mempalace/pull/98), [#591](https://github.com/MemPalace/mempalace/pull/591), [#592](https://github.com/MemPalace/mempalace/pull/592), [#702](https://github.com/MemPalace/mempalace/pull/702), [#981](https://github.com/MemPalace/mempalace/issues/981), [#244](https://github.com/MemPalace/mempalace/pull/244), [#419](https://github.com/MemPalace/mempalace/pull/419), [#300](https://github.com/MemPalace/mempalace/pull/300), [#952](https://github.com/MemPalace/mempalace/pull/952), [#389](https://github.com/MemPalace/mempalace/pull/389), [#434](https://github.com/MemPalace/mempalace/pull/434)
 - **Sibling spec:** [RFC 001 — Storage Backend Plugin Specification](001-storage-backend-plugin-spec.md)
 - **Spec version:** `1.0`
 ## Summary
 A formal contract for MemPalace source adapters so third parties can ship `pip install mempalace-source-<name>` packages (Cursor, OpenCode, git, Slack, Notion, email, calendar, Whisper transcripts, …) that drop into `mempalace mine` without patching core. The spec defines the adapter interface, record shape, metadata schema contract, privacy class, entry-point registration, incremental-ingest semantics, closet integration, a declared-transformation model that replaces the informal "verbatim" promise with a verifiable one, conformance tests, and the refactor of the existing file and conversation miners into first-party adapters on the same contract.
 RFC 001 formalized the write side (where drawers are stored). This RFC formalizes the read side (where content comes from). Both are required for MemPalace to function as a durable daemon managing heterogeneous palaces across many source types.
 ## Motivation
 Seven source ingesters are currently in flight, each solving the same problem a different way:
 | PR / Issue | Source | Mechanism |
 |---|---|---|
 | [#274](https://github.com/MemPalace/mempalace/issues/274) | Cursor | `workspaceStorage/*.vscdb` SQLite extraction |
 | [#23](https://github.com/MemPalace/mempalace/pull/23) | OpenCode | SQLite session database |
 | [#169](https://github.com/MemPalace/mempalace/pull/169) | Pi agent | JSONL session normalizer |
 | [#232](https://github.com/MemPalace/mempalace/pull/232) | Cursor (JSONL variant) | JSONL normalizer |
 | [#567](https://github.com/MemPalace/mempalace/pull/567), [#98](https://github.com/MemPalace/mempalace/pull/98) | Git | `git log` + `gh pr view` with structured diff summary |
 | [#591](https://github.com/MemPalace/mempalace/pull/591), [#592](https://github.com/MemPalace/mempalace/pull/592) | Delphi Oracle | Real-time intelligence signals |
 | [#702](https://github.com/MemPalace/mempalace/pull/702) | Cursor + factory.ai | Combined session miners |
 Plus three ingesters already grafted into core:
 - `mempalace/miner.py` — filesystem project miner, fixed char-window chunking, keyword hall routing
 - `mempalace/convo_miner.py` — chat transcript miner with exchange-pair chunking
 - `mempalace/normalize.py` — format detection for five chat-export shapes (Claude Code JSONL, Codex JSONL, and Claude.ai / ChatGPT / Slack JSON)
 Plus one open proposal for a different ingest semantic:
 - [#981](https://github.com/MemPalace/mempalace/issues/981) — path-level descriptions: mine metadata-as-content instead of raw bytes for matched paths. This is a legitimate third ingest mode (alongside chunked-content and whole-record) that the current architecture has no home for.
 Each contributor has reinvented source discovery, source-item identity, incremental-ingest bookkeeping, metadata shape, and chunking strategy. Format detection for new chat exports lands in `normalize.py` as one more branch in an `if` chain. There is no shared abstraction, no conformance suite, and no contract new adapter authors can build against.
 This is the same situation RFC 001 addresses for storage backends: a pattern that emerged organically, now needs a specification so the community can contribute cleanly and enterprises can build against a stable surface.
 ### Why this matters beyond developer tooling
 The adapter pattern is source-agnostic. What has so far shown up as "Cursor transcripts" and "git commits" generalizes to:
 - **Knowledge work** — Notion, Obsidian, Logseq, Google Docs, iA Writer, Zettlr
 - **Communications** — Slack, Discord, Teams, Signal backups, mbox/eml email, iMessage
 - **Research** — arXiv PDFs, Zotero libraries, bookmarked articles, Kindle highlights, web archives
 - **Creator workflows** — YouTube captions, podcast transcripts (Whisper/Deepgram), Descript projects
 - **Regulated domains** — medical records, legal filings, financial statements (all gated on §6 privacy class)
 Enterprises key on their own domain metadata — `repo/PR/SHA` for engineering, `patient/encounter/CPT` for healthcare, `case/docket/jurisdiction` for legal. The schema lives in the adapter; the content lives in the drawer. This is how structured-data use cases are served without violating the byte-preservation commitments adapters make.
 ## Goals
 1. A source adapter ships as a standalone Python package; `pip install mempalace-source-<name>` is sufficient to use it.
 2. `mempalace mine` and the MCP mine tool are source-agnostic — all extraction goes through registered adapters. No `if source_type == 'foo'` branches in core.
 3. Content transformations are **declared** (§1.4): each adapter advertises the set of transformations it applies to source bytes. Byte-preserving adapters declare the empty set. Consumers can programmatically determine what happened to their data.
 4. Incremental ingest is cheap and correct: re-running mine only touches items whose source-side version changed, using the palace itself as the cursor (no sidecar).
 5. Each adapter declares a structured metadata schema. Enterprises index and filter on that schema. Core is schema-agnostic beyond the universal fields in §5.1.
 6. The existing `miner.py` and `convo_miner.py` become the first two first-party adapters on the new contract. Drawer metadata fields and field names are preserved — the spec adds fields, does not rename them.
 7. A privacy class is declarable at the adapter boundary so sensitive sources (medical, financial, personal comms) are handled with explicit policy rather than implicit trust.
 ## Non-goals
 - Defining chunking. Each adapter owns its chunking strategy — tree-sitter for code, exchange-pair for chat, whole-record for a PR. Core does not impose a chunk size.
 - Defining live-stream / webhook shapes (the Delphi Oracle pattern of continuous signal ingestion). That is a separate future RFC; v1 is pull-mode.
 - Defining LLM-based structured extraction. Adapters MAY use an LLM; the spec does not mandate or standardize this.
 - Defining cross-adapter dedup. When the same content appears via two adapters (e.g., a PR body mined via `git` and as a conversation quote mined via `claude-code`), both drawers land. Deduplication policy is a separate concern handled at query time by `searcher.py`.
 - Defining closet construction. Core continues to build closets from adapter-yielded drawers (§1.7); the closet-building algorithm itself is not part of this spec.
 ---
 ## 1. Source adapter contract
 ### 1.1 Required method
 All adapters implement `BaseSourceAdapter`, which requires a single kwargs-only `ingest()` method plus a `describe_schema()` declaration:
 ```python
 class BaseSourceAdapter(ABC):
    @abstractmethod
    def ingest(
        self,
        *,
        source: SourceRef,
        palace: PalaceContext,
    ) -> Iterator[IngestResult]:
        """Enumerate and extract content from a source.
        Yields a stream of IngestResult values. Lazy adapters yield
        `SourceItemMetadata` ahead of the drawers for that item, so core
        can report progress and check `is_current` before the adapter
        commits to the fetch. Adapters with no lazy-fetch benefit may
        interleave `SourceItemMetadata` and `DrawerRecord` items freely.
        """
    @abstractmethod
    def describe_schema(self) -> AdapterSchema:
        """Declare the structured metadata this adapter attaches.
        Returned value is stable for a given adapter version. Enterprises
        index on this schema; core uses it to validate adapter output.
        """
 ```
 The single-method `ingest()` contract was chosen over a `discover` / `extract` split. Most current ingesters have no meaningful laziness benefit (filesystem walking is cheap, transcript normalizing is cheap). Adapters that do (git-mine's `gh pr list` vs `gh pr view`; hypothetical Slack/Notion API) express laziness by yielding `SourceItemMetadata` first and deferring fetch until core confirms staleness via `is_current()`.
 ### 1.2 Optional methods (default implementations on the ABC)
 ```python
 def is_current(
    self,
    *,
    item: SourceItemMetadata,
    existing_metadata: dict | None,
 ) -> bool:
    """Return True if the palace already has an up-to-date copy.
    Called by core after querying the palace for existing drawers with
    matching source_file. The adapter compares its version token against
    the stored metadata and returns True to skip extraction.
    Default implementation: returns False (always re-extract). Adapters
    advertising `supports_incremental` override this.
    """
    return False
 def source_summary(self, *, source: SourceRef) -> SourceSummary:
    """Describe a source without extracting (e.g., 'git repo mempalace,
    847 commits, 132 PRs'). Default: returns empty summary."""
    return SourceSummary(description=self.name)
 def close(self) -> None:
    return None
 ```
 Core's incremental loop (pseudocode):
 ```python
 for result in adapter.ingest(source=source, palace=ctx):
    if isinstance(result, SourceItemMetadata):
        existing = ctx.collection.get(where={"source_file": result.source_file}, limit=1)
        if adapter.is_current(item=result, existing_metadata=existing):
            ctx.skip_current_item()   # adapter stops yielding drawers for this item
    elif isinstance(result, DrawerRecord):
        ctx.upsert_drawer(result)
 ```
 ### 1.3 Typed records
 ```python
@dataclass(frozen=True)
 class SourceRef:
    """A handle to the source a user wants to ingest.
    local_path is for filesystem-rooted sources (project dir, mbox file).
    uri is for URL-like references (github.com/org/repo, slack://workspace/channel).
    options carries adapter-specific config (non-secret values only; §M2).
    """
    local_path: str | None = None
    uri: str | None = None
    options: dict = field(default_factory=dict)
@dataclass(frozen=True)
 class SourceItemMetadata:
    """Lightweight pointer yielded before drawers for lazy-fetch adapters."""
    source_file: str                 # Logical identity — filesystem path, PR URI, etc.
    version: str                     # Source-side version token (mtime, commit SHA, ETag, rev id).
    size_hint: int | None = None     # Bytes, if known. Used for progress reporting.
    route_hint: RouteHint | None = None
@dataclass(frozen=True)
 class DrawerRecord:
    """One drawer's worth of content plus metadata."""
    content: str                     # Subject to §1.4 declared transformations.
    source_file: str                 # Foreign key to SourceItemMetadata.source_file.
    chunk_index: int = 0             # 0 for single-drawer items; 0..N-1 for chunked items.
    metadata: dict = field(default_factory=dict)  # Flat: str/int/float/bool only. Must conform to adapter schema.
    route_hint: RouteHint | None = None
@dataclass(frozen=True)
 class RouteHint:
    wing: str | None = None
    room: str | None = None
    hall: str | None = None
@dataclass(frozen=True)
 class SourceSummary:
    description: str
    item_count: int | None = None
 # IngestResult is the union type adapters yield.
 IngestResult = SourceItemMetadata | DrawerRecord
 # PalaceContext carries collection handles, palace config, and progress hooks
 # into the adapter. Full definition in §9 (cleanup prerequisite).
 ```
 ### 1.4 Declared transformations
 Adapters cannot silently alter content. Every adapter declares the set of transformations it applies:
 ```python
 class BaseSourceAdapter(ABC):
    declared_transformations: ClassVar[frozenset[str]] = frozenset()
 ```
 The invariant: **no transformation is applied that is not declared in this set**. Adapters declaring `frozenset()` are byte-preserving end-to-end (modulo the read, which may itself involve `utf8_replace_invalid` — see below).
 Reserved transformation names (v1):
 | Name | Meaning |
 |---|---|
 | `utf8_replace_invalid` | Undecodable bytes replaced with U+FFFD on read (equivalent to `open(..., errors="replace")`). |
 | `newline_normalize` | CRLF / CR converted to LF. |
 | `whitespace_trim` | Leading / trailing whitespace stripped at a record boundary. |
 | `whitespace_collapse_internal` | Runs of three or more blank lines collapsed to two. |
 | `line_trim` | Each line individually stripped of leading / trailing whitespace. |
 | `line_join_spaces` | Adjacent lines joined with single spaces, newlines discarded. |
 | `blank_line_drop` | Empty lines between non-empty lines dropped. |
 | `strip_tool_chrome` | System tags, hook output, tool UI chrome removed (see `normalize.strip_noise`). |
 | `tool_result_truncate` | Tool output heads/tails kept; middle replaced with a marker string. |
 | `spellcheck_user` | User turns rewritten by spellcheck. |
 | `synthesized_marker` | Adapter inserts its own strings (e.g., `[N lines omitted]`, `[registry] …`, Slack provenance footer). |
 | `speaker_role_assignment` | Multi-party speakers alternately assigned `user` / `assistant` roles (Slack). |
 | `tool_result_omitted` | Some tool outputs fully omitted from transcript (e.g., Read/Edit/Write results in `normalize._format_tool_result`). |
 Adapters MAY define their own transformation names for behaviors the reserved list does not cover. Third-party names SHOULD be prefixed with the adapter name to avoid collisions (e.g., `cursor.composer_ordering`).
 **Capability derivation:**
 - `byte_preserving` — declared_transformations is empty AND output bytes equal input bytes for any source the adapter can read. Advertised via the `byte_preserving` capability (§2.1). MUST be verified by §7.2 round-trip test.
 - `declared_lossy` — declared_transformations is non-empty. The adapter's output is reproducible from source by applying *only* the declared transformations. MUST be verified by §7.3 declared-transformation test.
 **Existing code mapping (for the cleanup PR):**
 | Module | Declared transformations |
 |---|---|
 | `filesystem` (current `miner.py`) | `utf8_replace_invalid`, `whitespace_trim` |
 | `conversations` (current `convo_miner.py` + `normalize.py`) | `utf8_replace_invalid`, `newline_normalize`, `line_trim`, `line_join_spaces`, `blank_line_drop`, `whitespace_collapse_internal`, `strip_tool_chrome`, `tool_result_truncate`, `tool_result_omitted`, `spellcheck_user`, `synthesized_marker`, `speaker_role_assignment` |
 The filesystem adapter is nearly byte-preserving today; the conversations adapter transforms its content extensively. Both are honest after this spec lands because both fully declare the transformations they apply.
 This replaces the MISSION.md promise of "verbatim always" with a stronger one: every adapter publishes what it does to your data, and the conformance suite verifies it hasn't lied. "Verbatim" becomes a capability some adapters hold (byte_preserving), not a global claim about a lossy pipeline.
 ### 1.5 Three ingest modes
 A single adapter declares one or more of three modes via a class attribute:
 ```python
 class BaseSourceAdapter(ABC):
    supported_modes: ClassVar[frozenset[Literal["chunked_content", "whole_record", "metadata_only"]]]
 ```
 | Mode | Content origin |
 |---|---|
 | `chunked_content` | Source bytes, split into chunks the adapter chooses (current filesystem behavior). |
 | `whole_record` | Source bytes, one drawer per source item (e.g., PR → 1 drawer). |
 | `metadata_only` | Synthesized description of a source item (absorbs #981). The description bytes are authored by the user or adapter, not the source. Declared transformations (§1.4) do not apply — content is not derived from source bytes. |
 `metadata_only` resolves #981: description-mode matches a path pattern and produces one drawer whose content is the user-authored description rather than the file contents. Conformance tests (§7.2, §7.3) skip `metadata_only` records.
 An adapter MAY support multiple modes and select per-item; the per-item mode is recorded in `metadata["ingest_mode"]` (§5.1). This field already exists on conversation drawers (`convo_miner.py:346`) and is the only existing field whose semantics this spec extends rather than preserves.
 ### 1.6 Chunking delegation
 Core does not impose chunking. `miner.py`'s 800-character sliding window is the filesystem adapter's default for unknown file types — not a contract. Adapter authors choose what makes sense:
 - Code files → tree-sitter function/class boundaries (future enhancement to the filesystem adapter).
 - Chat transcripts → exchange pairs (current `convo_miner.py` behavior).
 - PRs → whole-record (current `git-mine` behavior in #567).
 - PDFs → page or section.
 - Voice transcripts → speaker turn.
 The sole cross-adapter requirement for `chunked_content` mode: chunks for a given `source_file`, re-assembled in `chunk_index` order and accounting for declared transformations in §1.4, reproduce the adapter's internal representation of the source. The conformance suite verifies this.
 ### 1.7 Closet integration
 Closets are the AAAK-compressed index layer (`palace.build_closet_lines`, `upsert_closet_lines`) that points to drawer content and enables LLM-scale scanning without reading every drawer. Closet-building is not an adapter concern:
 - **Core builds closets** from adapter-yielded drawers as a post-step, via the existing `palace.py` helpers. Adapters do not call these APIs.
 - **Adapters MAY emit closet hints** in drawer metadata via a flat `;`-joined string:
  ```python
  metadata["closet_hints"] = "decided GraphQL; migrated to Postgres; fixed PR-567"
  ```
  Core splits on `;` and feeds these as candidate topics alongside the content-scanned ones in `build_closet_lines`. The git adapter can hint decision-signal quotes that raw content-scanning would miss; the conversations adapter can hint section headers; the filesystem adapter has no need and omits the field.
 - **metadata_only drawers get closets too.** Core builds them from the synthesized description content the same way it builds closets for any other drawer. This is how #981's path-level descriptions become searchable.
 - **Closet purging** remains keyed on `source_file` (`purge_file_closets` in `palace.py:221`). Adapters' source_file values must be stable so purge is correct on re-ingest.
 Current `convo_miner.py` does not build closets for conversation drawers — an existing gap. The cleanup PR (§9) routes the conversations adapter through the same post-step closet builder as filesystem, closing the gap as a side effect.
 ---
 ## 2. Adapter contract
 ### 2.1 Identity and capabilities
 ```python
 class BaseSourceAdapter(ABC):
    name: ClassVar[str]                    # "filesystem", "cursor", "git", "slack", ...
    spec_version: ClassVar[str] = "1.0"
    adapter_version: ClassVar[str]         # Independent of spec_version; recorded on every drawer.
    capabilities: ClassVar[frozenset[str]]
    supported_modes: ClassVar[frozenset[str]]             # Per §1.5.
    declared_transformations: ClassVar[frozenset[str]]    # Per §1.4.
    default_privacy_class: ClassVar[str]                  # Per §6.
 ```
 Defined capability tokens (v1):
 | Token | Meaning |
 |---|---|
 | `byte_preserving` | `declared_transformations` is empty AND extracted content equals source bytes. |
 | `supports_incremental` | Implements `is_current()` meaningfully; `ingest()` respects `ctx.skip_current_item()`. |
 | `supports_structured_metadata` | Attaches fields beyond §5.1 universals. |
 | `supports_entity_hints` | Emits entity hints via `metadata["entity_hints_json"]` (§5.4). |
 | `supports_kg_triples` | Writes knowledge-graph triples directly to the SQLite KG (§5.5). |
 | `supports_closet_hints` | Emits `metadata["closet_hints"]` (§1.7). |
 | `requires_auth` | Needs credentials at runtime (env vars — §4.2). |
 | `requires_external_service` | Needs a running service (Slack API, email server). |
 | `requires_local_tool` | Needs a local binary (`gh`, `rg`, `whisper`). |
 | `adapter_owns_routing` | Returns authoritative `RouteHint` values from `ingest()` that core uses as-is (§G3 / §2.5). |
 | `respects_privacy_class` | Honors §6 privacy-class filtering. |
 Capability tokens are free-form strings; third-party adapters MAY declare novel tokens for their ecosystem. Core only inspects the above.
 ### 2.2 Source references
 See `SourceRef` in §1.3. The shape is deliberately open — adapters parse `uri` and `options` as they see fit. Core does not canonicalize URIs.
 **Secrets in `SourceRef.options`:** credentials MUST NOT be placed in `options`. The spec reserves `options` for non-secret values (paths, filters, date ranges). Secrets come from env vars per §4.2. An adapter that reads a credential from `options` violates the spec and MUST be rejected by the conformance suite.
 ### 2.3 Lifecycle
 1. `__init__`: lightweight. No I/O, no network, no credential fetch.
 2. First call to `ingest`: may open resources. All I/O is lazy.
 3. `close()`: releases all resources. After `close()`, further calls MUST raise `AdapterClosedError`.
 ### 2.4 Concurrency
 An adapter instance is long-lived and serves many mine operations. Adapters MUST be thread-safe for concurrent `ingest` calls across different `SourceRef` values. MemPalace core serializes calls within a single `SourceRef` unless an adapter advertises `supports_parallel_ingest` (not in v1 — reserved for v1.1).
 ### 2.5 Routing
 Routing is the adapter's responsibility. The filesystem adapter reads `mempalace.yaml` (hall keywords, rooms list) via `MempalaceConfig()` and returns `RouteHint(wing=..., room=..., hall=...)` on each drawer. This relocates `detect_room()` and `detect_hall()` (currently in `miner.py` and `convo_miner.py`) into their respective adapters.
 Order of precedence for routing:
 1. Explicit `--wing` / `--room` CLI flags → passed through `SourceRef.options` → adapter honors verbatim.
 2. Palace config match (`mempalace.yaml` hall keywords, room keywords) → adapter computes.
 3. Adapter-internal fallback (e.g., filesystem adapter falls back to `"general"` room).
 Adapters advertising `adapter_owns_routing` return the final answer; core uses it verbatim. Adapters not advertising it return None and core applies a generic fallback router (writing to wing `default`, room `general`, hall `general`). Absent any adapter, this is how `mempalace mine` behaves today.
 ### 2.6 Incremental ingest
 `is_current()` is the incremental-ingest primitive. The palace itself is the cursor — no separate persisted state. Correctness requirements:
 - The adapter's `SourceItemMetadata.source_file` MUST be stable across re-ingests of the same logical item. Filesystem adapter uses the absolute path (as today). Git adapter uses a URI shape like `github.com/org/repo#pr=567` or `github.com/org/repo#commit=abc123`.
 - `is_current()` returns True when the stored metadata matches the adapter's current version token. The default implementation returns False (always re-extract) — adapters advertising `supports_incremental` override.
 - Deletion tombstones: an adapter MAY yield a `SourceItemMetadata(source_file=..., version="__deleted__")` entry — core purges drawers with matching `source_file` and builds no new drawers for that item. Advertised via the `supports_deletion_tombstones` capability token (note: this token is not yet listed in the §2.1 table of v1 tokens).
 - Adapters without `supports_incremental` ignore `is_current()` and fully re-extract. Core logs a warning.
 ### 2.7 Errors
 - `SourceNotFoundError` — the `SourceRef` does not resolve.
 - `AuthRequiredError` — adapter needs credentials; raises with a message describing which env vars to set.
 - `AdapterClosedError` — method called after `close()`.
 - `TransformationViolationError` — conformance suite raises this when the content round-trip requires an undeclared transformation.
 - `SchemaConformanceError` — a `DrawerRecord.metadata` is missing required fields declared in `describe_schema()` or violates declared types.
 ---
 ## 3. Registration and discovery
 ### 3.1 Entry points (primary mechanism)
 Third-party adapters ship as installable packages:
 ```toml
 # pyproject.toml of mempalace-source-cursor
 [project.entry-points."mempalace.sources"]
 cursor = "mempalace_source_cursor:CursorAdapter"
 ```
 MemPalace discovers adapters at process start via `importlib.metadata.entry_points(group="mempalace.sources")`.
 ### 3.2 In-tree registry (secondary)
 ```python
 from mempalace.sources.registry import register
 register("my-experimental-adapter", MyAdapter)
 ```
 Entry-point discovery and explicit `register()` populate the same registry. Explicit registration wins on name conflict.
 ### 3.3 Selection (explicit only — no auto-detect)
 Unlike storage backends (RFC 001 §3.3), source adapters are never auto-detected. The user selects the adapter explicitly:
 ```bash
 mempalace mine --source cursor ~/                      # explicit adapter
 mempalace mine --source git /path/to/repo              # explicit adapter
 mempalace mine --source filesystem /path/to/project    # explicit adapter
 mempalace mine /path/to/project                        # implicit: filesystem (default)
 ```
 The default when no `--source` is given is `filesystem`, preserving current `mempalace mine <path>` behavior.
 **Backwards compatibility with `--mode`.** Current `cli.py:517-519` exposes `--mode {projects,convos}`. This spec maps:
 - `--mode projects` → `--source filesystem` (the new default)
 - `--mode convos` → `--source conversations`
 `--mode` stays as a deprecated alias through v4.x, emitting a deprecation warning on use, and is removed in v5.0.
 Auto-detection would be hostile — a directory containing a `.git` folder, a `workspaceStorage/` subdir, and an `mbox` file is not a signal of user intent.
 ---
 ## 4. Configuration
 ### 4.1 Shape
 ```json
 {
  "sources": {
    "my-cursor": {
      "type": "cursor",
      "workspace_storage": "~/Library/Application Support/Cursor/User/workspaceStorage"
    },
    "my-git": {
      "type": "git",
      "repos": ["/projects/mempalace", "/projects/site"]
    }
  },
  "palaces": {
    "work": {
      "sources": ["my-git"],
      "privacy_floor": "internal"
    },
    "personal": {
      "sources": ["my-cursor"]
    }
  }
 }
 ```
 Single-user local mode: config is optional. `mempalace mine <path>` with no config uses the `filesystem` adapter and defaults.
 ### 4.2 Environment variables
 - `MEMPALACE_SOURCE_<NAME>_*` — per-adapter secrets and connection info. Examples: `MEMPALACE_SOURCE_SLACK_TOKEN`, `MEMPALACE_SOURCE_NOTION_API_KEY`, `MEMPALACE_SOURCE_GIT_GITHUB_TOKEN`.
 - Secrets MUST be readable from env vars; config files carry structure, env vars carry credentials. Same rule as RFC 001 §4.2.
 ### 4.3 Adapter-specific options
 `SourceRef.options` is a free-form dict of non-secret values (§2.2). Each adapter documents its accepted keys. Unknown keys MUST be ignored (forward compatibility); the adapter MAY log a warning.
 ---
 ## 5. Metadata schema contract
 ### 5.1 Universal fields
 Existing drawer metadata fields are preserved — the spec adds the following:
 | New field | Type | Added by | Purpose |
 |---|---|---|---|
 | `adapter_name` | `str` | core, from `BaseSourceAdapter.name` | Which registered source produced this drawer. |
 | `adapter_version` | `str` | adapter | Adapter's own version (distinct from palace `normalize_version`). Enables re-extract workflows targeted at drawers from a known-buggy adapter version. |
 | `privacy_class` | `str` | adapter default, config override | Per §6. |
 Existing fields retain their current semantics (verified against `miner.py:542-561` and `convo_miner.py:338-350`):
 | Existing field | Role in the spec |
 |---|---|
 | `source_file` | Functions as the adapter's source-item identifier. Adapter defines the shape — a filesystem path for filesystem, a URI like `github.com/org/repo#pr=123` for git. MUST be stable across re-ingests of the same logical item. |
 | `source_mtime` | Functions as the source-item version for filesystem. Adapters without mtime semantics MAY omit this field and use a different version discriminator (e.g., commit SHA in a separate `metadata["commit_sha"]` field); the spec only requires that `is_current()` can decide staleness from the stored metadata. |
 | `filed_at` | When the record was written. ISO-8601 string. |
 | `added_by` | Agent name (e.g., `lumi`, `claude-code`). Orthogonal to `adapter_name` — the agent is *who* triggered mining; the adapter is *how* data was extracted. |
 | `wing`, `room`, `hall` | Palace routing. Populated by adapter per §2.5. |
 | `chunk_index` | Per §1.6. Always 0 for `whole_record` / `metadata_only`. |
 | `normalize_version` | Palace-wide schema version (currently `palace.py:50`). Unchanged. Separate from `adapter_version`. |
 | `entities` | Semicolon-joined candidate entity names. Already flat; kept flat (the structured entity hints in §5.4 supplement this field rather than replace it). |
 | `ingest_mode` | Per §1.5. Already on conversation drawers; added to filesystem drawers by the cleanup PR. |
 | `extract_mode` | Conversation-adapter-specific (`exchange` vs `general`). Moves into the conversations adapter's declared schema per §5.2. |
 **Nothing is renamed. Nothing is removed.** The spec formalizes the shape ingesters already converge on. Existing `where={"source_file": ...}` queries in `searcher.py`, `palace.py`, and callers keep working.
 **Chroma metadata constraint:** all metadata values MUST be `str | int | float | bool`. No lists, no nested dicts. This matches RFC 001 §1.4 and the underlying ChromaDB contract. Structured side-data goes to the SQLite knowledge graph (§5.5) or to a declared flat JSON-encoded string field (§5.4).
 ### 5.2 Adapter schemas
 Each adapter returns an `AdapterSchema` from `describe_schema()`:
 ```python
@dataclass(frozen=True)
 class AdapterSchema:
    fields: dict[str, FieldSpec]   # Keyed by metadata key.
    version: str
@dataclass(frozen=True)
 class FieldSpec:
    type: Literal["string", "int", "float", "bool", "delimiter_joined_string", "json_string"]
    required: bool
    description: str
    indexed: bool = False           # Hint to backends that can build indexes (RFC 001 §2.1).
    # delimiter_joined_string: the delimiter character (default ";").
    delimiter: str = ";"
    # json_string: the JSON schema of the encoded object (informational only).
    json_schema: dict | None = None
 ```
 `delimiter_joined_string` covers the `entities` shape (current `;`-joined list of names). `json_string` is the escape hatch for adapters needing to pack nested data — the value stored is still a single flat `str` from Chroma's perspective, but the adapter is allowed to document its parsed shape.
 Example for a hypothetical `slack` adapter:
 ```python
 AdapterSchema(
    version="1.0",
    fields={
        "channel_name": FieldSpec(type="string", required=True, description="Slack channel name", indexed=True),
        "channel_id": FieldSpec(type="string", required=True, description="Slack channel ID"),
        "thread_ts": FieldSpec(type="string", required=False, description="Thread root timestamp"),
        "author_id": FieldSpec(type="string", required=True, description="Slack user ID", indexed=True),
        "author_name": FieldSpec(type="string", required=True, description="Display name at extraction time"),
        "reactions": FieldSpec(type="delimiter_joined_string", required=False, description="Emoji shortcodes"),
    },
 )
 ```
 ### 5.3 Enterprise keying
 The adapter schema is the stable surface enterprises filter on. A support team querying the palace for `channel_id = "C01234"` does not care about ChromaDB's internal representation. The schema field is declared by the adapter, indexed by the backend (RFC 001 §2.1 `supports_metadata_filters`), and exposed through the existing `where=` clause.
 This is how "structured data" serves company use cases without breaking transformation guarantees: declared-transformation content in the drawer, structured fields in the metadata, schema declared by the adapter, filtering done by the backend.
 ### 5.4 Entity hints (optional)
 Adapters with `supports_entity_hints` MAY include:
 ```python
 metadata["entity_hints_json"] = '[{"type":"person","name":"Milla Jovovich","confidence":0.95,"offset":120},{"type":"project","name":"MemPalace","confidence":1.0,"offset":0}]'
 ```
 The value is a JSON-encoded string (type `json_string` in the adapter schema). Core parses on read and feeds into `mempalace/entity_detector.py` as a prior: hints with `confidence >= 0.9` bypass the heuristic detector; lower-confidence hints feed into it as candidates.
 This is additive to the existing flat `entities` field — entity_hints carries structure (type, confidence, offset); `entities` remains the Chroma-indexable flat string. An adapter that produces entity_hints MUST also populate `entities` as the flat name-only projection, so existing filter queries keep working.
 ### 5.5 Knowledge-graph triples (optional)
 Adapters with `supports_kg_triples` write directly to the SQLite knowledge graph via `mempalace/knowledge_graph.py` — **not** to drawer metadata. Chroma cannot store structured triples; the KG already exists for this purpose.
 The adapter calls the existing `KnowledgeGraph.add_triple()` (signature verified against `mempalace/knowledge_graph.py:130`):
 ```python
 palace.kg.add_triple(
    subject="Ben",
    predicate="committed",
    obj="PR-567",                    # `object` is a Python builtin — the API uses `obj`.
    valid_from="2026-03-12",
    confidence=1.0,
    source_file=drawer.source_file,  # Existing provenance parameter.
 )
 ```
 Drawer metadata includes a flat counter — `metadata["kg_triples_count"]: int` — so search consumers can see at a glance that KG side-data exists for a drawer without hitting SQLite.
 The existing API has `source_closet` and `source_file` provenance parameters but no `source_drawer_id` or `adapter_name`. The cleanup PR (§9) should add these two optional parameters to `add_triple()` so adapter-written triples can be traced back to (a) the specific drawer that produced them and (b) the adapter that authored them — necessary for re-extraction workflows. Until that lands, adapters use `source_file` as the provenance key and record adapter authorship via a separate table or a predicate naming convention (e.g., `adapter:git:committed`).
 This aligns with the existing architecture in `CLAUDE.md` ("Knowledge Graph: ENTITY → PREDICATE → ENTITY with valid_from / valid_to dates") — the RFC formalizes the adapter-side write path.
 ### 5.6 Source encoding and newline
 Current ingesters handle encoding lossily (`errors="replace"` in `miner.py:595` and `normalize.py:124`) and do not record original encoding. The spec does **not** require per-drawer `source_encoding` / `source_newline` — most runs are uniform UTF-8 / LF, and storing the same value on every drawer wastes bytes.
 Instead: adapters that handle non-UTF-8 or non-LF sources record the values once on the adapter's `SourceSummary` and per-drawer only when a specific drawer diverges from the adapter default. The `utf8_replace_invalid` declared transformation (§1.4) already communicates that lossy decoding happened; specific drawer-level provenance is opt-in.
 ---
 ## 6. Privacy class
 ### 6.1 Defined levels
 | Level | Meaning | Example sources |
 |---|---|---|
 | `public` | Content intended for public consumption. | arXiv papers, public GitHub repos, published blogs. |
 | `internal` | Organizational content, not for public disclosure. | Corporate Slack, internal Notion, private git repos. |
 | `pii_potential` | May contain personally identifiable information. | Email, iMessage, Claude/ChatGPT transcripts. |
 | `sensitive` | Known to contain PII, financial, or health data. | Medical records, financial statements, legal filings. |
 | `secrets_possible` | May contain credentials or secrets. | Git history, environment dumps, CI logs. |
 An adapter declares a default on `BaseSourceAdapter.default_privacy_class`. Users MAY override per-source in config.
 ### 6.2 Enforcement
 - Each palace MAY declare a `privacy_floor`. Drawers at or above the floor (privacy class equal to or laxer than the floor) are admitted; drawers below it (stricter classes) are rejected at write time and surfaced in a `rejected` list on the CLI and MCP tool.
 - **Default floor: none** — v1 accepts all levels unless the palace explicitly configures a floor. This keeps the single-user local default low-friction (users who run `mempalace mine` on a git repo expect `secrets_possible` drawers to land). Enterprise deployments MUST set a floor; docs for regulated-domain setup will recommend starting strict and relaxing as needed.
 - Search results surface `privacy_class` in result metadata. MCP tool wrappers MAY redact results above a caller-declared ceiling.
 - `secrets_possible` drawers SHOULD pass through a secrets-scan pre-index hook when one is available. PR #389 (sensitive content scanner) is the expected enforcement mechanism for v1; until it lands, `secrets_possible` is a label without automated scanning. The label is still useful — it enables floor-based rejection and alerts downstream consumers.
 - The privacy class is recorded in drawer metadata and cannot be downgraded without a migration log entry, matching RFC 001's embedder-identity pattern.
 Privacy class is how a regulated-domain deployment (medical, legal, financial) can use MemPalace safely. Without it, flexible ingest becomes a liability; with it, ingest is scoped by policy.
 ---
 ## 7. Testing contract
 ### 7.1 The abstract suite
 MemPalace ships `mempalace.sources.testing.AbstractSourceAdapterContractSuite` — a pytest mixin. Every adapter package ships a concrete subclass:
 ```python
 from mempalace.sources.testing import AbstractSourceAdapterContractSuite
 class TestCursorAdapter(AbstractSourceAdapterContractSuite):
    @pytest.fixture
    def adapter(self):
        return CursorAdapter()
    @pytest.fixture
    def fixture_source(self, tmp_path):
        """Build a minimal Cursor workspaceStorage fixture."""
        ...
        return SourceRef(local_path=str(tmp_path))
    @pytest.fixture
    def canonical_source_bytes(self, fixture_source):
        """Return a mapping of source_file -> authoritative bytes.
        For filesystem sources: the file's raw bytes.
        For SQLite sources: the extracted value column bytes for each row.
        For API sources: the canonical HTTP response body bytes.
        Adapter-defined — the adapter knows what its 'source bytes' are.
        """
        ...
 ```
 The suite covers:
 - `ingest` yields items with stable `source_file` and well-formed `version`.
 - `is_current()` returns True when metadata matches, False when it differs.
 - `close()` releases resources; subsequent calls raise `AdapterClosedError`.
 - Unicode content and unicode identifiers are preserved end-to-end.
 - Large-source handling: 10k+ items ingest without loading all into memory.
 - Error paths: `SourceNotFoundError`, `AuthRequiredError` raise with correct types.
 - `SourceRef.options` MUST NOT contain secrets — the adapter raises if it detects a value matching a common-secret pattern (GitHub token prefix, Slack token prefix, etc.). Advisory test, not blocking.
 ### 7.2 Byte-preserving round-trip (for `byte_preserving` adapters only)
 Required for adapters advertising `byte_preserving`:
 ```python
 def test_byte_preserving_round_trip(self, adapter, fixture_source, canonical_source_bytes):
    """Concatenated chunks must equal the canonical source bytes.
    For each source_file in the fixture:
      1. Read canonical_source_bytes[source_file].
      2. Collect all DrawerRecords for that source_file from adapter.ingest(...).
         Skip metadata_only drawers (§1.5).
      3. Sort by chunk_index.
      4. Concatenate record.content values.
      5. Assert equality with the canonical bytes (UTF-8 decoded).
    """
 ```
 Failure raises `TransformationViolationError`.
 ### 7.3 Declared-transformation round-trip (for `declared_lossy` adapters)
 Required for adapters with non-empty `declared_transformations`:
 ```python
 def test_declared_transformation_round_trip(self, adapter, fixture_source, canonical_source_bytes):
    """Adapter output must be reproducible by applying ONLY declared transformations.
    1. For each source_file, read canonical_source_bytes.
    2. Apply each declared transformation in declared_transformations to the bytes,
       in the order declared by the adapter, using the reference implementations
       in mempalace.sources.transforms.
    3. Compare the result to the concatenated record.content values.
    4. If they differ, the adapter has applied a transformation it did not declare.
       Raise TransformationViolationError.
    """
 ```
 For transformations not in the reserved list (§1.4) — adapter-custom names — the adapter MUST provide a reference implementation callable under `mempalace.sources.transforms.<adapter_name>_<transform_name>`. The conformance suite imports and applies it. Undiscoverable custom transforms fail the test.
 ### 7.4 Schema conformance
 A generator-based property test validates that every record yielded by `ingest` across the fixture source has metadata matching `describe_schema()`. Missing required fields, wrong types, or (in strict mode) undeclared fields fail the test.
 ### 7.5 Note on current corpus
 No existing test in `tests/` asserts byte-preservation or declared-transformation correctness (verified via grep of `tests/` for `verbatim|byte.?preserv|round.?trip`). This RFC's conformance suite introduces the first such coverage. The existing MISSION.md claim of "verbatim always" is a social contract until this lands; afterward it becomes a machine-verified property of adapters that declare `byte_preserving`.
 ---
 ## 8. Versioning and compatibility
 - `BaseSourceAdapter.spec_version` declares which spec version an adapter implements.
 - MemPalace refuses to load an adapter declaring a different major spec version.
 - Minor spec versions are additive: new optional methods, new capability tokens, new reserved transformation names, new universal metadata fields with sensible defaults.
 - Adapters MAY declare their own `adapter_version` independent of the spec version; this is recorded on every drawer (§5.1) and enables "this drawer was extracted by cursor-adapter 0.3; 0.4 fixed a parsing bug; re-extract affected drawers" workflows.
 - This is spec v1.0.
 ---
 ## 9. Cleanup prerequisite (not in this spec, but gating)
 The existing in-tree ingesters are not adapter-shaped. Before RFC 002 can be enforced, the following refactor lands in a separate PR:
 - Introduce `mempalace/sources/base.py` defining `BaseSourceAdapter`, the typed records, and the registry.
 - Introduce `mempalace/sources/transforms.py` with reference implementations of every reserved transformation in §1.4. Adapters and the conformance suite both consume these.
 - `mempalace/miner.py` → `mempalace/sources/filesystem.py` implementing `BaseSourceAdapter`. Current behavior preserved: 800-char chunking becomes the adapter's default; `READABLE_EXTENSIONS` moves to the adapter; `detect_room()` and `detect_hall()` move to the adapter per §2.5. `declared_transformations = frozenset({"utf8_replace_invalid", "whitespace_trim"})`.
 - `mempalace/convo_miner.py` → `mempalace/sources/conversations.py`. Exchange-pair chunking stays. The format-detection logic in `normalize.py` becomes per-format plugins the conversations adapter composes (one for Claude Code JSONL, one for Codex JSONL, one for ChatGPT mapping trees, one for Claude.ai JSON, one for Slack JSON) — each small and independently testable, eliminating the `if source_type` chain. `declared_transformations` enumerates every transformation `normalize.py` and `convo_miner._chunk_by_exchange` actually perform (see §1.4 "Existing code mapping").
 - Closet-building wired into the conversations adapter's post-step (currently missing, per §1.7) — side effect of routing through the unified core post-step.
 - `mempalace/cli.py` subcommand `mine` routes through the `mempalace.sources` registry. `--mode {projects,convos}` becomes a deprecated alias for `--source {filesystem,conversations}`.
 - `mempalace/mcp_server.py` `mempalace_mine` tool accepts a `source` parameter.
 - `mempalace/palace.py` exposes `PalaceContext` — a per-mine-invocation facade that bundles the drawer collection, closet collection, knowledge graph, palace config, and progress hooks. Adapters receive this; they do not import `palace.py` directly.
 - `NORMALIZE_VERSION` (currently a module-level constant in `palace.py:50`) stays. It is the palace-wide schema version, orthogonal to per-adapter `adapter_version`.
 - `KnowledgeGraph.add_triple()` (`knowledge_graph.py:130`) gains two optional parameters: `source_drawer_id: str = None` and `adapter_name: str = None`. Existing callers are unaffected; adapters advertising `supports_kg_triples` (§5.5) populate both. Backwards-compatible change.
 This cleanup is substantial — comparable to RFC 001 §10's chroma-import removal — and should land before any new third-party adapter PR merges. Each new adapter is easier after the cleanup, not harder.
 ---
 ## 10. Impact on in-flight PRs
 | PR / Issue | Effort to align |
 |---|---|
 | [#274](https://github.com/MemPalace/mempalace/issues/274) Cursor SQLite | Becomes `mempalace-source-cursor` third-party package. Author has a working prototype on Windows; needs `describe_schema()`, `declared_transformations`, and the conformance suite. Prior #287 (closed unmerged) is predecessor work. |
 | [#23](https://github.com/MemPalace/mempalace/pull/23) OpenCode SQLite | Becomes `mempalace-source-opencode`. Same shape as Cursor. |
 | [#169](https://github.com/MemPalace/mempalace/pull/169) Pi agent | Becomes `mempalace-source-pi` or a format plugin under the conversations adapter (depending on format similarity). |
 | [#232](https://github.com/MemPalace/mempalace/pull/232) Cursor JSONL | Deprecated in favor of #274's SQLite path; or a second mode of `mempalace-source-cursor`. |
 | [#567](https://github.com/MemPalace/mempalace/pull/567), [#98](https://github.com/MemPalace/mempalace/pull/98) git-mine | Closest existing work to what the spec envisions. Becomes first-party `mempalace/sources/git.py`. Exercises `whole_record` mode, `supports_structured_metadata`, `supports_closet_hints` (decision-signal quotes), `supports_kg_triples` (commit authorship, PR review relationships). |
 | [#591](https://github.com/MemPalace/mempalace/pull/591), [#592](https://github.com/MemPalace/mempalace/pull/592) Delphi Oracle | Deferred. The live-stream pattern is out of scope for v1 (§Non-goals). A v1.1 addition will specify webhook/stream adapters. |
 | [#702](https://github.com/MemPalace/mempalace/pull/702) Cursor + factory.ai | Splits into two adapter packages. |
 | [#981](https://github.com/MemPalace/mempalace/issues/981) path-level descriptions | Absorbed by §1.5 `metadata_only` mode + §5.1 `ingest_mode`. A new first-party `descriptions` adapter or a second mode on `filesystem`. |
 | [#244](https://github.com/MemPalace/mempalace/pull/244) Cursor memory-first MCP workflow docs | Points at `mempalace-source-cursor` once the adapter lands. |
 | [#419](https://github.com/MemPalace/mempalace/pull/419), [#300](https://github.com/MemPalace/mempalace/pull/300), [#952](https://github.com/MemPalace/mempalace/pull/952) language-extension additions to `READABLE_EXTENSIONS` | Becomes per-language config on the filesystem adapter. Contributors can publish domain-specific adapters without touching core. |
 | [#389](https://github.com/MemPalace/mempalace/pull/389) sensitive content scanner | Expected enforcement mechanism for the `secrets_possible` privacy class (§6.2). Not a blocker for this spec, but a natural consumer. |
 | [#434](https://github.com/MemPalace/mempalace/pull/434) auto-populate KG from drawers | Complementary: post-hoc derivation of KG triples from drawer content. Adapters with `supports_kg_triples` provide the up-front path; #434 handles everything else. |
 ---
 ## 11. Open questions
 1. **Cross-adapter dedup.** When a PR body is mined via `git` AND shows up as a conversation quote mined via `claude-code`, both drawers land. Is query-time dedup in `searcher.py` sufficient, or should core maintain a content-hash index across adapters? Declared non-goal in v1 but worth revisiting if user feedback demands it.
 2. **Live-stream pattern.** Delphi Oracle (#591/592) and potentially Slack/Discord real-time ingestion need a push-mode contract. This is a v1.1 addition (streaming adapter trait + webhook surface), not blocking.
 3. **LLM-assisted structured extraction.** Some adapters will want to call an LLM to extract structured fields. The spec does not standardize this — should it? Argument for: conformance test for LLM-driven fields, consistent caching. Argument against: local-first / zero-API is a core promise; LLM dependencies are opt-in per adapter.
 4. **Adapter-vs-format split for conversations.** §9 proposes format plugins composed under a single conversations adapter. Alternative: one adapter per format (claude-code, chatgpt, codex, cursor-jsonl, slack). The trade-off is discoverability (one adapter is easier to find) vs. encapsulation (format plugins are simpler to test). Preference leans toward the single-adapter + plugin model; open to counter-argument.
 5. **Default `privacy_floor`.** v1 defaults to none (§6.2) so single-user local mining is frictionless. An argument exists for defaulting to `pii_potential` — forces regulated-domain users to opt in to sensitive levels rather than opt out. Open to changing the default before v1 ships.
 6. **`canonical_source_bytes` for API-backed adapters.** §7.1 defines this as adapter-declared. For API-backed adapters (Slack, Notion), what constitutes "canonical bytes" in a conformance test — the fixture's captured HTTP response? A serialized representation of the parsed object? For now this is left to the adapter; a follow-up spec may be needed to establish common conventions.
 7. **`adapter_version` bump semantics.** When does an adapter bump `adapter_version`? On any behavior change? On declared-transformation changes only? This suggests the need for a follow-up doc on adapter SemVer conventions that the community can agree on.
 ---
 ## 12. Rollout
 1. Land the cleanup PR (§9): introduce `mempalace/sources/`, refactor `miner.py` → filesystem adapter, `convo_miner.py` → conversations adapter, route CLI and MCP through the sources registry. Behavior preserved end-to-end. Closets get built for conversation drawers as a side effect.
 2. Land this spec as-is. Add `AbstractSourceAdapterContractSuite`, entry-point discovery, `AdapterSchema` validation, privacy-class enforcement (floor-gated writes), declared-transformation reference implementations in `mempalace/sources/transforms.py`.
 3. Land `mempalace/sources/git.py` as the first-party adapter absorbing #567. Exercises `whole_record`, `supports_structured_metadata`, `supports_closet_hints`, `supports_kg_triples` together.
 4. Encourage the Cursor (#274), OpenCode (#23), and Pi (#169) authors to publish as third-party packages under `mempalace-source-*`. Offer review help against the spec.
 5. Publish adapter-authoring docs at [mempalaceofficial.com/guide/authoring-sources](https://mempalaceofficial.com/guide/authoring-sources.html).
 6. Update [ROADMAP.md](../../ROADMAP.md) with spec v1.0 adoption under v4.0.0-alpha.
@@ -1,66 +0,0 @@
 # How to Use MemPalace Hooks (Auto-Save)
 MemPalace hooks act as an "Auto-Save" feature. They help your AI keep a permanent memory without you needing to run manual commands.
 ### 1. What are these hooks?
 * **Save Hook** (`mempal_save_hook.sh`): Saves new facts and decisions every 15 messages.
 * **PreCompact Hook** (`mempal_precompact_hook.sh`): Saves your context right before the AI's memory window fills up.
 ### 2. Setup for Claude Code
 Add this to `~/.claude/settings.local.json` (global) or `.claude/settings.local.json` (project-scoped) to enable automatic background saving:
 ```json
 {
  "hooks": {
    "Stop": [
      {
        "matcher": "*", 
        "hooks": [{
          "type": "command",
          "command": "/absolute/path/to/hooks/mempal_save_hook.sh",
          "timeout": 30
        }]
      }
    ],
    "PreCompact": [
      {
        "hooks": [{
          "type": "command",
          "command": "/absolute/path/to/hooks/mempal_precompact_hook.sh",
          "timeout": 30
        }]
      }
    ]
  }
 }
 ```
 Make the hooks executable:
 ```bash
 chmod +x /absolute/path/to/hooks/mempal_save_hook.sh
 chmod +x /absolute/path/to/hooks/mempal_precompact_hook.sh
 ```
 **Note:** Replace `/absolute/path/to/hooks/` with the actual path where you cloned the MemPalace repository (e.g., `~/projects/mempalace/hooks/`).
 ### 3. What changed (v3.1.0+)
 Both hooks now have **two-layer capture**:
 1. **Auto-mine**: Before blocking the AI, the hook runs the normalizer on the JSONL transcript and upserts chunks directly into the palace. This captures raw tool output (Bash results, search findings, build errors) that the AI would otherwise summarize away.
 2. **Updated reason messages**: The block reason now explicitly tells the AI to save tool output verbatim — not just topics and decisions.
 ### 4. Backfill past conversations (one-time)
 The hooks capture conversations going forward, but you probably have months of past sessions. Run this once to mine them all:
 ```bash
 mempalace mine ~/.claude/projects/ --mode convos
 ```
 ### 5. Configuration
 - **`SAVE_INTERVAL=15`** — How many human messages between saves
 - **`MEMPALACE_PYTHON`** — Python interpreter with mempalace + chromadb. Auto-detects: env var → repo venv → system python3
 - **`MEMPAL_DIR`** — Optional directory for auto-ingest via `mempalace mine`
@@ -1,12 +0,0 @@
 #!/usr/bin/env python3
 """Example: mine a project folder into the palace."""
 import sys
 # Target folder: the first command-line argument when one is given,
 # otherwise a sample path used purely for illustration.
 project_dir = (sys.argv[1:] or ["~/projects/my_app"])[0]
 # Print the three-step walkthrough in a single call. sep="\n" places each
 # entry on its own line; the embedded "\n" prefixes keep the blank lines
 # that separate the steps.
 print(
     "Step 1: Initialize rooms from folder structure",
     f"  mempalace init {project_dir}",
     "\nStep 2: Mine everything",
     f"  mempalace mine {project_dir}",
     "\nStep 3: Search",
     "  mempalace search 'why did we choose this approach'",
     sep="\n",
 )
@@ -1,11 +0,0 @@
 #!/usr/bin/env python3
 """Example: import Claude Code / ChatGPT conversations."""
 # Emit the three example invocations in a single call. The empty strings
 # reproduce the blank separator lines between the sections.
 print(
     "Import Claude Code sessions:",
     "  mempalace mine ~/claude-sessions/ --mode convos --wing my_project",
     "",
     "Import ChatGPT exports:",
     "  mempalace mine ~/chatgpt-exports/ --mode convos",
     "",
     "Use general extractor for richer extraction:",
     "  mempalace mine ~/chats/ --mode convos --extract general",
     sep="\n",
 )
@@ -1,102 +0,0 @@
 # Gemini CLI Integration Guide
 This guide explains how to set up MemPalace as a permanent memory for the [Gemini CLI](https://github.com/google/gemini-cli).
 ## Prerequisites
 - Python 3.9+
 - Gemini CLI installed and configured
 ## 1. Installation
 On many Linux systems, installing Python packages globally is restricted. We
 recommend [`uv`](https://docs.astral.sh/uv/), which creates and manages a
 local virtual environment for you.
 ```bash
 # Clone the repository (if you haven't already)
 git clone https://github.com/MemPalace/mempalace.git
 cd mempalace
 # Create the venv and install MemPalace + dependencies in editable mode
 uv sync
 ```
 This produces a `.venv/` directory inside the repo with everything installed.
 If you prefer plain pip:
 ```bash
 python3 -m venv .venv
 .venv/bin/pip install -e .
 ```
 ## 2. Initialization
 Set up your "Palace" (the database) and configure your identity.
 ```bash
 # Initialize the palace in the current directory
 uv run python -m mempalace init .
 ```
 ### Identity and Wings (Optional but Recommended)
 You can manually define who you are and what projects you work on by creating/editing these files in `~/.mempalace/`:
 - **`~/.mempalace/identity.txt`**: A plain text file describing your role and focus.
 - **`~/.mempalace/wing_config.json`**: A JSON file mapping projects and name variants to "Wings".
 ## 3. Connect to Gemini CLI (MCP)
 Register MemPalace as an MCP server so Gemini CLI can use its tools.
 ```bash
 gemini mcp add mempalace /absolute/path/to/mempalace/.venv/bin/python3 -m mempalace.mcp_server --scope user
 ```
 *Note: Use the absolute path to ensure it works from any directory.*
 ## 4. Enable Auto-Saving (Hooks)
 To ensure the AI saves memories automatically when conversation history becomes too long, add a `PreCompress` hook to your Gemini CLI settings.
 Edit your `~/.gemini/settings.json` and add the following:
 ```json
 {
  "hooks": {
    "PreCompress": [
      {
        "matcher": "*",
        "hooks": [
          {
            "type": "command",
            "command": "/absolute/path/to/mempalace/hooks/mempal_precompact_hook.sh"
          }
        ]
      }
    ]
  }
 }
 ```
 Make sure the hook scripts are executable:
 ```bash
 chmod +x hooks/*.sh
 ```
 ## 5. Usage
 Once connected, Gemini CLI will automatically:
 - Start the MemPalace server on launch.
 - Use `mempalace_search` to find relevant past discussions.
 - Use the `PreCompress` hook to save new memories before they are lost.
 ### Manual Mining
 If you want the AI to learn from your existing code or docs immediately, run the "mine" command:
 ```bash
 uv run python -m mempalace mine /path/to/your/project
 ```
 ### Verification
 In a Gemini CLI session, you can run:
 - `/mcp list`: Verify `mempalace` is `CONNECTED`.
 - `/hooks panel`: Verify the `PreCompress` hook is active.
@@ -1,27 +0,0 @@
 # MCP Integration — Claude Code
 ## Setup
 Run the MCP server:
 ```bash
 mempalace-mcp
 ```
 Or add it to Claude Code:
 ```bash
 claude mcp add mempalace -- mempalace-mcp
 ```
 ## Available Tools
 The server exposes the full MemPalace MCP toolset. Common entry points include:
 - **mempalace_status** — palace stats (wings, rooms, drawer counts)
 - **mempalace_search** — semantic search across all memories
 - **mempalace_list_wings** — list all projects in the palace
 ## Usage in Claude Code
 Once configured, Claude Code can search your memories directly during conversations.
@@ -2,17 +2,51 @@
 These hook scripts make MemPalace save automatically. No manual "save" commands needed.
 This deployment ships only the **remote** hook variants — the palace runs as a Docker container on a server (e.g. Unraid), and hooks `curl` the active session transcript to the server's `/ingest/transcript` endpoint over HTTPS with bearer auth. Server-side, the existing `mine_convos` pipeline handles entity detection, room assignment, dedup, and idempotency. See [`deploy/unraid/README.md`](../deploy/unraid/README.md) for the server side.
 ## What They Do
 | Hook | When It Fires | What Happens |
-|------|--------------|-------------|
+|---|---|---|
-| **Save Hook** | Every 15 human messages | Auto-mines transcript (tool output included), then blocks the AI to save topics/decisions/quotes |
+| **Save Hook** (`mempal_save_hook_remote.sh`) | Every 15 user messages (configurable via `SAVE_INTERVAL`) | Backgrounded `curl` POSTs the active transcript. Returns immediately so the AI doesn't stall. Idempotent — failed retries are safe. |
-| **PreCompact Hook** | Right before context compaction | Auto-mines transcript, then emergency save — forces the AI to save EVERYTHING before losing context |
+| **PreCompact Hook** (`mempal_precompact_hook_remote.sh`) | Right before context compaction | Synchronous `curl` POST. Blocks until the upload completes (or the hook timeout fires) so memory is durable before context shrinks. |
-**Two-layer capture:** Hooks auto-mine the JSONL transcript directly into the palace (capturing raw tool output — Bash results, search findings, build errors). They also block the AI with a reason message telling it to save verbatim tool output and key context. Belt and suspenders — tool output gets stored even if the AI summarizes instead of quoting.
+**Two-layer capture.** The save hook ships the JSONL transcript directly to the server (capturing raw tool output — Bash results, search findings, build errors), where the miner files it verbatim into the palace. Tool output gets stored even if the AI summarizes instead of quoting.
 ## Env-var contract
 The scripts read all configuration from environment variables. There is no script-level config to edit; the same script works against any number of machines.
 | Variable | Required | Purpose |
 |---|---|---|
 | `MEMPAL_REMOTE_URL` | yes | Base URL of the MemPalace server, e.g. `https://unraid.local:8443`. |
 | `MEMPAL_REMOTE_TOKEN` | yes | Bearer token shared with the server's `MEMPAL_TOKEN`. |
 | `MEMPAL_REMOTE_INSECURE` | no | Set to `1` to skip TLS verification. Use only when the server uses Caddy's `tls internal` self-signed cert and the client hasn't trusted the root CA. |
 | `MEMPAL_REMOTE_WING` | no | Force a specific wing for this client's transcripts. Default: server derives wing from the session id. |
 | `SAVE_INTERVAL` | no | Override the default of 15 user messages. |
 | `MEMPAL_PYTHON` | no | Path to a Python 3 interpreter. Only needs `json` + `sys` from stdlib — mempalace does not need to be installed in it. Used to parse the hook's stdin JSON. |
 Set these persistently:
 **PowerShell (Windows):**
 ```powershell
 [Environment]::SetEnvironmentVariable("MEMPAL_REMOTE_URL",   "https://unraid.local:8443", "User")
 [Environment]::SetEnvironmentVariable("MEMPAL_REMOTE_TOKEN", "<the-token>",               "User")
 [Environment]::SetEnvironmentVariable("MEMPAL_REMOTE_INSECURE", "1", "User")  # if self-signed
 ```
 **Bash/Zsh:** add the same exports to `~/.zshrc` / `~/.bashrc`.
 If `MEMPAL_REMOTE_URL` or `MEMPAL_REMOTE_TOKEN` is unset, the scripts no-op and log a one-liner — they never block the AI from stopping. Safe to install on a machine that doesn't have a remote configured yet.
 ## Install — Claude Code
 Make the scripts executable:
 ```bash
 chmod +x hooks/mempal_save_hook_remote.sh hooks/mempal_precompact_hook_remote.sh
 ```
 Add to `.claude/settings.local.json`:
 ```json
@@ -22,26 +56,21 @@ Add to `.claude/settings.local.json`:
      "matcher": "*",
      "hooks": [{
        "type": "command",
-        "command": "/absolute/path/to/hooks/mempal_save_hook.sh",
+        "command": "/absolute/path/to/hooks/mempal_save_hook_remote.sh",
        "timeout": 30
      }]
    }],
    "PreCompact": [{
      "hooks": [{
        "type": "command",
-        "command": "/absolute/path/to/hooks/mempal_precompact_hook.sh",
+        "command": "/absolute/path/to/hooks/mempal_precompact_hook_remote.sh",
-        "timeout": 30
+        "timeout": 60
      }]
    }]
  }
 }
 ```
 Make them executable:
 ```bash
 chmod +x hooks/mempal_save_hook.sh hooks/mempal_precompact_hook.sh
 ```
 ## Install — Codex CLI (OpenAI)
 Add to `.codex/hooks.json`:
@@ -50,132 +79,111 @@ Add to `.codex/hooks.json`:
 {
  "Stop": [{
    "type": "command",
-    "command": "/absolute/path/to/hooks/mempal_save_hook.sh",
+    "command": "/absolute/path/to/hooks/mempal_save_hook_remote.sh",
    "timeout": 30
  }],
  "PreCompact": [{
    "type": "command",
-    "command": "/absolute/path/to/hooks/mempal_precompact_hook.sh",
+    "command": "/absolute/path/to/hooks/mempal_precompact_hook_remote.sh",
-    "timeout": 30
+    "timeout": 60
  }]
 }
 ```
-## Configuration
+## How it works
 Edit `mempal_save_hook.sh` to change:
 - **`SAVE_INTERVAL=15`** — How many human messages between saves. Lower = more frequent saves, higher = less interruption.
 - **`STATE_DIR`** — Where hook state is stored (defaults to `~/.mempalace/hook_state/`)
 - **`MEMPAL_DIR`** — Optional **project directory** (code, notes, docs) to also mine on each save trigger, with `--mode projects`. The hook ALWAYS mines the active conversation transcript automatically with `--mode convos` — `MEMPAL_DIR` is purely additive, never an override. Leave blank if you don't want to ingest project files.
 - **`MEMPALACE_PYTHON`** — Optional env var. Python interpreter with mempalace + chromadb installed. Auto-detects: `MEMPALACE_PYTHON` env var → repo `venv/bin/python3` → system `python3`. Set this if your venv is in a non-standard location.
 ### mempalace CLI
 The relevant commands are:
 ```bash
 mempalace mine <dir>               # Mine all files in a directory
 mempalace mine <dir> --mode convos # Mine conversation transcripts only
 ```
 The hooks resolve the repo root automatically from their own path, so they work regardless of where you install the repo.
 ## How It Works (Technical)
 ### Save Hook (Stop event)
 ```
 User sends message → AI responds → Claude Code fires Stop hook
                                            ↓
-                                    Hook counts human messages in JSONL transcript
+                                    Hook counts user messages in JSONL transcript
                                            ↓
-                              ┌─── < 15 since last save ──→ echo "{}" (let AI stop)
+                              ┌─── < SAVE_INTERVAL since last save ──→ echo "{}" (let AI stop)
                              │
-                              └─── ≥ 15 since last save
+                              └─── ≥ SAVE_INTERVAL since last save
                                            ↓
-                                    Auto-mine transcript → palace (tool output captured)
+                                    Background curl POST → server /ingest/transcript
                                            ↓
-                                    {"decision": "block", "reason": "save tool output verbatim..."}
+                                    Hook returns {} immediately (AI stops normally)
                                            ↓
-                                    AI saves to palace (topics, decisions, quotes)
+                                    Server-side miner runs in background, files drawers
                                            ↓
                                    AI tries to stop again
                                            ↓
                                    stop_hook_active = true
                                            ↓
                                    Hook sees flag → echo "{}" (let it through)
 ```
 The `stop_hook_active` flag prevents infinite loops: block once → AI saves → tries to stop → flag is true → we let it through.
 ### PreCompact Hook
 ```
 Context window getting full → Claude Code fires PreCompact
                                        ↓
-                                Find transcript (from input or session_id lookup)
+                                Synchronous curl POST → server /ingest/transcript
                                        ↓
-                                Auto-mine transcript → palace (tool output captured)
+                                Wait for 200 OK (or hook timeout)
                                        ↓
-                                {"decision": "block", "reason": "save tool output verbatim..."}
+                                echo "{}" → Compaction proceeds
                                        ↓
                                AI saves everything
                                        ↓
                                Compaction proceeds
 ```
-No counting needed — compaction always warrants a save. The auto-mine captures raw tool output before the AI gets a chance to summarize it away.
+Synchronous on PreCompact is intentional — this is the safety net before context shrinks. The Claude Code hook timeout (set in `settings.local.json`) bounds how long we'll wait.
 ## Debugging
 Check the hook log:
 ```bash
-cat ~/.mempalace/hook_state/hook.log
+tail -f ~/.mempalace/hook_state/hook.log
 ```
-Example output:
+Example:
 ```
 [14:30:15] Session abc123: 12 exchanges, 12 since last save
 [14:35:22] Session abc123: 15 exchanges, 15 since last save
-[14:35:22] TRIGGERING SAVE at exchange 15
+[14:35:22] ingest ok
-[14:40:01] Session abc123: 18 exchanges, 3 since last save
+[14:50:18] PRE-COMPACT triggered for session abc123
 [14:50:19] PRE-COMPACT ingest ok
 ```
-## Known Limitations
+A 401 response means the bearer token is wrong. A connection error means the URL/cert is wrong (or the server is down). All curl output goes to the same log.
-**Hooks require session restart after install.** Claude Code loads hooks from `settings.json` at session start only. If you run `mempalace init` or manually edit hook config mid-session, the hooks won't fire until you restart Claude Code. This is a Claude Code limitation.
+## Known limitations
-**`MEMPAL_PYTHON` override for the hook's internal Python calls.** The save hook parses its JSON input and counts transcript messages with `python3`. When the harness is launched from a GUI on macOS — `open -a`, Spotlight, the dock — its `PATH` is the minimal `/usr/bin:/bin:/usr/sbin:/sbin` inherited from `launchd`, not your shell PATH. If `python3` isn't on that PATH, those internal calls fail and the hook can't count exchanges.
+**Hooks require session restart after install.** Claude Code loads hooks from `settings.json` at session start only. If you edit hook config mid-session, restart Claude Code to pick up changes.
-Point the hook at any Python 3 interpreter to fix it:
+**Python interpreter resolution.** The scripts parse hook stdin JSON with `python3`. When Claude Code is launched from a GUI on macOS (Spotlight, dock, `open -a`), its `PATH` is the minimal `/usr/bin:/bin:/usr/sbin:/sbin` inherited from `launchd` rather than your shell PATH. If `python3` isn't there, set `MEMPAL_PYTHON` to a known-good interpreter:
 ```bash
-export MEMPAL_PYTHON="/usr/bin/python3"                   # system Python is fine
+export MEMPAL_PYTHON="/usr/bin/python3"
-export MEMPAL_PYTHON="$HOME/.venvs/mempalace/bin/python"  # or your venv
+# or:
 export MEMPAL_PYTHON="$HOME/.venvs/x/bin/python"
 ```
-Resolution priority: `$MEMPAL_PYTHON` (if set and executable) → `$(command -v python3)` → bare `python3`. The interpreter only needs `json` and `sys` from the standard library — `mempalace` itself does not need to be installed in it.
+Resolution priority: `$MEMPAL_PYTHON` → `$(command -v python3)` → bare `python3`. The interpreter only needs `json` and `sys` — mempalace itself does not need to be installed.
-Note: the `mempalace mine` auto-ingest runs via the `mempalace` CLI, so that command also needs to be on the hook's `PATH`. Installing with `pipx install mempalace` or `uv tool install mempalace` puts it on a stable global location; otherwise extend the hook environment's `PATH` to include your venv's `bin/`.
+**`MineAlreadyRunning` collisions.** If two clients ingest simultaneously, the second one's request returns 500 because the server-side `mine_lock` is held. The save hook is idempotent — the next save catches up. If you see this constantly in the log, raise `SAVE_INTERVAL` on the chattier client.
-## Backfill Past Conversations
+## Backfilling past conversations
-The hooks only capture conversations going forward. To mine **past** Claude Code sessions into your palace, run a one-time backfill:
+The hooks only capture sessions going forward. To mine **past** sessions into the remote palace, loop `curl` over them:
 ```bash
-mempalace mine ~/.claude/projects/ --mode convos
+# Claude Code sessions
 for f in ~/.claude/projects/**/*.jsonl; do
  curl -k -X POST \
    -H "Authorization: Bearer $MEMPAL_REMOTE_TOKEN" \
    -H "X-Session-Id: $(basename "$f" .jsonl)" \
    --data-binary @"$f" \
    "$MEMPAL_REMOTE_URL/ingest/transcript"
 done
 # Codex CLI sessions
 for f in ~/.codex/sessions/**/*.jsonl; do
  curl -k -X POST \
    -H "Authorization: Bearer $MEMPAL_REMOTE_TOKEN" \
    -H "X-Session-Id: $(basename "$f" .jsonl)" \
    --data-binary @"$f" \
    "$MEMPAL_REMOTE_URL/ingest/transcript"
 done
 ```
-This scans all JSONL transcripts from previous sessions and files them into the `conversations` wing. On a typical developer machine with months of history, this can yield 50K–200K drawers.
+The server-side miner is idempotent — re-uploading the same transcript won't double-file. Drop `-k` once Caddy's root CA is trusted on the client.
 For Codex CLI sessions:
 ```bash
 mempalace mine ~/.codex/sessions/ --mode convos
 ```
 This only needs to be done once — after that, the hooks auto-mine each session as you go.
 ## Cost
-**Zero extra tokens.** The hooks notify the AI that saves happened in the background — the AI doesn't need to write anything in the chat. All filing is handled automatically. Previous versions asked the AI to write diary entries and drawer content in the chat window, which cost ~$1/session in retransmitted tokens.
+**Zero extra tokens.** The hooks save in the background — the AI doesn't need to write anything in the chat window. All filing happens server-side after the upload returns.
@@ -1,123 +0,0 @@
 #!/bin/bash
 # MEMPALACE PRE-COMPACT HOOK — Emergency save before compaction
 #
 # Claude Code "PreCompact" hook. Fires RIGHT BEFORE the conversation
 # gets compressed to free up context window space.
 #
 # This is the safety net. When compaction happens, the AI loses detailed
 # context about what was discussed. This hook forces one final save of
 # EVERYTHING before that happens.
 #
 # Unlike the save hook (which triggers every N exchanges), this ALWAYS
 # blocks — because compaction is always worth saving before.
 #
 # === INSTALL ===
 # Add to .claude/settings.local.json:
 #
 #   "hooks": {
 #     "PreCompact": [{
 #       "hooks": [{
 #         "type": "command",
 #         "command": "/absolute/path/to/mempal_precompact_hook.sh",
 #         "timeout": 30
 #       }]
 #     }]
 #   }
 #
 # For Codex CLI, add to .codex/hooks.json:
 #
 #   "PreCompact": [{
 #     "type": "command",
 #     "command": "/absolute/path/to/mempal_precompact_hook.sh",
 #     "timeout": 30
 #   }]
 #
 # === HOW IT WORKS ===
 #
 # Claude Code sends JSON on stdin with:
 #   session_id      — unique session identifier
 #   transcript_path — path to the session's .json/.jsonl transcript
 #
 # The hook mines the transcript synchronously and then prints an empty
 # JSON object ({}), so compaction proceeds without blocking the AI.
 #
 # === MEMPALACE CLI ===
 # The hook ALWAYS mines the active conversation transcript synchronously
 # before compaction (via `mempalace mine <transcript-dir> --mode convos`).
 # MEMPAL_DIR is an *additional*, optional target for project files — it
 # does not replace the conversation mine.
 # Directory where the hook keeps its state; created on first run.
 STATE_DIR="$HOME/.mempalace/hook_state"
 mkdir -p "$STATE_DIR"
 # Optional project directory (code / notes / docs) to mine in addition to
 # the conversation transcript, using `--mode projects`. Purely additive:
 # the transcript mine does not depend on it. Leave empty to disable.
 # Example: MEMPAL_DIR="$HOME/projects/my_app"
 MEMPAL_DIR=""
 # Choose the Python interpreter: honour MEMPAL_PYTHON when it names an
 # executable file, otherwise use whatever `command -v python3` finds, and
 # as a last resort the bare name python3.
 if [[ -n "${MEMPAL_PYTHON:-}" && -x "${MEMPAL_PYTHON:-}" ]]; then
     MEMPAL_PYTHON_BIN="$MEMPAL_PYTHON"
 else
     MEMPAL_PYTHON_BIN="$(command -v python3 2>/dev/null || echo python3)"
 fi
 # Read the hook's JSON payload from stdin.
 INPUT=$(cat)
 # Parse session_id and transcript_path in one interpreter call. The Python
 # snippet removes every character outside [a-zA-Z0-9_/.\-~] and prints one
 # sanitized value per line; the shell reads those lines into an array, so
 # no generated code is ever ``eval``-ed (#1231 review). Same contract as
 # mempal_save_hook.sh.
 #
 # A plain read loop is used instead of ``mapfile``: mapfile only exists in
 # bash 4+, and /bin/bash on macOS is 3.2, where the builtin is missing and
 # both values would silently fall back to their defaults (so the transcript
 # would never be mined). printf replaces echo so the payload is passed
 # through verbatim regardless of its first characters.
 _mempal_parsed=()
 while IFS= read -r _mempal_line || [ -n "$_mempal_line" ]; do
     _mempal_parsed+=("$_mempal_line")
 done < <(printf '%s\n' "$INPUT" | "$MEMPAL_PYTHON_BIN" -c "
 import sys, json, re
 data = json.load(sys.stdin)
 sid = data.get('session_id', 'unknown')
 tp = data.get('transcript_path', '')
 safe = lambda s: re.sub(r'[^a-zA-Z0-9_/.\-~]', '', str(s))
 print(safe(sid))
 print(safe(tp))
 " 2>/dev/null)
 # Defaults apply when parsing failed or a field was empty.
 SESSION_ID="${_mempal_parsed[0]:-unknown}"
 TRANSCRIPT_PATH="${_mempal_parsed[1]:-}"
 # Expand a leading ~ to the user's home directory.
 TRANSCRIPT_PATH="${TRANSCRIPT_PATH/#\~/$HOME}"
 # Shape check for a transcript path. Intended to reject the same inputs as
 # mempalace.hooks_cli._validate_transcript_path (#1231 review).
 #   $1       candidate path
 #   returns  0 when the path is non-empty, ends in .json or .jsonl, and has
 #            no ".." component; 1 otherwise
 is_valid_transcript_path() {
     local candidate="$1"
     [[ -n "$candidate" ]] || return 1
     [[ "$candidate" == *.json || "$candidate" == *.jsonl ]] || return 1
     # Wrapping the path in slashes lets a single pattern catch a ".."
     # component at the start, in the middle, or at the end.
     [[ "/$candidate/" != */../* ]] || return 1
     return 0
 }
 echo "[$(date '+%H:%M:%S')] PRE-COMPACT triggered for session $SESSION_ID" >> "$STATE_DIR/hook.log"
 # Both ingest targets run in the foreground, so the hook does not return
 # until they finish. They are independent — each runs when its own
 # precondition holds:
 #   1. TRANSCRIPT_PATH → its parent directory, mined with --mode convos
 #   2. MEMPAL_DIR      → mined with --mode projects
 if [ -z "$TRANSCRIPT_PATH" ]; then
     : # No transcript path was supplied: nothing to mine and nothing to log.
 elif is_valid_transcript_path "$TRANSCRIPT_PATH" && [ -f "$TRANSCRIPT_PATH" ]; then
     mempalace mine "$(dirname "$TRANSCRIPT_PATH")" --mode convos \
         >> "$STATE_DIR/hook.log" 2>&1
 else
     echo "[$(date '+%H:%M:%S')] Skipping invalid transcript path: $TRANSCRIPT_PATH" \
         >> "$STATE_DIR/hook.log"
 fi
 if [[ -n "$MEMPAL_DIR" && -d "$MEMPAL_DIR" ]]; then
     mempalace mine "$MEMPAL_DIR" --mode projects \
         >> "$STATE_DIR/hook.log" 2>&1
 fi
 # Reply with an empty JSON object so the hook does not block.
 # "decision": "allow" is invalid — only "block" or {} are recognized.
 echo '{}'
@@ -0,0 +1,102 @@
 #!/bin/bash
 # MEMPALACE PRE-COMPACT HOOK (REMOTE) — emergency save before compaction.
 #
 # Drop-in replacement for mempal_precompact_hook.sh when MemPalace runs
 # on a server. Always synchronous: we wait for the upload to complete
 # before returning so the transcript is on the server before the
 # conversation gets compressed.
 #
 # Required env vars (same as the save hook):
 #   MEMPAL_REMOTE_URL    e.g. https://unraid.local:8443
 #   MEMPAL_REMOTE_TOKEN  bearer token
 # Optional:
 #   MEMPAL_REMOTE_WING        explicit wing override
 #   MEMPAL_REMOTE_INSECURE    "1" for self-signed cert
 #
 # === INSTALL ===
 # Add to .claude/settings.local.json:
 #
 #   "hooks": {
 #     "PreCompact": [{
 #       "hooks": [{
 #         "type": "command",
 #         "command": "/abs/path/to/mempal_precompact_hook_remote.sh",
 #         "timeout": 60
 #       }]
 #     }]
 #   }
 set -u
 # Per-user scratch directory that holds hook.log.
 STATE_DIR="$HOME/.mempalace/hook_state"
 mkdir -p "$STATE_DIR"
 # Interpreter used to parse the hook's stdin JSON: $MEMPAL_PYTHON when it
 # names an executable, otherwise the first python3 on PATH, otherwise the
 # bare name "python3".
 MEMPAL_PYTHON_BIN="${MEMPAL_PYTHON:-}"
 if [ -z "$MEMPAL_PYTHON_BIN" ] || [ ! -x "$MEMPAL_PYTHON_BIN" ]; then
    MEMPAL_PYTHON_BIN="$(command -v python3 2>/dev/null || echo python3)"
 fi
 # Pre-flight: without a server URL and token there is nothing to upload
 # to. Log the reason and return {} so compaction proceeds.
 if [ -z "${MEMPAL_REMOTE_URL:-}" ] || [ -z "${MEMPAL_REMOTE_TOKEN:-}" ]; then
    echo "[$(date '+%H:%M:%S')] PRE-COMPACT: MEMPAL_REMOTE_URL/TOKEN not set — skipping" \
        >> "$STATE_DIR/hook.log"
    echo "{}"
    exit 0
 fi
 # The hook payload (JSON) arrives on stdin.
 INPUT=$(cat)
 # Read the parser's one-value-per-line output with a plain read loop
 # instead of ``mapfile``: mapfile is a bash >= 4 builtin, so under an
 # older /bin/bash (e.g. 3.2) the call failed and every field silently
 # fell back to its default. No ``eval`` is involved either way.
 _mempal_parsed=()
 while IFS= read -r _mempal_line; do
    _mempal_parsed+=("$_mempal_line")
 done < <(echo "$INPUT" | "$MEMPAL_PYTHON_BIN" -c "
 import sys, json, re
 data = json.load(sys.stdin)
 sid = data.get('session_id', 'unknown')
 tp = data.get('transcript_path', '')
 safe = lambda s: re.sub(r'[^a-zA-Z0-9_/.\-~]', '', str(s))
 print(safe(sid))
 print(safe(tp))
 " 2>/dev/null)
 # Index 0 = session id, index 1 = transcript path.
 SESSION_ID="${_mempal_parsed[0]:-unknown}"
 TRANSCRIPT_PATH="${_mempal_parsed[1]:-}"
 # Expand a leading ~ to $HOME.
 TRANSCRIPT_PATH="${TRANSCRIPT_PATH/#\~/$HOME}"
 is_valid_transcript_path() {
    # Exit status 0 when the argument is non-empty, ends in .json or
    # .jsonl, and contains no ".." path component; 1 otherwise.
    local p="$1"
    [[ -n "$p" \
        && ( "$p" == *.json || "$p" == *.jsonl ) \
        && "/$p/" != */../* ]]
 }
 echo "[$(date '+%H:%M:%S')] PRE-COMPACT triggered for session $SESSION_ID" \
    >> "$STATE_DIR/hook.log"
 # Synchronous upload — pre-compact is the safety net, blocking is correct
 # here. The Claude Code hook timeout (set in settings.local.json) bounds
 # how long we'll wait.
 if is_valid_transcript_path "$TRANSCRIPT_PATH" && [ -f "$TRANSCRIPT_PATH" ]; then
    # --fail makes curl exit non-zero on an HTTP error status. Without it
    # a 401/413/500 from the server still exits 0 and was logged as
    # "ingest ok".
    CURL_OPTS=("-sS" "--fail" "--max-time" "55" "-X" "POST")
    [ "${MEMPAL_REMOTE_INSECURE:-0}" = "1" ] && CURL_OPTS+=("-k")
    # The optional wing header is appended to CURL_OPTS rather than kept
    # in its own array: expanding an empty array under `set -u` is an
    # "unbound variable" abort on bash < 4.4.
    [ -n "${MEMPAL_REMOTE_WING:-}" ] && CURL_OPTS+=(-H "X-Wing: $MEMPAL_REMOTE_WING")
    curl "${CURL_OPTS[@]}" \
        -H "Authorization: Bearer $MEMPAL_REMOTE_TOKEN" \
        -H "X-Session-Id: $SESSION_ID" \
        -H "Content-Type: application/octet-stream" \
        --data-binary "@$TRANSCRIPT_PATH" \
        "$MEMPAL_REMOTE_URL/ingest/transcript" \
        >> "$STATE_DIR/hook.log" 2>&1 \
        && echo "[$(date '+%H:%M:%S')] PRE-COMPACT ingest ok" >> "$STATE_DIR/hook.log" \
        || echo "[$(date '+%H:%M:%S')] PRE-COMPACT ingest FAILED — context will compact unsaved" \
            >> "$STATE_DIR/hook.log"
 elif [ -n "$TRANSCRIPT_PATH" ]; then
    echo "[$(date '+%H:%M:%S')] PRE-COMPACT: invalid transcript path: $TRANSCRIPT_PATH" \
        >> "$STATE_DIR/hook.log"
 fi
 # Always answer with empty JSON so compaction is never blocked.
 echo "{}"
@@ -1,223 +0,0 @@
 #!/bin/bash
 # MEMPALACE SAVE HOOK — Auto-save every N exchanges
 #
 # Claude Code "Stop" hook. After every assistant response:
 # 1. Counts human messages in the session transcript
 # 2. Every SAVE_INTERVAL messages, BLOCKS the AI from stopping
 # 3. Returns a reason telling the AI to save structured diary + palace entries
 # 4. AI does the save (topics, decisions, code, quotes → organized into palace)
 # 5. Next Stop fires with stop_hook_active=true → lets AI stop normally
 #
 # The AI does the classification — it knows what wing/hall/closet to use
 # because it has context about the conversation. No regex needed.
 #
 # === INSTALL ===
 # Add to .claude/settings.local.json:
 #
 #   "hooks": {
 #     "Stop": [{
 #       "matcher": "*",
 #       "hooks": [{
 #         "type": "command",
 #         "command": "/absolute/path/to/mempal_save_hook.sh",
 #         "timeout": 30
 #       }]
 #     }]
 #   }
 #
 # For Codex CLI, add to .codex/hooks.json:
 #
 #   "Stop": [{
 #     "type": "command",
 #     "command": "/absolute/path/to/mempal_save_hook.sh",
 #     "timeout": 30
 #   }]
 #
 # === HOW IT WORKS ===
 #
 # Claude Code sends JSON on stdin with these fields:
 #   session_id       — unique session identifier
 #   stop_hook_active — true if AI is already in a save cycle (prevents infinite loop)
 #   transcript_path  — path to the JSONL transcript file
 #
 # When we block, Claude Code shows our "reason" to the AI as a system message.
 # The AI then saves to memory, and when it tries to stop again,
 # stop_hook_active=true so we let it through. No infinite loop.
 #
 # === MEMPALACE CLI ===
 # The hook ALWAYS mines the active conversation transcript automatically
 # (via `mempalace mine <transcript-dir> --mode convos`). MEMPAL_DIR is an
 # *additional*, optional target for project files — it does not replace
 # the conversation mine.
 #
 # === CONFIGURATION ===
 SAVE_INTERVAL=15  # Save every N human messages (adjust to taste)
 STATE_DIR="$HOME/.mempalace/hook_state"
 mkdir -p "$STATE_DIR"
 # Optional: project directory (code / notes / docs) to also mine each
 # save trigger. Mined with `--mode projects`. The conversation transcript
 # is always mined regardless — this is purely additive.
 # Taken from the environment when exported there; to hard-code a default
 # instead, put it after the ":-", e.g.
 #   MEMPAL_DIR="${MEMPAL_DIR:-$HOME/projects/my_app}"
 # (A plain MEMPAL_DIR="" here used to wipe any exported value.)
 MEMPAL_DIR="${MEMPAL_DIR:-}"
 # Resolve the Python interpreter the hook should use.
 #
 # Why this is nontrivial: GUI-launched Claude Code on macOS (or any harness
 # that doesn't inherit the user's shell PATH) may find a `python3` on PATH
 # that lacks mempalace — e.g. /usr/bin/python3 while the user installed
 # mempalace into a venv or pyenv. Users in that situation can point the
 # hook at the right interpreter by exporting MEMPAL_PYTHON.
 #
 # Resolution order (first hit wins):
 #   1. $MEMPAL_PYTHON          — explicit user override (absolute path)
 #   2. $(command -v python3)   — first python3 on the hook's PATH
 #   3. bare "python3"          — last-resort fallback (hope the PATH has it)
 MEMPAL_PYTHON_BIN="${MEMPAL_PYTHON:-}"
 if [ -z "$MEMPAL_PYTHON_BIN" ] || [ ! -x "$MEMPAL_PYTHON_BIN" ]; then
    MEMPAL_PYTHON_BIN="$(command -v python3 2>/dev/null || echo python3)"
 fi
 # Read JSON input from stdin
 INPUT=$(cat)
 # Parse all fields in a single Python call (3x faster than separate invocations)
 # without invoking ``eval`` on generated code: Python prints one sanitized
 # value per line and the shell appends each line to an array with a plain
 # read loop — same data, smaller blast radius if the sanitizer is ever
 # bypassed (#1231 review). A read loop is used rather than ``mapfile``
 # because mapfile is a bash >= 4 builtin; under an older /bin/bash
 # (e.g. 3.2) it is missing and every field fell back to its default.
 _mempal_parsed=()
 while IFS= read -r _mempal_line; do
    _mempal_parsed+=("$_mempal_line")
 done < <(echo "$INPUT" | "$MEMPAL_PYTHON_BIN" -c "
 import sys, json, re
 data = json.load(sys.stdin)
 sid = data.get('session_id', 'unknown')
 sha_raw = data.get('stop_hook_active', False)
 tp = data.get('transcript_path', '')
 # Shell-safe output — only allow alphanumeric, underscore, hyphen, slash, dot, tilde
 safe = lambda s: re.sub(r'[^a-zA-Z0-9_/.\-~]', '', str(s))
 # Coerce stop_hook_active to strict boolean string
 sha = 'True' if sha_raw is True or str(sha_raw).lower() in ('true', '1', 'yes') else 'False'
 print(safe(sid))
 print(sha)
 print(safe(tp))
 " 2>/dev/null)
 # Index 0 = session id, 1 = stop_hook_active, 2 = transcript path.
 SESSION_ID="${_mempal_parsed[0]:-unknown}"
 STOP_HOOK_ACTIVE="${_mempal_parsed[1]:-False}"
 TRANSCRIPT_PATH="${_mempal_parsed[2]:-}"
 # Expand ~ in path
 TRANSCRIPT_PATH="${TRANSCRIPT_PATH/#\~/$HOME}"
 # Validate that TRANSCRIPT_PATH looks like a transcript file:
 #   - non-empty
 #   - .jsonl or .json suffix
 #   - no traversal segments (.. components)
 # Mirrors mempalace.hooks_cli._validate_transcript_path so the shell hook
 # rejects the same shapes the Python hook rejects (#1231 review).
 is_valid_transcript_path() {
    # Accept (return 0) only a non-empty path with a .json/.jsonl suffix
    # and no ".." component; everything else is rejected (return 1).
    local transcript="$1"
    case "/$transcript/" in
        # "//" is what an empty argument expands to; "/../" anywhere means
        # the path contains a traversal component.
        //|*/../*) return 1 ;;
    esac
    case "$transcript" in
        *.json|*.jsonl) return 0 ;;
    esac
    return 1
 }
 # If we're already in a save cycle, let the AI stop normally
 # This is the infinite-loop prevention: block once → AI saves → tries to stop again → we let it through
 if [ "$STOP_HOOK_ACTIVE" = "True" ] || [ "$STOP_HOOK_ACTIVE" = "true" ]; then
    echo "{}"
    exit 0
 fi
 # Count human messages in the JSONL transcript
 # SECURITY: Pass transcript path as sys.argv to avoid shell injection via crafted paths
 # The stderr redirect sits on the command line itself: written on its own
 # line after the heredoc terminator it is a separate no-op command and the
 # counter's stderr was never silenced.
 if [ -f "$TRANSCRIPT_PATH" ]; then
    EXCHANGE_COUNT=$("$MEMPAL_PYTHON_BIN" - "$TRANSCRIPT_PATH" 2>/dev/null <<'PYEOF'
 import json, sys
 count = 0
 # Decode explicitly as UTF-8 and replace undecodable bytes: with the
 # locale default a single stray byte raised outside the per-line try and
 # lost the whole count.
 with open(sys.argv[1], encoding='utf-8', errors='replace') as f:
    for line in f:
        try:
            entry = json.loads(line)
            msg = entry.get('message', {})
            if isinstance(msg, dict) and msg.get('role') == 'user':
                content = msg.get('content', '')
                # Slash-command invocations are not counted as exchanges.
                if isinstance(content, str) and '<command-message>' in content:
                    continue
                count += 1
        except Exception:
            # Malformed or non-object lines are skipped, not fatal.
            pass
 print(count)
 PYEOF
 )
    # If the counter could not run at all, treat the transcript as empty
    # rather than carrying a blank value into the arithmetic below.
    [[ "$EXCHANGE_COUNT" =~ ^[0-9]+$ ]] || EXCHANGE_COUNT=0
 else
    EXCHANGE_COUNT=0
 fi
 # Track last save point for this session. The sanitized session id may
 # still contain "/" (the sanitizer keeps it for paths), so flatten it
 # before using it as a file name — otherwise an id such as "../../x" would
 # put the state file outside STATE_DIR.
 LAST_SAVE_FILE="$STATE_DIR/${SESSION_ID//\//_}_last_save"
 LAST_SAVE=0
 if [ -f "$LAST_SAVE_FILE" ]; then
    LAST_SAVE_RAW=$(cat "$LAST_SAVE_FILE")
    # SECURITY: Validate as plain integer before arithmetic to prevent command injection
    if [[ "$LAST_SAVE_RAW" =~ ^[0-9]+$ ]]; then
        LAST_SAVE="$LAST_SAVE_RAW"
    fi
 fi
 SINCE_LAST=$((EXCHANGE_COUNT - LAST_SAVE))
 # Log for debugging (check ~/.mempalace/hook_state/hook.log)
 echo "[$(date '+%H:%M:%S')] Session $SESSION_ID: $EXCHANGE_COUNT exchanges, $SINCE_LAST since last save" >> "$STATE_DIR/hook.log"
 # Time to save?
 if [ "$SINCE_LAST" -ge "$SAVE_INTERVAL" ] && [ "$EXCHANGE_COUNT" -gt 0 ]; then
    # Update last save point
    echo "$EXCHANGE_COUNT" > "$LAST_SAVE_FILE"
    echo "[$(date '+%H:%M:%S')] TRIGGERING SAVE at exchange $EXCHANGE_COUNT" >> "$STATE_DIR/hook.log"
    # Auto-mine. Two independent targets — both run if both are set:
    #   1. TRANSCRIPT_PATH (from Claude Code) → parent dir, --mode convos
    #      (Claude Code session JSONL — must use the convo miner)
    #   2. MEMPAL_DIR (user-configured project) → --mode projects
    #      (code, notes, docs)
    # MEMPAL_DIR is *additive*, not an override: a user with MEMPAL_DIR
    # pointed at their project still gets the active conversation mined.
    # Both miners are backgrounded (&) so the hook returns without waiting.
    if is_valid_transcript_path "$TRANSCRIPT_PATH" && [ -f "$TRANSCRIPT_PATH" ]; then
        mempalace mine "$(dirname "$TRANSCRIPT_PATH")" --mode convos \
            >> "$STATE_DIR/hook.log" 2>&1 &
    elif [ -n "$TRANSCRIPT_PATH" ]; then
        echo "[$(date '+%H:%M:%S')] Skipping invalid transcript path: $TRANSCRIPT_PATH" \
            >> "$STATE_DIR/hook.log"
    fi
    if [ -n "$MEMPAL_DIR" ] && [ -d "$MEMPAL_DIR" ]; then
        mempalace mine "$MEMPAL_DIR" --mode projects \
            >> "$STATE_DIR/hook.log" 2>&1 &
    fi
    # MEMPAL_VERBOSE toggle:
    #   true  = developer mode — block and show diaries/code in chat
    #   false = silent mode (default) — save in background, no chat clutter
    # Set via: export MEMPAL_VERBOSE=true
    if [ "$MEMPAL_VERBOSE" = "true" ] || [ "$MEMPAL_VERBOSE" = "1" ]; then
        cat << 'HOOKJSON'
 {
  "decision": "block",
  "reason": "MemPalace save checkpoint. Write a brief session diary entry covering key topics, decisions, and code changes since the last save. Use verbatim quotes where possible. Continue after saving."
 }
 HOOKJSON
    else
        # Silent mode: return empty JSON to not block. "decision": "allow" is
        # not a valid value — only "block" or {} are recognized.
        echo '{}'
    fi
 else
    # Not time yet — let the AI stop normally
    echo "{}"
 fi
@@ -0,0 +1,170 @@
 #!/bin/bash
 # MEMPALACE SAVE HOOK (REMOTE) — Auto-save every N exchanges to a remote palace.
 #
 # Drop-in replacement for mempal_save_hook.sh when MemPalace runs on a
 # server (e.g. Unraid) instead of the dev machine. Same trigger logic
 # (count human messages, fire every SAVE_INTERVAL), but instead of running
 # `mempalace mine` locally it POSTs the active transcript to the server's
 # /ingest/transcript endpoint.
 #
 # Required env vars:
 #   MEMPAL_REMOTE_URL    Base URL of the MemPalace server, e.g.
 #                        https://unraid.local:8443
 #   MEMPAL_REMOTE_TOKEN  Bearer token (same one configured in the server's
 #                        .env / MEMPAL_TOKEN).
 #
 # Optional env vars:
 #   MEMPAL_REMOTE_WING        Wing name to file under (defaults to the
 #                             session-id-derived inbox name on the server).
 #   MEMPAL_REMOTE_INSECURE    "1" to skip TLS verification — needed when
 #                             the server uses Caddy's self-signed `tls
 #                             internal` cert and the client hasn't trusted
 #                             the Caddy root CA.
 #   SAVE_INTERVAL             Override the default of 15 messages.
 #
 # === INSTALL ===
 # Add to .claude/settings.local.json (Claude Code):
 #
 #   "hooks": {
 #     "Stop": [{
 #       "matcher": "*",
 #       "hooks": [{
 #         "type": "command",
 #         "command": "/abs/path/to/mempal_save_hook_remote.sh",
 #         "timeout": 30
 #       }]
 #     }]
 #   }
 #
 # For Codex CLI, add the same shape to .codex/hooks.json.
 set -u
 # Number of human messages between uploads; overridable from the
 # environment.
 SAVE_INTERVAL="${SAVE_INTERVAL:-15}"
 STATE_DIR="$HOME/.mempalace/hook_state"
 mkdir -p "$STATE_DIR"
 # Resolve Python — used to parse the hook's stdin JSON and to count the
 # messages in the transcript. $MEMPAL_PYTHON wins when it names an
 # executable; otherwise the first python3 on PATH.
 MEMPAL_PYTHON_BIN="${MEMPAL_PYTHON:-}"
 if [ -z "$MEMPAL_PYTHON_BIN" ] || [ ! -x "$MEMPAL_PYTHON_BIN" ]; then
    MEMPAL_PYTHON_BIN="$(command -v python3 2>/dev/null || echo python3)"
 fi
 # Pre-flight: bail with a clean no-op if config is missing. Returning {}
 # lets Claude Code stop normally; we log the reason for the user to find.
 if [ -z "${MEMPAL_REMOTE_URL:-}" ] || [ -z "${MEMPAL_REMOTE_TOKEN:-}" ]; then
    echo "[$(date '+%H:%M:%S')] MEMPAL_REMOTE_URL/TOKEN not set — skipping" \
        >> "$STATE_DIR/hook.log"
    echo "{}"
    exit 0
 fi
 INPUT=$(cat)
 # Parse session_id, stop_hook_active, transcript_path in one Python call.
 # Python prints one sanitized value per line; the shell collects them
 # with a plain read loop. ``mapfile`` is avoided because it is a
 # bash >= 4 builtin: under an older /bin/bash (e.g. 3.2) it is missing and
 # every field silently fell back to its default.
 _mempal_parsed=()
 while IFS= read -r _mempal_line; do
    _mempal_parsed+=("$_mempal_line")
 done < <(echo "$INPUT" | "$MEMPAL_PYTHON_BIN" -c "
 import sys, json, re
 data = json.load(sys.stdin)
 sid = data.get('session_id', 'unknown')
 sha_raw = data.get('stop_hook_active', False)
 tp = data.get('transcript_path', '')
 safe = lambda s: re.sub(r'[^a-zA-Z0-9_/.\-~]', '', str(s))
 sha = 'True' if sha_raw is True or str(sha_raw).lower() in ('true', '1', 'yes') else 'False'
 print(safe(sid))
 print(sha)
 print(safe(tp))
 " 2>/dev/null)
 # Index 0 = session id, 1 = stop_hook_active, 2 = transcript path.
 SESSION_ID="${_mempal_parsed[0]:-unknown}"
 STOP_HOOK_ACTIVE="${_mempal_parsed[1]:-False}"
 TRANSCRIPT_PATH="${_mempal_parsed[2]:-}"
 # Expand a leading ~ to $HOME.
 TRANSCRIPT_PATH="${TRANSCRIPT_PATH/#\~/$HOME}"
 is_valid_transcript_path() {
    # Status 0 for a plausible transcript path, 1 otherwise. "Plausible"
    # means non-empty, suffix .json or .jsonl, and no ".." component.
    local file="$1"
    [ -z "$file" ] && return 1
    # Surrounding slashes make a leading or trailing ".." match as well.
    [[ "/$file/" == */../* ]] && return 1
    [[ "$file" == *.jsonl || "$file" == *.json ]] && return 0
    return 1
 }
 # A Stop that fires while stop_hook_active is true is let through
 # untouched.
 if [ "$STOP_HOOK_ACTIVE" = "True" ] || [ "$STOP_HOOK_ACTIVE" = "true" ]; then
    echo "{}"
    exit 0
 fi
 # Count human messages in the JSONL transcript. The path travels as
 # argv, never interpolated into the Python source. The stderr redirect
 # sits on the command line itself: on its own line after the heredoc
 # terminator it is a separate no-op command and silenced nothing.
 if [ -f "$TRANSCRIPT_PATH" ]; then
    EXCHANGE_COUNT=$("$MEMPAL_PYTHON_BIN" - "$TRANSCRIPT_PATH" 2>/dev/null <<'PYEOF'
 import json, sys
 count = 0
 # Decode explicitly as UTF-8 and replace undecodable bytes: with the
 # locale default a single stray byte raised outside the per-line try and
 # lost the whole count.
 with open(sys.argv[1], encoding='utf-8', errors='replace') as f:
    for line in f:
        try:
            entry = json.loads(line)
            msg = entry.get('message', {})
            if isinstance(msg, dict) and msg.get('role') == 'user':
                content = msg.get('content', '')
                # Slash-command invocations are not counted as exchanges.
                if isinstance(content, str) and '<command-message>' in content:
                    continue
                count += 1
        except Exception:
            # Malformed or non-object lines are skipped, not fatal.
            pass
 print(count)
 PYEOF
 )
    # If the counter could not run at all, treat the transcript as empty
    # rather than carrying a blank value into later arithmetic.
    [[ "$EXCHANGE_COUNT" =~ ^[0-9]+$ ]] || EXCHANGE_COUNT=0
 else
    EXCHANGE_COUNT=0
 fi
 # Per-session marker holding the exchange count at the last upload. The
 # sanitized session id may still contain "/", so flatten it before using
 # it as a file name — otherwise an id such as "../../x" would put the
 # marker outside STATE_DIR.
 LAST_SAVE_FILE="$STATE_DIR/${SESSION_ID//\//_}_last_save"
 LAST_SAVE=0
 if [ -f "$LAST_SAVE_FILE" ]; then
    LAST_SAVE_RAW=$(cat "$LAST_SAVE_FILE")
    # Only a plain integer is allowed into the arithmetic below.
    if [[ "$LAST_SAVE_RAW" =~ ^[0-9]+$ ]]; then
        LAST_SAVE="$LAST_SAVE_RAW"
    fi
 fi
 SINCE_LAST=$((EXCHANGE_COUNT - LAST_SAVE))
 echo "[$(date '+%H:%M:%S')] Session $SESSION_ID: $EXCHANGE_COUNT exchanges, $SINCE_LAST since last save" \
    >> "$STATE_DIR/hook.log"
 # SAVE_INTERVAL may come from the environment; a non-numeric value would
 # make the -ge test below error out on every run, so fall back to 15.
 [[ "$SAVE_INTERVAL" =~ ^[0-9]+$ ]] || SAVE_INTERVAL=15
 if [ "$SINCE_LAST" -ge "$SAVE_INTERVAL" ] && [ "$EXCHANGE_COUNT" -gt 0 ]; then
    if is_valid_transcript_path "$TRANSCRIPT_PATH" && [ -f "$TRANSCRIPT_PATH" ]; then
        echo "$EXCHANGE_COUNT" > "$LAST_SAVE_FILE"
        # --fail makes curl exit non-zero on an HTTP error status; without
        # it a 401/413/500 still exits 0 and was logged as "ingest ok".
        CURL_OPTS=("-sS" "--fail" "--max-time" "30" "-X" "POST")
        [ "${MEMPAL_REMOTE_INSECURE:-0}" = "1" ] && CURL_OPTS+=("-k")
        # The optional wing header is appended to CURL_OPTS rather than
        # kept in its own array: expanding an empty array under `set -u`
        # is an "unbound variable" abort on bash < 4.4, which killed the
        # background subshell before curl ever ran.
        [ -n "${MEMPAL_REMOTE_WING:-}" ] && CURL_OPTS+=(-H "X-Wing: $MEMPAL_REMOTE_WING")
        # Background the upload so we don't block the AI's stop. The hook
        # exits immediately with {} — the next save retry will catch any
        # transient failure (the miner is idempotent server-side).
        (
            curl "${CURL_OPTS[@]}" \
                -H "Authorization: Bearer $MEMPAL_REMOTE_TOKEN" \
                -H "X-Session-Id: $SESSION_ID" \
                -H "Content-Type: application/octet-stream" \
                --data-binary "@$TRANSCRIPT_PATH" \
                "$MEMPAL_REMOTE_URL/ingest/transcript" \
                >> "$STATE_DIR/hook.log" 2>&1 \
                && echo "[$(date '+%H:%M:%S')] ingest ok" >> "$STATE_DIR/hook.log" \
                || echo "[$(date '+%H:%M:%S')] ingest failed (will retry next save)" \
                    >> "$STATE_DIR/hook.log"
        ) &
        disown
    elif [ -n "$TRANSCRIPT_PATH" ]; then
        echo "[$(date '+%H:%M:%S')] Skipping invalid transcript path: $TRANSCRIPT_PATH" \
            >> "$STATE_DIR/hook.log"
    fi
 fi
 # Always answer with empty JSON so the AI's stop is never blocked.
 echo "{}"
@@ -1,154 +0,0 @@
 ---
 name: mempalace
 description: "MemPalace — Local AI memory with 96.6% recall. Semantic search, temporal knowledge graph, palace architecture (wings/rooms/drawers). Free, no cloud, no API keys."
 version: 3.3.0
 homepage: https://github.com/MemPalace/mempalace
 user-invocable: true
 metadata:
  openclaw:
    emoji: "\U0001F3DB"
    os:
      - darwin
      - linux
      - win32
    requires:
      anyBins:
        - mempalace
        - python3
    install:
      - id: mempalace-pip
        kind: uv
        label: "Install MemPalace (Python, local ChromaDB)"
        package: mempalace
        bins:
          - mempalace
 ---
 # MemPalace — Local AI Memory System
 You have access to a local memory palace via MCP tools. The palace stores verbatim conversation history and a temporal knowledge graph — all on the user's machine, zero cloud, zero API calls.
 ## Architecture
 - **Wings** = people or projects (e.g. `wing_alice`, `wing_myproject`)
 - **Halls** = categories (facts, events, preferences, advice)
 - **Rooms** = specific topics (e.g. `chromadb-setup`, `riley-school`)
 - **Drawers** = individual memory chunks (verbatim text)
 - **Knowledge Graph** = entity-relationship facts with time validity
 ## Protocol — FOLLOW THIS EVERY SESSION
 1. **ON WAKE-UP**: Call `mempalace_status` to load palace overview and AAAK dialect spec.
 2. **BEFORE RESPONDING** about any person, project, or past event: call `mempalace_search` or `mempalace_kg_query` FIRST. Never guess from memory — verify from the palace.
 3. **IF UNSURE** about a fact (name, age, relationship, preference): say "let me check" and query. Wrong is worse than slow.
 4. **AFTER EACH SESSION**: Call `mempalace_diary_write` to record what happened, what you learned, what matters.
 5. **WHEN FACTS CHANGE**: Call `mempalace_kg_invalidate` on the old fact, then `mempalace_kg_add` for the new one.
 ## Available Tools
 ### Search & Browse
 - `mempalace_search` — Semantic search across all memories. Always start here.
  - `query` (required): natural language search — keep it short, keywords or a question. Do NOT include system prompts or conversation context.
  - `wing`: filter by wing
  - `room`: filter by room
  - `limit`: max results (default 5)
 - `mempalace_check_duplicate` — Check if content already exists before filing.
  - `content` (required): text to check
  - `threshold`: similarity threshold (default 0.9 — lowering to 0.85–0.87 often catches more near-duplicates without significant false positives)
 - `mempalace_status` — Palace overview: total drawers, wings, rooms, AAAK spec
 - `mempalace_list_wings` — All wings with drawer counts
 - `mempalace_list_rooms` — Rooms within a wing (optional wing filter)
 - `mempalace_get_taxonomy` — Full wing/room/count tree
 - `mempalace_get_aaak_spec` — Get AAAK compression dialect specification
 ### Knowledge Graph (Temporal Facts)
 - `mempalace_kg_query` — Query entity relationships. Supports time filtering.
  - `entity` (required): e.g. "Max", "MyProject"
  - `as_of`: date filter (YYYY-MM-DD) — what was true at that time
  - `direction`: "outgoing", "incoming", or "both" (default "both")
 - `mempalace_kg_add` — Add a fact: subject -> predicate -> object
  - `subject`, `predicate`, `object` (required)
  - `valid_from`: when this became true
  - `source_closet`: source reference
 - `mempalace_kg_invalidate` — Mark a fact as no longer true
  - `subject`, `predicate`, `object` (required)
  - `ended`: when it stopped being true (default: today)
 - `mempalace_kg_timeline` — Chronological story of an entity
  - `entity`: filter by entity name (optional — all events if omitted)
 - `mempalace_kg_stats` — Graph overview: entities, triples, relationship types
 ### Palace Graph (Cross-Domain Connections)
 - `mempalace_traverse` — Walk from a room, find connected ideas across wings
  - `start_room` (required): room to start from
  - `max_hops`: connection depth (default 2)
 - `mempalace_find_tunnels` — Find rooms that bridge two wings
  - `wing_a`, `wing_b` (required)
 - `mempalace_graph_stats` — Graph connectivity overview
 ### Write
 - `mempalace_add_drawer` — Store verbatim content into a wing/room
  - `wing`, `room`, `content` (required)
  - `source_file`: optional source reference
  - Checks for duplicates automatically
 - `mempalace_delete_drawer` — Remove a drawer by ID
  - `drawer_id` (required)
 - `mempalace_diary_write` — Write a session diary entry
  - `agent_name` (required): your name/identifier
  - `entry` (required): what happened, what you learned, what matters
  - `topic`: category tag (default "general")
 - `mempalace_diary_read` — Read recent diary entries
  - `agent_name` (required)
  - `last_n`: number of entries (default 10)
 ## Setup
 Install MemPalace and populate the palace (uv recommended):
 ```bash
 uv tool install mempalace   # or: pip install mempalace
 mempalace init ~/my-convos
 mempalace mine ~/my-convos
 ```
 ### OpenClaw MCP config
 Add to your OpenClaw MCP configuration:
 ```json
 {
  "mcpServers": {
    "mempalace": {
      "command": "python3",
      "args": ["-m", "mempalace.mcp_server"]
    }
  }
 }
 ```
 Or via CLI:
 ```bash
 openclaw mcp set mempalace '{"command":"python3","args":["-m","mempalace.mcp_server"]}'
 ```
 ### Other MCP hosts
 ```bash
 # Claude Code
 claude mcp add mempalace -- python3 -m mempalace.mcp_server
 # Cursor — add to .cursor/mcp.json
 # Codex — add to .codex/mcp.json
 ```
 ## Tips
 - Search is semantic (meaning-based), not keyword. "What did we discuss about database performance?" works better than "database".
 - The knowledge graph stores typed relationships with time windows. Use it for facts about people and projects — it knows WHEN things were true.
 - Diary entries accumulate across sessions. Write one at the end of each conversation to build continuity.
 - Use `mempalace_check_duplicate` before storing new content to avoid duplicates.
 - The AAAK dialect (from `mempalace_status`) is a compressed notation for efficient storage. Read it naturally — expand codes mentally, treat *markers* as emotional context.
 ## License
 [MemPalace](https://github.com/MemPalace/mempalace) is MIT licensed. Created by Milla Jovovich, Ben Sigman, Igor Lins e Silva, and contributors.
@@ -0,0 +1,216 @@
 """
 HTTP ingest server — remote-aware hook target.
 Runs as a daemon thread inside the same process as ``mempalace-mcp`` so
 ChromaDB has exactly one writer per palace (HNSW indexes are not safe
 for multi-process writes). Started from ``mcp_server.main()`` when the
 ``MEMPALACE_INGEST_PORT`` env var is set.
 Endpoints
 ---------
 POST /ingest/transcript
    Body: raw bytes of a Claude Code / Codex JSONL transcript.
    Headers:
        X-Session-Id      required, becomes inbox subdirectory name
        X-Wing            optional, palace wing (defaults to session_id)
        Content-Length    required, max 50 MB
        Authorization     optional ``Bearer <token>``; checked when
                          ``MEMPALACE_INGEST_TOKEN`` is set
    Effect: writes body to
            ``<palace>/inbox/<session>/<session>.jsonl`` and runs
            ``mine_convos`` against that directory.
    Response: 202 with ``{"status": "ingested", "session_id": ..., ...}``.
 GET /healthz
    Response: 200 with a small JSON status payload. Cheap — does not open
    the palace. Used by Docker HEALTHCHECK and by clients to verify
    auth/connectivity before posting a multi-MB transcript.
 Auth is always defense-in-depth here — the primary gate is the
 reverse proxy (Caddy) in front. Setting ``MEMPALACE_INGEST_TOKEN`` adds
 a second check inside the container in case the proxy is bypassed.
 """
 from __future__ import annotations
 import hmac
 import json
 import logging
 import os
 import re
 import sys
 import threading
 from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
 from pathlib import Path
 from .config import MempalaceConfig, sanitize_name
 from .version import __version__
 logger = logging.getLogger("mempalace_ingest")
 MAX_TRANSCRIPT_BYTES = 50 * 1024 * 1024  # 50 MB hard cap per upload
 _SESSION_ID_RE = re.compile(r"^[A-Za-z0-9_.\-]{1,128}$")
 def _check_auth(header_value: str | None) -> bool:
    """Validate the bearer token if MEMPALACE_INGEST_TOKEN is set.
    Returns True when no token is configured (auth deferred to the proxy)
    or when the supplied token matches. Uses ``hmac.compare_digest`` to
    avoid timing-based token recovery.
    """
    expected = os.environ.get("MEMPALACE_INGEST_TOKEN", "").strip()
    if not expected:
        return True
    if not header_value or not header_value.lower().startswith("bearer "):
        return False
    supplied = header_value[7:].strip()
    return hmac.compare_digest(supplied.encode("utf-8"), expected.encode("utf-8"))
 def _validate_session_id(value: str | None) -> str:
    if not value or not _SESSION_ID_RE.match(value):
        raise ValueError("invalid or missing X-Session-Id header")
    return value
 class _IngestHandler(BaseHTTPRequestHandler):
    """HTTP handler for the ingest endpoint.

    Routes:
        GET  /healthz            -> 200 with a small JSON status payload
        POST /ingest/transcript  -> store the uploaded transcript in the
                                    palace inbox and run the convo miner

    Every response, errors included, is a JSON object. Failure details
    (exception text, filesystem paths) go to the log only; the client
    receives a short label plus the exception class name.
    """
    server_version = f"mempalace-ingest/{__version__}"
    # Route the per-request access line through the module logger instead
    # of the default stderr output.
    def log_message(self, fmt, *args):  # noqa: A003 (overrides stdlib)
        logger.info("%s - %s", self.address_string(), fmt % args)
    def _send_json(self, status: int, payload: dict) -> None:
        """Serialize ``payload`` and send it as a complete JSON response."""
        body = json.dumps(payload).encode("utf-8")
        self.send_response(status)
        self.send_header("Content-Type", "application/json")
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        self.wfile.write(body)
    def do_GET(self):  # noqa: N802 (stdlib API)
        """Serve ``/healthz``; any other path is a 404."""
        if self.path == "/healthz":
            self._send_json(200, {"status": "ok", "version": __version__})
            return
        self._send_json(404, {"error": "not found"})
    def do_POST(self):  # noqa: N802 (stdlib API)
        """Accept one transcript upload on ``/ingest/transcript``.

        Checks run in this order and the first failure ends the request:
        path (404), bearer token (401), ``X-Session-Id`` (400),
        ``X-Wing`` (400), ``Content-Length`` (400 / 413), body length
        (400). A request that passes is written to
        ``<palace>/inbox/<session>/<session>.jsonl`` and mined; success is
        reported as 202, any storage or mining failure as 500.
        """
        if self.path != "/ingest/transcript":
            self._send_json(404, {"error": "not found"})
            return
        if not _check_auth(self.headers.get("Authorization")):
            self._send_json(401, {"error": "unauthorized"})
            return
        try:
            session_id = _validate_session_id(self.headers.get("X-Session-Id"))
        except ValueError as exc:
            self._send_json(400, {"error": str(exc)})
            return
        # The wing is optional; an absent or blank header leaves it None.
        wing_header = self.headers.get("X-Wing", "").strip()
        try:
            wing = sanitize_name(wing_header, "wing") if wing_header else None
        except ValueError as exc:
            self._send_json(400, {"error": f"invalid wing: {exc}"})
            return
        try:
            length = int(self.headers.get("Content-Length", "0"))
        except ValueError:
            self._send_json(400, {"error": "invalid Content-Length"})
            return
        if length <= 0:
            self._send_json(400, {"error": "empty body"})
            return
        # Size is enforced from the declared length, before reading.
        if length > MAX_TRANSCRIPT_BYTES:
            self._send_json(413, {"error": f"body exceeds {MAX_TRANSCRIPT_BYTES} bytes"})
            return
        body = self.rfile.read(length)
        if len(body) != length:
            self._send_json(400, {"error": "truncated body"})
            return
        # Drop the transcript into the palace inbox and let the existing
        # convo miner handle parsing, dedup, room assignment. The miner is
        # idempotent — if the hook retries we won't double-file.
        palace_path = Path(MempalaceConfig().palace_path)
        inbox_dir = palace_path / "inbox" / session_id
        try:
            inbox_dir.mkdir(parents=True, exist_ok=True)
        except OSError as exc:
            # OSError text embeds the server-side path, so only the class
            # name is returned; the full traceback is in the log.
            logger.exception("inbox mkdir failed")
            self._send_json(500, {"error": f"inbox unavailable: {exc.__class__.__name__}"})
            return
        transcript_path = inbox_dir / f"{session_id}.jsonl"
        try:
            transcript_path.write_bytes(body)
        except OSError as exc:
            logger.exception("transcript write failed")
            self._send_json(
                500, {"error": f"transcript write failed: {exc.__class__.__name__}"}
            )
            return
        try:
            # Imported here rather than at module import time.
            from .convo_miner import mine_convos
            mine_convos(
                convo_dir=str(inbox_dir),
                palace_path=str(palace_path),
                wing=wing,
            )
        except Exception as exc:  # noqa: BLE001 (surface any miner error to client)
            logger.exception("mine_convos failed")
            self._send_json(500, {"error": f"mining failed: {exc.__class__.__name__}"})
            return
        self._send_json(
            202,
            {
                "status": "ingested",
                "session_id": session_id,
                "wing": wing,
                "bytes": len(body),
            },
        )
 def start_ingest_server(host: str, port: int) -> ThreadingHTTPServer:
    """Bind the ingest endpoint on ``host:port`` and start serving it.

    Requests are served from a daemon thread named ``mempalace-ingest``,
    so the server goes away with the parent process (e.g. when the MCP
    server's stdio loop exits). The bound server object is returned so a
    caller can ``shutdown()`` it explicitly.
    """
    httpd = ThreadingHTTPServer((host, port), _IngestHandler)
    worker = threading.Thread(target=httpd.serve_forever, name="mempalace-ingest", daemon=True)
    worker.start()
    logger.info("Ingest server listening on http://%s:%d", host, port)
    return httpd
 def main():
    """Standalone entrypoint: ``python -m mempalace.ingest_server``.

    Used for local dev / testing; production runs the server in-process
    inside ``mempalace-mcp``. Host and port come from
    ``MEMPALACE_INGEST_HOST`` (default ``127.0.0.1``) and
    ``MEMPALACE_INGEST_PORT`` (default ``8766``). Runs until interrupted
    with Ctrl-C.
    """
    logging.basicConfig(level=logging.INFO, format="%(message)s", stream=sys.stderr)
    host = os.environ.get("MEMPALACE_INGEST_HOST", "127.0.0.1")
    port = int(os.environ.get("MEMPALACE_INGEST_PORT", "8766"))
    server = start_ingest_server(host, port)
    try:
        # Serving happens on the daemon thread; park the main thread on an
        # event that is never set.
        threading.Event().wait()
    except KeyboardInterrupt:
        # shutdown() only stops the serve loop; server_close() is what
        # releases the listening socket.
        server.shutdown()
        server.server_close()
 if __name__ == "__main__":
    main()
@@ -18,6 +18,18 @@ Tools (write):
 Tools (maintenance):
  mempalace_reconnect       — force cache invalidation and reconnect after external writes
 Server-mode (optional)
 ----------------------
 Setting ``MEMPALACE_INGEST_PORT=<port>`` starts an HTTP transcript-ingest
 endpoint as a daemon thread inside this same process. Same Python
 runtime, same ChromaDB client — there is exactly one writer per palace
 (ChromaDB's HNSW index is not safe for multi-process writes). Used by
 the Unraid Docker deployment in ``deploy/unraid/`` so remote-aware
 hooks can POST transcripts without each client needing a local
 mempalace install. See ``mempalace/ingest_server.py`` and
 ``deploy/unraid/README.md``. The default stdio-only path is unaffected
 when the env var is unset.
 """
 import os
@@ -2245,6 +2257,21 @@ def main():
    # is visible at startup rather than on first use (#1222). Pure
    # filesystem read; never opens a chromadb client.
    _refresh_vector_disabled_flag()
    # Optional in-process HTTP ingest server. Same process so ChromaDB has
    # exactly one writer. Started only when MEMPALACE_INGEST_PORT is set
    # (i.e. the Unraid/Docker deployment) — the default stdio-only path is
    # unaffected.
    _ingest_port = os.environ.get("MEMPALACE_INGEST_PORT", "").strip()
    if _ingest_port:
        try:
            from .ingest_server import start_ingest_server
            start_ingest_server(
                host=os.environ.get("MEMPALACE_INGEST_HOST", "0.0.0.0"),
                port=int(_ingest_port),
            )
        except Exception as exc:  # noqa: BLE001
            logger.error("Failed to start ingest server: %s", exc)
    while True:
        try:
            line = sys.stdin.readline()
@@ -1 +0,0 @@
 OpenArena owner claim verification for MemPalace/mempalace: 09AE2C2E66CC4B5CBD7D
@@ -1,37 +0,0 @@
 # dependencies (bun install)
 node_modules
 # output
 out
 dist
 .vitepress/dist
 .vitepress/cache
 .vitepress/.temp
 *.tgz
 # code coverage
 coverage
 *.lcov
 # logs
 logs
 *.log
 report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
 # dotenv environment variable files
 .env
 .env.development.local
 .env.test.local
 .env.production.local
 .env.local
 # caches
 .eslintcache
 .cache
 *.tsbuildinfo
 # IntelliJ based IDEs
 .idea
 # Finder (MacOS) folder config
 .DS_Store
@@ -1,119 +0,0 @@
 import { defineConfig } from 'vitepress'
 import { withMermaid } from 'vitepress-plugin-mermaid'
 /**
  * Return `base` with a guaranteed trailing slash.
  *
  * A missing or empty value is treated as the site root and yields '/'.
  * Values that already end in '/' are returned unchanged.
  */
 function normalizeBase(base?: string): string {
  const candidate = base || '/'
  if (candidate.endsWith('/')) {
    return candidate
  }
  return `${candidate}/`
 }
 // Site base path: taken from DOCS_BASE ('/' when unset or empty) and
 // normalized so it always ends with a slash.
 const docsBase = normalizeBase(process.env.DOCS_BASE || '/')
 // Branch name substituted into the "Edit this page" link; defaults to 'main'.
 const editBranch = process.env.DOCS_EDIT_BRANCH || 'main'
 // Optional id for Google's gtag.js; the tracking <script> tags are only
 // added to <head> when this is set.
 const gaId = process.env.MEMPALACE_DOCS_GA_ID
 // VitePress site configuration for the MemPalace docs.
 // The config is passed through withMermaid() from vitepress-plugin-mermaid,
 // which presumably consumes the `mermaid` key at the bottom — confirm
 // against the plugin's documentation.
 export default withMermaid(
  defineConfig({
    title: 'MemPalace',
    description: 'Give your AI a memory. Local-first storage and retrieval for AI workflows, with benchmark results and MCP tooling.',
    base: docsBase,
    // Extra <head> tags: favicon, web-font preconnects/stylesheets,
    // Open Graph metadata and (optionally) the gtag.js snippet.
    head: [
      // Asset URLs here are built from docsBase by hand.
      ['link', { rel: 'icon', href: `${docsBase}mempalace_logo.png` }],
      ['link', { rel: 'preconnect', href: 'https://api.fontshare.com' }],
      ['link', { href: 'https://api.fontshare.com/v2/css?f[]=neue-machina@300,400,500,700,800&f[]=satoshi@300,400,500,700&display=swap', rel: 'stylesheet' }],
      ['link', { rel: 'preconnect', href: 'https://fonts.googleapis.com' }],
      ['link', { rel: 'preconnect', href: 'https://fonts.gstatic.com', crossorigin: '' }],
      ['link', { href: 'https://fonts.googleapis.com/css2?family=Onest:wght@300;400;500;600&family=JetBrains+Mono:wght@400;500&display=swap', rel: 'stylesheet' }],
      ['meta', { property: 'og:title', content: 'MemPalace — AI Memory System' }],
      ['meta', { property: 'og:description', content: '96.6% LongMemEval recall. Zero API calls. Local, free, open source.' }],
      ['meta', { property: 'og:image', content: `${docsBase}mempalace_logo.png` }],
      // Analytics is opt-in: both script tags are emitted only when
      // MEMPALACE_DOCS_GA_ID was provided at build time.
      ...(gaId ? [
        ['script', { async: '', src: `https://www.googletagmanager.com/gtag/js?id=${gaId}` }],
        ['script', {}, `window.dataLayer = window.dataLayer || [];\nfunction gtag(){dataLayer.push(arguments);}\ngtag('js', new Date());\ngtag('config', '${gaId}');`],
      ] as const : []),
    ],
    themeConfig: {
      logo: '/mempalace_logo.png',
      siteTitle: 'MemPalace',
      // Top navigation: one entry per documentation area, each pointing
      // at that area's first page.
      nav: [
        { text: 'Guide', link: '/guide/getting-started' },
        { text: 'Concepts', link: '/concepts/the-palace' },
        { text: 'Reference', link: '/reference/cli' },
      ],
      // Sidebars are keyed by path prefix, one per documentation area.
      sidebar: {
        '/guide/': [
          {
            text: 'Guide',
            items: [
              { text: 'Getting Started', link: '/guide/getting-started' },
              { text: 'Mining Your Data', link: '/guide/mining' },
              { text: 'Searching Memories', link: '/guide/searching' },
              { text: 'MCP Integration', link: '/guide/mcp-integration' },
              { text: 'Claude Code Plugin', link: '/guide/claude-code' },
              { text: 'Gemini CLI', link: '/guide/gemini-cli' },
              { text: 'OpenClaw Skill', link: '/guide/openclaw' },
              { text: 'Local Models', link: '/guide/local-models' },
              { text: 'Auto-Save Hooks', link: '/guide/hooks' },
              { text: 'Configuration', link: '/guide/configuration' },
            ],
          },
        ],
        '/concepts/': [
          {
            text: 'Concepts',
            items: [
              { text: 'The Palace', link: '/concepts/the-palace' },
              { text: 'Memory Stack', link: '/concepts/memory-stack' },
              { text: 'AAAK Dialect', link: '/concepts/aaak-dialect' },
              { text: 'Knowledge Graph', link: '/concepts/knowledge-graph' },
              { text: 'Specialist Agents', link: '/concepts/agents' },
              { text: 'Contradiction Detection', link: '/concepts/contradiction-detection' },
            ],
          },
        ],
        '/reference/': [
          {
            text: 'Reference',
            items: [
              { text: 'CLI Commands', link: '/reference/cli' },
              { text: 'MCP Tools', link: '/reference/mcp-tools' },
              { text: 'Python API', link: '/reference/python-api' },
              { text: 'API Reference', link: '/reference/api-reference' },
              { text: 'Module Map', link: '/reference/modules' },
              { text: 'Benchmarks', link: '/reference/benchmarks' },
              { text: 'Contributing', link: '/reference/contributing' },
            ],
          },
        ],
      },
      socialLinks: [
        { icon: 'github', link: 'https://github.com/MemPalace/mempalace' },
        { icon: 'discord', link: 'https://discord.com/invite/ycTQQCu6kn' },
      ],
      search: {
        provider: 'local',
      },
      footer: {
        message: 'Released under the MIT License.',
        copyright: 'Copyright © 2026 MemPalace contributors',
      },
      // The edit link targets the `website/` directory on the branch named
      // by DOCS_EDIT_BRANCH (default 'main').
      editLink: {
        pattern: `https://github.com/MemPalace/mempalace/edit/${editBranch}/website/:path`,
        text: 'Edit this page on GitHub',
      },
    },
    mermaid: {
      theme: 'dark',
    },
  })
 )
@@ -1,29 +0,0 @@
 <script setup>
 // Landing page root component: stacks the landing sections in reading
 // order and loads the landing stylesheet.
 import { useLandingEffects } from './landing/useLandingEffects.js'
 import FolioHeader from './landing/FolioHeader.vue'
 import HeroSection from './landing/HeroSection.vue'
 import ForgettingSection from './landing/ForgettingSection.vue'
 import AnatomySection from './landing/AnatomySection.vue'
 import DialectSection from './landing/DialectSection.vue'
 import MechanicsSection from './landing/MechanicsSection.vue'
 import InstallSection from './landing/InstallSection.vue'
 import CatalogFooter from './landing/CatalogFooter.vue'
 import './landing/landing.css'
 // Wires up the page's client-side behaviour. The composable registers its
 // own onMounted hook and looks elements up under `.mempalace-landing`, so
 // the wrapper class in the template below must stay in place.
 useLandingEffects()
 </script>
 <template>
  <div class="mempalace-landing">
    <div class="page">
      <FolioHeader />
      <HeroSection />
      <ForgettingSection />
      <AnatomySection />
      <DialectSection />
      <MechanicsSection />
      <InstallSection />
      <CatalogFooter />
    </div>
  </div>
 </template>
@@ -1,10 +0,0 @@
 import DefaultTheme from 'vitepress/theme'
 import Landing from './Landing.vue'
 import './style.css'
 // Custom theme: the stock VitePress theme plus a globally registered
 // <Landing> component.
 const theme = {
  extends: DefaultTheme,
  enhanceApp(context) {
    context.app.component('Landing', Landing)
  },
 }
 export default theme
@@ -1,100 +0,0 @@
 <template>
  <section v-pre id="anatomy" class="anatomy">
    <div class="section-mark"><span class="roman">ii</span> <span>anatomy of a palace</span></div>
    <div class="anatomy-head">
      <div>
        <span class="eyebrow">the method of loci, updated</span>
        <h2 class="display">
          Wings. Rooms. Closets. <em>Drawers.</em>
        </h2>
      </div>
      <p class="lede">
        An ancient memory technique, reworked for a machine. Broad categories
        nest time-based groupings; time-based groupings bundle topics; topics
        hold verbatim drawers. A symbolic index lets the model scan thousands
        of drawers in a single pass and open only the ones it needs.
      </p>
    </div>
    <div class="anatomy-diagram">
      <article class="stratum">
        <span class="n">W — wing</span>
        <h3>The <em>Wings</em></h3>
        <p class="sub">people · projects · topics</p>
        <p>A broad region of the palace, keyed to a real entity — a person by name, a project by codename, a domain of your life. Entity-first, always.</p>
        <div class="diagram">
          <svg viewBox="0 0 200 80" fill="none" stroke="currentColor" stroke-width="1" style="color:var(--prism);">
            <rect x="5" y="20" width="190" height="50" opacity="0.4"/>
            <rect x="15" y="28" width="50" height="34" />
            <rect x="75" y="28" width="50" height="34" />
            <rect x="135" y="28" width="50" height="34" />
            <line x1="5" y1="12" x2="195" y2="12" stroke-dasharray="2 3" opacity="0.5"/>
          </svg>
        </div>
      </article>
      <article class="stratum">
        <span class="n">R — room</span>
        <h3>The <em>Rooms</em></h3>
        <p class="sub">days · sessions · threads</p>
        <p>Inside a wing sit rooms — discrete units of time. One room per day, or one per session. Walk the corridor and the palace unfolds chronologically, room by room.</p>
        <div class="diagram">
          <svg viewBox="0 0 200 80" fill="none" stroke="currentColor" stroke-width="1" style="color:var(--prism);">
            <rect x="10" y="20" width="36" height="44" />
            <rect x="56" y="20" width="36" height="44" />
            <rect x="102" y="20" width="36" height="44" />
            <rect x="148" y="20" width="36" height="44" />
            <line x1="10" y1="70" x2="184" y2="70" stroke-dasharray="1 3" opacity="0.6"/>
          </svg>
        </div>
      </article>
      <article class="stratum">
        <span class="n">C — closet</span>
        <h3>The <em>Closets</em></h3>
        <p class="sub">topics · threads · bundles</p>
        <p>Inside a room, closets group related drawers by topic or thread. Open one closet and you see every drawer on that subject together — no need to walk the whole room.</p>
        <div class="diagram">
          <svg viewBox="0 0 200 80" fill="none" stroke="currentColor" stroke-width="1" style="color:var(--prism);">
            <g class="closet">
              <rect x="10" y="14" width="54" height="52" />
              <line x1="37" y1="14" x2="37" y2="66" opacity="0.5"/>
              <circle cx="33" cy="40" r="1.2" fill="currentColor"/>
              <circle cx="41" cy="40" r="1.2" fill="currentColor"/>
            </g>
            <g class="closet">
              <rect x="73" y="14" width="54" height="52" />
              <line x1="100" y1="14" x2="100" y2="66" opacity="0.5"/>
              <circle cx="96" cy="40" r="1.2" fill="currentColor"/>
              <circle cx="104" cy="40" r="1.2" fill="currentColor"/>
            </g>
            <g class="closet">
              <rect x="136" y="14" width="54" height="52" />
              <line x1="163" y1="14" x2="163" y2="66" opacity="0.5"/>
              <circle cx="159" cy="40" r="1.2" fill="currentColor"/>
              <circle cx="167" cy="40" r="1.2" fill="currentColor"/>
            </g>
          </svg>
        </div>
      </article>
      <article class="stratum">
        <span class="n">D — drawer</span>
        <h3>The <em>Drawers</em></h3>
        <p class="sub">verbatim · permanent · exact</p>
        <p>Each closet holds drawers. A drawer is a single chunk of verbatim content — the exact words, untouched. The palace's promise is kept here.</p>
        <div class="diagram">
          <svg viewBox="0 0 200 80" fill="none" stroke="currentColor" stroke-width="1" style="color:var(--prism);">
            <rect x="40" y="14" width="120" height="16" />
            <rect x="40" y="34" width="120" height="16" />
            <rect x="40" y="54" width="120" height="16" />
            <circle cx="150" cy="22" r="1.5" fill="currentColor"/>
            <circle cx="150" cy="42" r="1.5" fill="currentColor"/>
            <circle cx="150" cy="62" r="1.5" fill="currentColor"/>
          </svg>
        </div>
      </article>
    </div>
  </section>
 </template>
@@ -1,45 +0,0 @@
 <template>
  <footer v-pre class="catalog">
    <form class="waitlist waitlist-footer" data-source="footer" novalidate>
      <div class="waitlist-head">
        <span class="waitlist-pulse" aria-hidden="true"></span>
        <span class="waitlist-eyebrow">Last call &middot; subscribe for updates</span>
      </div>
      <div class="waitlist-row">
        <input type="email" class="waitlist-input" name="email" placeholder="you@example.com" autocomplete="email" aria-label="Email address" required />
        <button type="submit" class="waitlist-submit">
          <span class="waitlist-label-default">Join the list</span>
          <span class="waitlist-label-pending" aria-hidden="true">Joining…</span>
          <svg class="waitlist-arrow" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="1.6" aria-hidden="true"><path d="M5 12h14M13 6l6 6-6 6"/></svg>
          <svg class="waitlist-check" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="1.8" aria-hidden="true"><path d="M5 12l5 5 9-11"/></svg>
        </button>
      </div>
      <p class="waitlist-msg" aria-live="polite"></p>
    </form>
    <div class="catalog-card">
      <div>
        <p class="catalog-title">MemPalace <em>&mdash;</em> a memory palace for AI.</p>
        <p class="catalog-desc">Verbatim storage, local-first, zero telemetry. Built for people who believe their words are theirs.</p>
      </div>
      <div>
        <h4>Documentation</h4>
        <ul>
          <li><a href="/guide/getting-started">Getting started</a></li>
          <li><a href="/concepts/the-palace">The palace</a></li>
          <li><a href="/reference/cli">CLI reference</a></li>
          <li><a href="/reference/benchmarks">Benchmarks</a></li>
        </ul>
      </div>
      <div>
        <h4>The project</h4>
        <ul>
          <li><a href="https://github.com/MemPalace/mempalace">GitHub</a></li>
          <li><a href="https://github.com/MemPalace/mempalace/blob/main/README.md">Readme</a></li>
          <li><a href="https://github.com/MemPalace/mempalace/blob/main/ROADMAP.md">Roadmap</a></li>
          <li><a href="https://github.com/MemPalace/mempalace/blob/main/CHANGELOG.md">Changelog</a></li>
        </ul>
      </div>
    </div>
  </footer>
 </template>
@@ -1,64 +0,0 @@
 <template>
  <section v-pre id="dialect" class="dialect">
    <div class="section-mark"><span class="roman">iii</span> <span>the aaak dialect</span></div>
    <div class="dialect-head">
      <span class="eyebrow">index &larr; verbatim</span>
      <h2 class="display">
        A compressed symbolic language <em>for finding</em>, not remembering.
      </h2>
      <p class="lede">
        The content stays verbatim — always. The <em>index</em> above it is written
        in AAAK: a dense symbolic dialect an LLM can scan at a glance. Thousands
        of entries, one pass, exact drawer located.
      </p>
    </div>
    <div class="dialect-grid">
      <article class="slab">
        <header class="card-head">
          <span class="l">drawer · D-007</span>
          <span>verbatim · exact · permanent</span>
        </header>
        <p class="label">The drawer, as stored.</p>
        <p>
          "My son's name is <strong>Noah</strong>. He turns <strong>six</strong>
          on <strong>September 12th</strong>. He loves dinosaurs —
          especially the <strong>therizinosaurus</strong> because of the
          claws. We want to do a small party at <strong>the park on Glebe
          Point Road</strong>, maybe eight kids."
        </p>
        <p style="color:var(--ice-ghost); font-size: 13.5px; font-family: var(--f-mono); letter-spacing: 0.05em; margin-top:1.5rem;">
          &mdash; kept as spoken. never rewritten.
        </p>
      </article>
      <div class="dialect-arrow" aria-hidden="true">
        <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="1.3">
          <path d="M12 3v18M7 8l5-5 5 5M7 16l5 5 5-5"/>
        </svg>
        <span>index · AAAK</span>
      </div>
      <article class="slab mono">
        <header class="card-head">
          <span class="l">index · AAAK</span>
          <span>indexes · compressed · addressable</span>
        </header>
        <p class="label">The pointer, as indexed.</p>
 <pre><span class="c">§ W-042/R-11/D-007</span>
 <span class="k">@p</span> <span class="t">noah</span>~<span class="v">son.age=6</span>~<span class="v">dob=09-12</span>
 <span class="k">@l</span> <span class="t">glebe-pt-rd.park</span>
 <span class="k">@e</span> <span class="t">birthday</span>~<span class="v">party(n≈8)</span>
 <span class="k">@i</span> <span class="t">therizinosaurus</span>~<span class="v">claws</span>
 <span class="k">@t</span> <span class="v">2026-04-14T09:41</span>
 <span class="c">§ ptr → D-007 (verbatim)</span></pre>
      </article>
    </div>
    <p class="dialect-caption">
      Dense compression on the pointer layer. Full fidelity on the content
      layer. You get speed without ever losing a word.
    </p>
  </section>
 </template>
@@ -1,16 +0,0 @@
 <template>
  <header v-pre class="folio" role="banner">
    <div class="mark" aria-label="MemPalace">
      <img src="/mempalace_logo.png" alt="" aria-hidden="true" />
      <span>MemPalace</span>
    </div>
    <nav class="right" aria-label="Primary">
      <a href="#anatomy" class="hide-mobile">Anatomy</a>
      <a href="#dialect" class="hide-mobile">Dialect</a>
      <a href="#mechanics" class="hide-mobile">Mechanics</a>
      <a href="#install" class="hide-mobile">Install</a>
      <a href="/guide/getting-started">Docs</a>
      <a href="https://github.com/MemPalace/mempalace">GitHub ↗</a>
    </nav>
  </header>
 </template>
@@ -1,43 +0,0 @@
 <template>
  <section v-pre id="forgetting" class="forgetting">
    <div class="section-mark"><span class="roman">i</span> <span>the forgetting</span></div>
    <header class="forgetting-head">
      <div class="copy">
        <span class="eyebrow">before &middot; after</span>
        <h2 class="display">
          The same conversation, <em>twice.</em>
        </h2>
        <p class="lede" style="margin:0;">
          Scroll down and watch. On the left, a model without memory. On the right,
          the same model with MemPalace. The words are identical — until two weeks
          pass.
        </p>
      </div>
      <button class="replay" id="replay-demo" type="button" aria-label="Replay the demo">
        <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="1.5" aria-hidden="true"><path d="M4 4v6h6"/><path d="M20 20v-6h-6"/><path d="M4 10a8 8 0 0114-5l2 3"/><path d="M20 14a8 8 0 01-14 5l-2-3"/></svg>
        replay
      </button>
    </header>
    <div class="forgetting-compare" id="forgetting-compare" aria-label="Comparison demo">
      <article class="demo-pane demo-forget">
        <header>
          <span class="pane-tag">without mempalace</span>
          <span class="pane-meta">session <em>resets</em> &middot; no recall</span>
        </header>
        <div class="chat" data-pane="forget" aria-live="polite"></div>
      </article>
      <div class="divider" aria-hidden="true"></div>
      <article class="demo-pane demo-remember">
        <header>
          <span class="pane-tag">with mempalace</span>
          <span class="pane-meta">verbatim &middot; retrieved <em>instantly</em></span>
        </header>
        <div class="chat" data-pane="remember" aria-live="polite"></div>
      </article>
    </div>
  </section>
 </template>
@@ -1,80 +0,0 @@
 <template>
  <section v-pre class="hero" id="hero">
    <span class="corner-ticks" aria-hidden="true"><span></span></span>
    <div class="hero-inner">
      <div class="hero-copy">
        <h1 class="display">
          <span class="line">Memory <em class="is-accent">is</em></span>
          <span class="line line-2"><span class="identity-white">identity.</span></span>
        </h1>
        <p class="lede">
          Every conversation, every idea, every small decision&hellip; held somewhere safe.
          <br><br>Welcome to the future of memory: <span class="mp-blue">MemPalace</span>
        </p>
        <form class="waitlist waitlist-hero" data-source="hero" novalidate>
          <div class="waitlist-head">
            <span class="waitlist-pulse" aria-hidden="true"></span>
            <span class="waitlist-eyebrow">Subscribe for updates</span>
          </div>
          <div class="waitlist-row">
            <input
              type="email"
              class="waitlist-input"
              name="email"
              placeholder="you@example.com"
              autocomplete="email"
              aria-label="Email address"
              required
            />
            <button type="submit" class="waitlist-submit">
              <span class="waitlist-label-default">Join the list</span>
              <span class="waitlist-label-pending" aria-hidden="true">Joining…</span>
              <svg class="waitlist-arrow" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="1.6" aria-hidden="true">
                <path d="M5 12h14M13 6l6 6-6 6"/>
              </svg>
              <svg class="waitlist-check" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="1.8" aria-hidden="true">
                <path d="M5 12l5 5 9-11"/>
              </svg>
            </button>
          </div>
          <p class="waitlist-msg" aria-live="polite"></p>
        </form>
        <div class="hero-secondary">
          <a href="/guide/getting-started">Read the docs</a>
          <span class="sep" aria-hidden="true">·</span>
          <a href="https://github.com/MemPalace/mempalace">GitHub ↗</a>
        </div>
      </div>
      <!-- Palace video visual -->
      <div class="palace-stage" aria-hidden="true">
        <div class="halo"></div>
        <div class="stars">
          <i style="top:12%; left:22%;  --t:5s;   --d:0.0s"></i>
          <i style="top:18%; left:74%;  --t:6s;   --d:1.2s"></i>
          <i style="top:34%; left:8%;   --t:4s;   --d:0.6s"></i>
          <i style="top:44%; left:88%;  --t:7s;   --d:0.3s"></i>
          <i style="top:62%; left:14%;  --t:5.5s; --d:1.8s"></i>
          <i style="top:72%; left:82%;  --t:4.5s; --d:0.9s"></i>
          <i style="top:82%; left:38%;  --t:6.2s; --d:2.4s"></i>
          <i style="top:28%; left:52%;  --t:5.2s; --d:3.0s"></i>
          <i style="top:88%; left:60%;  --t:4.8s; --d:1.5s"></i>
          <i style="top:6%;  left:48%;  --t:6.8s; --d:0.4s"></i>
        </div>
        <video
          class="palace-video"
          src="/hero_video.mp4"
          autoplay
          muted
          loop
          playsinline
          disablepictureinpicture
        ></video>
      </div>
    </div>
  </section>
 </template>
@@ -1,38 +0,0 @@
 <template>
  <section v-pre id="install" class="install">
    <div class="section-mark" style="left:50%; transform:translateX(-50%);"><span class="roman">v</span> <span>begin</span></div>
    <span class="eyebrow" style="justify-content:center;">open a drawer</span>
    <h2 class="display">
      Build your <em>palace.</em>
    </h2>
    <p class="lede" style="margin-left:auto;margin-right:auto;text-align:center;">
      One command to install. One to initialize. Your words — yours, permanent,
      instantly recallable — from that moment on.
    </p>
    <div class="terminal" role="figure" aria-label="Installation commands">
      <div class="terminal-head">
        <span class="lights"><i></i><i></i><i></i></span>
        <span>~/mempalace &middot; bash</span>
      </div>
 <pre><span class="prompt">$</span> pip install -e <span class="dim">".[dev]"</span>
 <span class="c">Successfully installed mempalace</span>
 <span class="prompt">$</span> mempalace init
 <span class="ok">  ✓</span> palace created at <span class="dim">~/.mempalace</span>
 <span class="ok">  ✓</span> hooks registered <span class="dim">(stop, precompact)</span>
 <span class="ok">  ✓</span> knowledge graph initialized
 <span class="prompt">$</span> mempalace mine <span class="dim">./notes</span>
 <span class="ok">  ✓</span> filed · <span class="c">W-001/R-01/D-001</span></pre>
    </div>
    <div class="install-cta">
      <a href="/guide/getting-started" class="btn btn-primary">
        Read the docs
        <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="1.5"><path d="M5 12h14M13 6l6 6-6 6"/></svg>
      </a>
      <a href="https://github.com/MemPalace/mempalace" class="btn">
        Visit the repository
      </a>
    </div>
  </section>
 </template>
@@ -1,83 +0,0 @@
 <template>
  <section v-pre id="mechanics">
    <div class="section-mark"><span class="roman">iv</span> <span>how it works</span></div>
    <div class="mechanics-head">
      <span class="eyebrow">mechanism · architecture</span>
      <h2 class="display">
        Four pieces. <em>No cloud.</em> No keys.
      </h2>
    </div>
    <div class="mechanics">
      <article class="mech">
        <div class="icon" aria-hidden="true">
          <svg viewBox="0 0 48 48" fill="none" stroke="currentColor" stroke-width="1.3">
            <rect x="8" y="10" width="32" height="22" rx="1"/>
            <path d="M8 16h32"/>
            <g class="mech-bars">
              <path d="M14 24h20"/>
              <path d="M14 28h12"/>
            </g>
            <path d="M16 38h16M20 32v6M28 32v6"/>
            <circle class="mech-led" cx="36" cy="13.5" r="1.1" fill="currentColor"/>
          </svg>
        </div>
        <span class="eyebrow no-rule"><span class="n">— 01</span></span>
        <h3>Local-<em>first</em></h3>
        <p>ChromaDB on disk. SQLite for the knowledge graph. Nothing is uploaded. Nothing is synced. Your palace lives under a single directory on your machine.</p>
        <div class="metric">path · <b>~/.mempalace</b></div>
      </article>
      <article class="mech">
        <div class="icon" aria-hidden="true">
          <svg viewBox="0 0 48 48" fill="none" stroke="currentColor" stroke-width="1.3">
            <circle class="mech-ring" cx="24" cy="24" r="14"/>
            <path class="mech-plus" d="M16 24h16M24 16v16"/>
            <path class="mech-slash" d="M10 10l28 28" stroke-width="1.5"/>
          </svg>
        </div>
        <span class="eyebrow no-rule"><span class="n">— 02</span></span>
        <h3>Zero <em>API</em></h3>
        <p>Extraction, chunking, and embedding all run locally. No OpenAI key, no Anthropic key, no sentence-transformers endpoint. The memory works even offline, on a plane.</p>
        <div class="metric">keys required · <b>none</b></div>
      </article>
      <article class="mech">
        <div class="icon" aria-hidden="true">
          <svg viewBox="0 0 48 48" fill="none" stroke="currentColor" stroke-width="1.3">
            <path d="M8 36V18l8-8h16l8 8v18"/>
            <path d="M8 36h32"/>
            <g class="mech-gear">
              <circle cx="24" cy="26" r="4"/>
              <path d="M24 22v-4M24 30v4M20 26h-4M28 26h4"/>
            </g>
          </svg>
        </div>
        <span class="eyebrow no-rule"><span class="n">— 03</span></span>
        <h3>Background <em>hooks</em></h3>
        <p>Filing and indexing happen silently through Claude Code hooks. On session end, on pre-compaction. You write. The palace fills itself behind the curtain.</p>
        <div class="metric">hook budget · <b>&lt;500 ms</b></div>
      </article>
      <article class="mech">
        <div class="icon" aria-hidden="true">
          <svg viewBox="0 0 48 48" fill="none" stroke="currentColor" stroke-width="1.3">
            <path class="mech-edges" d="M12 14l10 10M36 12L26 24M14 36l8-8M36 34l-10-6" opacity="0.6"/>
            <g class="mech-nodes">
              <circle cx="10" cy="12" r="3"/>
              <circle cx="38" cy="10" r="3"/>
              <circle cx="24" cy="26" r="3"/>
              <circle cx="12" cy="38" r="3"/>
              <circle cx="38" cy="36" r="3"/>
            </g>
          </svg>
        </div>
        <span class="eyebrow no-rule"><span class="n">— 04</span></span>
        <h3>Temporal <em>graph</em></h3>
        <p>Relationships across entities with valid-from and valid-to dates. Who worked on what. When did this change. Facts that were true then, and may not be now.</p>
        <div class="metric">store · <b>sqlite</b></div>
      </article>
    </div>
  </section>
 </template>
@@ -1,406 +0,0 @@
 import { onMounted, onBeforeUnmount } from 'vue'
 export function useLandingEffects() {
 // Shared cleanup registry — IIFEs push disconnect/removeEventListener thunks
 // here so onBeforeUnmount can tear everything down on SPA nav.
 const cleanups = []
 onMounted(() => {
  // Defensive guard: without a `document` global there is no DOM to wire up,
  // so none of the effects below are installed.
  if (typeof document === 'undefined') return
  // Hide VitePress chrome while the landing component is live, restore on leave.
  document.body.classList.add('mempalace-active')
  /* ---------- Waitlist submission ---------- */
  // Wires every `.waitlist` form inside `.mempalace-landing` to the waitlist
  // endpoint. For each form it:
  //   - validates the address client-side with `emailRe`,
  //   - POSTs `{ email, source }` as JSON (`source` comes from `data-source`,
  //     defaulting to 'landing'),
  //   - reflects progress through the `is-pending` / `is-success` / `is-error`
  //     classes and the `.waitlist-msg` text,
  //   - registers a thunk in `cleanups` that removes both listeners.
  ;(function initWaitlist(){
    // NOTE(review): this points at a "staging" host — confirm that is the
    // intended target for production builds.
    const ENDPOINT = 'https://br.staging.mempalaceofficial.com/waitlist'
    const forms = document.querySelectorAll('.mempalace-landing .waitlist')
    const emailRe = /^[^\s@]+@[^\s@]+\.[^\s@]+$/
    forms.forEach(form => {
      const input  = form.querySelector('.waitlist-input')
      const button = form.querySelector('.waitlist-submit')
      const msg    = form.querySelector('.waitlist-msg')
      const source = form.dataset.source || 'landing'
      // A form without its input cannot be submitted. Skip it instead of
      // throwing: an exception here would abort the rest of onMounted and the
      // other landing effects would never be installed.
      if (!input) return

      // Swaps the form's state class and, when `text` is given, the message.
      // The message element and the button are treated as optional markup.
      function setState(state, text) {
        form.classList.remove('is-pending', 'is-success', 'is-error')
        if (state) form.classList.add('is-' + state)
        if (msg && text != null) msg.textContent = text
      }
      // Enables/disables the controls while a request is in flight.
      function setLocked(locked) {
        if (button) button.disabled = locked
        input.disabled = locked
      }
      const onSubmit = async (e) => {
        e.preventDefault()
        // Ignore repeat submits once accepted or while a request is pending.
        if (form.classList.contains('is-success') || form.classList.contains('is-pending')) return
        const email = (input.value || '').trim()
        if (!emailRe.test(email)) {
          setState('error', 'Please provide a valid email address.')
          input.focus()
          return
        }
        setState('pending', 'Sending…')
        setLocked(true)
        let succeeded = false
        try {
          const res = await fetch(ENDPOINT, {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ email, source })
          })
          let data = null
          try { data = await res.json() } catch (_) { /* no body */ }
          // Only trust a server-provided message when it really is text.
          const serverMsg = data && typeof data.message === 'string' ? data.message : ''
          if (res.ok) {
            setState('success', serverMsg || "Success! You're on the list for updates.")
            input.value = email
            succeeded = true
          } else if (res.status === 429) {
            setState('error', 'Whoa — slow down a moment, then try again.')
          } else if (res.status === 400) {
            setState('error', serverMsg || 'Please provide a valid email address.')
          } else {
            setState('error', serverMsg || 'Something went wrong. Please try again later.')
          }
        } catch (_err) {
          setState('error', 'Network error — please try again.')
        } finally {
          // After success the controls stay disabled so the visitor cannot
          // resubmit accidentally; every other outcome unlocks them for a retry.
          if (!succeeded) setLocked(false)
        }
      }
      // Typing after an error clears the error state and its message.
      const onInput = () => {
        if (form.classList.contains('is-error')) setState(null, '')
      }
      form.addEventListener('submit', onSubmit)
      input.addEventListener('input', onInput)
      cleanups.push(() => {
        form.removeEventListener('submit', onSubmit)
        input.removeEventListener('input', onInput)
      })
    })
  })()
  /* ---------- Reveal-on-scroll for cards ---------- */
  // Fades and slides each `.stratum`, `.mech` and `.slab` card into place the
  // first time it intersects the viewport. Cards are staggered by 80 ms per
  // position among their siblings. Does nothing when IntersectionObserver is
  // unavailable, leaving the cards in their default visible state.
  ;(function(){
    const canObserve = 'IntersectionObserver' in window
    if (!canObserve) return
    const cards = document.querySelectorAll('.mempalace-landing .stratum, .mempalace-landing .mech, .mempalace-landing .slab')

    // Starting pose: invisible and nudged 20px down, with the transition armed.
    cards.forEach(card => {
      Object.assign(card.style, {
        opacity: '0',
        transform: 'translateY(20px)',
        transition: 'opacity 0.9s ease, transform 0.9s ease'
      })
    })

    // Final pose, delayed according to the card's index within its parent.
    const reveal = (card) => {
      const position = Array.prototype.indexOf.call(card.parentElement.children, card)
      card.style.transitionDelay = (position * 80) + 'ms'
      card.style.opacity = '1'
      card.style.transform = 'translateY(0)'
    }

    const watcher = new IntersectionObserver((entries) => {
      for (const entry of entries) {
        if (!entry.isIntersecting) continue
        reveal(entry.target)
        // One-shot: a revealed card is never animated again.
        watcher.unobserve(entry.target)
      }
    }, { rootMargin: '0px 0px -80px 0px' })

    cards.forEach(card => watcher.observe(card))
    cleanups.push(() => watcher.disconnect())
  })()
  /* ---------- Forgetting demo ---------- */
  ;(function initForgettingDemo(){
    const compare = document.getElementById('forgetting-compare')
    if (!compare) return
    const leftChat  = compare.querySelector('[data-pane="forget"]')
    const rightChat = compare.querySelector('[data-pane="remember"]')
    const replayBtn = document.getElementById('replay-demo')
    const reduced   = window.matchMedia('(prefers-reduced-motion: reduce)').matches
    const delay = ms => new Promise(r => setTimeout(r, reduced ? Math.min(ms, 60) : ms))
    // Empties both chat panes and, when the replay button exists, hides it.
    function clear() {
      for (const pane of [leftChat, rightChat]) {
        pane.innerHTML = ''
      }
      if (!replayBtn) return
      replayBtn.classList.remove('visible')
    }
    // Appends an empty chat bubble for `who` to `chat`, scrolls the pane to the
    // bottom and returns the new row. Rows for 'You' get the `you` class, all
    // other speakers get `ai`. `opts.id`, when truthy, is stored as `data-id`.
    function addMsg(chat, who, opts = {}) {
      const speakerClass = who === 'You' ? 'you' : 'ai'
      const bubble = document.createElement('div')
      bubble.className = `msg ${speakerClass}`
      const { id } = opts
      if (id) bubble.dataset.id = id
      bubble.innerHTML = `<span class="who">${who}</span><span class="body"></span>`
      chat.appendChild(bubble)
      chat.scrollTop = chat.scrollHeight
      return bubble
    }
    // Types `text` into the `.body` element of `row` one character at a time,
    // toggling the `typing` class on the row for the duration.
    //
    //   row   - message row that contains a `.body` element
    //   text  - plain text that may contain simple inline tags such as
    //           `<strong>…</strong>`
    //   speed - base per-character wait in ms; about 8% of characters get an
    //           extra 40 ms. No per-character wait happens under reduced motion.
    //
    // Tags are applied structurally: an opening tag creates the element and the
    // characters that follow are typed INTO it until the matching closing tag.
    // Passing a lone opening tag to insertAdjacentHTML and then appending text
    // to the body leaves an empty element followed by unformatted text, because
    // the HTML fragment parser closes the element immediately.
    async function typeInto(row, text, speed = 14) {
      const body = row.querySelector('.body')
      const parts = text.split(/(<[^>]+>)/)
      // Element that currently receives typed characters.
      let sink = body
      row.classList.add('typing')
      for (const part of parts) {
        if (!part) continue
        if (/^<\/[^>]*>$/.test(part)) {
          // Closing tag: step back out, but never above the message body.
          if (sink !== body && sink.parentElement) sink = sink.parentElement
          continue
        }
        if (/^<[^>]+>$/.test(part)) {
          const before = sink.lastElementChild
          sink.insertAdjacentHTML('beforeend', part)
          const opened = sink.lastElementChild
          // Void / self-closed tags cannot hold text, so typing stays put.
          const holdsText = !/^<(?:br|hr|img|wbr|input)\b/i.test(part) && !part.endsWith('/>')
          // Only descend when the markup really produced a new element.
          if (opened && opened !== before && holdsText) sink = opened
          continue
        }
        for (const ch of part) {
          sink.insertAdjacentText('beforeend', ch)
          if (!reduced) await delay(speed + (Math.random() < 0.08 ? 40 : 0))
        }
      }
      row.classList.remove('typing')
    }
    // Appends a `divider-time` row reading "— <text> —" to `chat` and returns it.
    function addDivider(chat, text) {
      const marker = Object.assign(document.createElement('div'), {
        className: 'divider-time',
        textContent: `— ${text} —`
      })
      chat.appendChild(marker)
      return marker
    }
    // Appends a `retrieval` row to `chat` showing the retrieved call number and
    // the lookup time in milliseconds, and returns the new row.
    function addRetrieval(chat, callNumber, ms) {
      const line = document.createElement('div')
      line.className = 'retrieval'
      line.innerHTML = [
        '<span class="who">mem</span>',
        `<span class="l">retrieved &middot; <span class="r">${callNumber}</span></span>`,
        `<span>${ms}&nbsp;ms</span>`
      ].join('')
      chat.appendChild(line)
      return line
    }
    // Appends a closing `stamp` row ("— <text>") to `chat` and returns it. When
    // `callNumber` is truthy it is added in a trailing `call` span.
    function addStamp(chat, text, callNumber) {
      let markup = `<span>— ${text}</span>`
      if (callNumber) {
        markup += `<span class="call">${callNumber}</span>`
      }
      const stamp = document.createElement('div')
      stamp.className = 'stamp'
      stamp.innerHTML = markup
      chat.appendChild(stamp)
      return stamp
    }
    // "Dust" effect: replaces the visible text of `target` with one absolutely
    // positioned `.dust` span per character, then transitions every span to a
    // displaced, rotated, blurred and transparent state while the real text is
    // made transparent. Returns a promise that resolves once the particles have
    // been removed. Resolves immediately when `target` has no `.chat` ancestor.
    function disintegrate(target) {
      return new Promise(resolve => {
        const parent = target.closest('.chat')
        if (!parent) { resolve(); return }
        const parentRect = parent.getBoundingClientRect()
        const style = getComputedStyle(target)
        // Fall back to assembling the shorthand by hand when the computed
        // `font` shorthand comes back empty.
        const font = style.font ||
          (style.fontStyle + ' ' + style.fontWeight + ' ' + style.fontSize + '/' + style.lineHeight + ' ' + style.fontFamily)
        const color = style.color
        // Reuse a single overlay per chat pane; create it on first use.
        let overlay = parent.querySelector('.dust-overlay')
        if (!overlay) {
          overlay = document.createElement('div')
          overlay.className = 'dust-overlay'
          parent.appendChild(overlay)
        }
        // Walk every text node and measure each character with a one-character
        // Range so the particle can be placed exactly on top of the glyph.
        const walker = document.createTreeWalker(target, NodeFilter.SHOW_TEXT)
        const range = document.createRange()
        const spans = []
        let node
        while ((node = walker.nextNode())) {
          const chars = node.textContent
          for (let i = 0; i < chars.length; i++) {
            // Spaces get no particle.
            if (chars[i] === ' ') continue
            range.setStart(node, i)
            range.setEnd(node, i + 1)
            const r = range.getBoundingClientRect()
            // Skip characters that occupy no area.
            if (r.width === 0 || r.height === 0) continue
            const span = document.createElement('span')
            span.className = 'dust'
            span.textContent = chars[i]
            // Coordinates are expressed relative to the pane's bounding box.
            span.style.left = (r.left - parentRect.left) + 'px'
            span.style.top  = (r.top  - parentRect.top)  + 'px'
            span.style.width  = r.width  + 'px'
            span.style.height = r.height + 'px'
            span.style.font = font
            span.style.color = color
            // Start state: fully visible, in place.
            span.style.opacity = '1'
            span.style.transform = 'translate(0,0)'
            // Each particle gets its own duration between 1500 and 2400 ms.
            span.style.transitionDuration = (1500 + Math.random() * 900) + 'ms'
            overlay.appendChild(span)
            spans.push(span)
          }
        }
        // Hide the original text; the particles now stand in for it.
        target.style.transition = 'color 0.35s ease, opacity 0.35s ease'
        target.style.color = 'transparent'
        // Layout read between writing the start state and the end state —
        // presumably to force a reflow so the transitions below actually run.
        void overlay.offsetHeight
        const cx = parentRect.width / 2
        spans.forEach((s) => {
          // Random start delay of up to 500 ms per particle.
          s.style.transitionDelay = (Math.random() * 500) + 'ms'
          const x0 = parseFloat(s.style.left)
          // Horizontal drift: slightly away from the pane's centre plus jitter.
          const dx = (x0 - cx) * 0.06 + (Math.random() - 0.5) * 36
          // Vertical drift is always downward (30–110 px).
          const dy = 30 + Math.random() * 80
          const rot = (Math.random() - 0.5) * 44
          s.style.transform = 'translate(' + dx + 'px,' + dy + 'px) rotate(' + rot + 'deg)'
          s.style.opacity = '0'
          s.style.filter = 'blur(2px)'
        })
        // NOTE(review): the longest possible particle animation is 500 ms delay
        // + 2400 ms duration = 2900 ms, which exceeds this 2600 ms timeout, so
        // the slowest particles are removed slightly before they finish.
        // This timer is not cancelled by the cleanup registry.
        setTimeout(() => {
          spans.forEach(s => s.remove())
          resolve()
        }, reduced ? 200 : 2600)
      })
    }
    // Opening line shared by both panes' scripts.
    const NOAH_TEXT = "My son's name is Noah. He turns six on September 12th."
    // Left-pane script: the visitor states a fact, the model claims it will
    // remember, and after the "two weeks later" divider the original fact is
    // dissolved before the model asks who Noah is. Ends with a "forgotten."
    // stamp.
    async function runForget() {
      const pane = leftChat
      // Adds a bubble for `who` and types `line` into it straight away.
      const speak = (who, line, speed) => typeInto(addMsg(pane, who), line, speed)

      const opener = addMsg(pane, 'You', { id: 'noah' })
      await delay(200)
      await typeInto(opener, NOAH_TEXT, 16)
      await delay(500)
      await speak('Model', "Noted. I'll remember that for next time we talk.", 14)
      await delay(900)

      addDivider(pane, 'two weeks later')
      await delay(700)
      await speak('You', "Help me plan Noah's birthday.", 18)
      await delay(700)

      // Dissolve the body of the opening message, if it is still in the pane.
      const fading = pane.querySelector('.msg[data-id="noah"] .body')
      if (fading) await disintegrate(fading)
      await delay(250)

      await speak('Model', "Of course. Who is Noah? How old is he turning?", 16)
      await delay(500)
      addStamp(pane, 'forgotten.')
    }
    // Right-pane script: same opening as the left pane, but the model files the
    // fact under a call number, a retrieval row appears after the "two weeks
    // later" divider, and the model answers with the remembered details. Ends
    // with a "remembered." stamp carrying the call number.
    async function runRemember() {
      const pane = rightChat
      const callNumber = 'W-042/R-01/D-003'
      // Adds a bubble for `who` and types `line` into it straight away.
      const speak = (who, line, speed) => typeInto(addMsg(pane, who), line, speed)

      const opener = addMsg(pane, 'You', { id: 'noah' })
      await delay(200)
      await typeInto(opener, NOAH_TEXT, 16)
      await delay(500)
      await speak('Model', "Noted. Filed — <strong>W-042/R-01/D-003</strong>.", 14)
      await delay(900)

      addDivider(pane, 'two weeks later')
      await delay(700)
      await speak('You', "Help me plan Noah's birthday.", 18)
      await delay(600)
      addRetrieval(pane, callNumber, 42)
      await delay(700)

      const recall = [
        "Of course — <strong>Noah</strong> turns <strong>six</strong> on <strong>September 12th</strong>. ",
        "You mentioned he loves the <strong>therizinosaurus</strong>, and a park on ",
        "<strong>Glebe Point Road</strong>. Shall we build from there?"
      ].join('')
      await speak('Model', recall, 11)
      await delay(500)
      addStamp(pane, 'remembered.', callNumber)
    }
    // Per-pane bookkeeping shared by runBoth, runSide and resetAll:
    //   running — a script is currently animating in that pane
    //   started — the pane has played (or is playing) since the last reset
    // Only the properties are ever mutated, so the bindings are constant.
    const running = { forget: false, remember: false }
    const started = { forget: false, remember: false }
    // Plays both panes side by side. No-op while either pane is animating.
    // The `running` flags are released and the replay button is shown even
    // when a script fails, so one error cannot leave the demo permanently
    // locked. The first failure is re-thrown after both panes have settled.
    async function runBoth() {
      if (running.forget || running.remember) return
      running.forget = running.remember = true
      started.forget = started.remember = true
      try {
        clear()
        await delay(200)
        // Wait for BOTH scripts to settle (not just the first rejection) so a
        // pane is never marked idle while it is still animating.
        const settle = run => run.then(() => null, err => ({ err }))
        const outcomes = await Promise.all([settle(runForget()), settle(runRemember())])
        const failed = outcomes.find(Boolean)
        if (failed) throw failed.err
      } finally {
        running.forget = running.remember = false
        if (replayBtn) replayBtn.classList.add('visible')
      }
    }
    // Plays a single pane (`side` is 'forget' or 'remember'). No-op when that
    // pane is animating or has already played since the last reset. The
    // `running` flag is released in `finally`, so a failing script cannot leave
    // the pane locked; the error itself still propagates to the caller. The
    // replay button is shown once both panes have started and neither is
    // running.
    async function runSide(side) {
      if (running[side] || started[side]) return
      running[side] = true
      started[side] = true
      try {
        const chat = side === 'forget' ? leftChat : rightChat
        chat.innerHTML = ''
        await delay(200)
        await (side === 'forget' ? runForget() : runRemember())
      } finally {
        running[side] = false
        if (started.forget && started.remember && !running.forget && !running.remember && replayBtn) {
          replayBtn.classList.add('visible')
        }
      }
    }
    // Marks both panes as not yet played and wipes their contents so the demo
    // can be armed again.
    function resetAll() {
      ['remember', 'forget'].forEach((side) => {
        started[side] = false
      })
      clear()
    }
    // Media query for viewports up to 900px wide. armObservers uses it to choose
    // between one observer per pane and a single observer for the whole block.
    // NOTE(review): presumably this mirrors the CSS breakpoint at which the two
    // panes stack vertically — confirm against the stylesheet.
    const stackedMQ = window.matchMedia('(max-width: 900px)')
    // Reads the live `matches` value on every call rather than caching it.
    const isStacked = () => stackedMQ.matches
    // Calls `onReach` once, the first time `el` is prominent in the viewport,
    // then disconnects. "Prominent" means either
    //   - at least 65% of the element is inside the (shrunken) root, or
    //   - the visible part covers at least 60% of the window height and the
    //     element's top edge is within the top 15% of the window.
    // Returns the observer so callers can disconnect early. Without
    // IntersectionObserver support `onReach` fires immediately and null is
    // returned.
    function observeOnce(el, onReach) {
      const supported = 'IntersectionObserver' in window
      if (!supported) {
        onReach()
        return null
      }

      let fired = false
      const isProminent = (entry) => {
        const box = entry.boundingClientRect
        const shareOfElement = entry.intersectionRatio
        const shareOfViewport = entry.intersectionRect.height / window.innerHeight
        const nearTop = box.top <= window.innerHeight * 0.15
        return shareOfElement >= 0.65 || (shareOfViewport >= 0.60 && nearTop)
      }

      const watcher = new IntersectionObserver((entries) => {
        for (const entry of entries) {
          // Once fired, every remaining entry in this and later batches is ignored.
          if (fired) break
          if (!entry.isIntersecting) continue
          if (!isProminent(entry)) continue
          fired = true
          onReach()
          watcher.disconnect()
        }
      }, {
        threshold: [0.1, 0.25, 0.4, 0.55, 0.7, 0.85, 1.0],
        rootMargin: '-8% 0px -8% 0px'
      })
      watcher.observe(el)
      return watcher
    }
    // Observers currently armed for the demo. Entries may be null when
    // observeOnce had no IntersectionObserver to create.
    let observers = []
    // Disconnects every armed observer and forgets them.
    function disconnectObservers() {
      for (const io of observers) {
        if (io) io.disconnect()
      }
      observers = []
    }
    // Re-arms the scroll triggers from scratch. In the stacked layout each pane
    // starts on its own when reached; otherwise a single observer on the whole
    // comparison block starts both panes together.
    function armObservers() {
      disconnectObservers()
      if (!isStacked()) {
        observers.push(observeOnce(compare, runBoth))
        return
      }
      const panes = [
        ['forget', '.demo-forget'],
        ['remember', '.demo-remember']
      ]
      for (const [side, selector] of panes) {
        const pane = compare.querySelector(selector)
        observers.push(observeOnce(pane, () => runSide(side)))
      }
    }
    // Replay: wipe both panes and re-arm the scroll triggers, so the demo
    // starts again as soon as it is (or already is) prominent in the viewport.
    function onReplayClick() {
      resetAll()
      armObservers()
    }
    if (replayBtn) {
      replayBtn.addEventListener('click', onReplayClick)
    }
    // Initial arming on mount.
    armObservers()
    // Teardown for SPA navigation: drop the observers and the click listener.
    cleanups.push(function teardownForgettingDemo() {
      disconnectObservers()
      if (!replayBtn) return
      replayBtn.removeEventListener('click', onReplayClick)
    })
  })()
 })
 // Teardown on leave: remove the body class added on mount, then run every
 // registered cleanup thunk, most recently registered first. A failing thunk
 // must not prevent the remaining ones from running.
 onBeforeUnmount(() => {
  const hasDom = typeof document !== 'undefined'
  if (!hasDom) return
  document.body.classList.remove('mempalace-active')
  while (cleanups.length > 0) {
    const teardown = cleanups.pop()
    try {
      teardown()
    } catch (_) {
      /* swallow — teardown best-effort */
    }
  }
 })
 }
--- a/Show More
+++ b/Show More
		`@@ -1 +0,0 @@`
			`OpenArena owner claim verification for MemPalace/mempalace: 09AE2C2E66CC4B5CBD7D`