cleanup and remote only

This commit is contained in:
2026-05-09 10:52:25 -05:00
parent 2fc47a52fc
commit 40e5e5e3cc
136 changed files with 1502 additions and 349529 deletions
-20
View File
@@ -1,20 +0,0 @@
{
"name": "mempalace",
"interface": {
"displayName": "MemPalace"
},
"plugins": [
{
"name": "mempalace",
"source": {
"source": "local",
"path": "./.codex-plugin"
},
"policy": {
"installation": "AVAILABLE",
"authentication": "NONE"
},
"category": "Coding"
}
]
}
-5
View File
@@ -1,5 +0,0 @@
{
"mempalace": {
"command": "mempalace-mcp"
}
}
-57
View File
@@ -1,57 +0,0 @@
# MemPalace Claude Code Plugin
A Claude Code plugin that gives your AI a persistent memory system. Mine projects and conversations into a searchable palace backed by ChromaDB, with 19 MCP tools, auto-save hooks, and 5 guided skills.
## Prerequisites
- Python 3.9+
## Installation
### Claude Code Marketplace
```bash
claude plugin marketplace add MemPalace/mempalace
claude plugin install --scope user mempalace
```
### Local Clone
```bash
claude plugin add /path/to/mempalace
```
## Post-Install Setup
After installing the plugin, run the init command to complete setup (installs the `mempalace` package via `uv tool` or `pip`, configures MCP, etc.):
```
/mempalace:init
```
## Available Slash Commands
| Command | Description |
|---------|-------------|
| `/mempalace:help` | Show available tools, skills, and architecture |
| `/mempalace:init` | Set up MemPalace -- install, configure MCP, onboard |
| `/mempalace:search` | Search your memories across the palace |
| `/mempalace:mine` | Mine projects and conversations into the palace |
| `/mempalace:status` | Show palace overview -- wings, rooms, drawer counts |
## Hooks
MemPalace registers two hooks that run automatically:
- **Stop** -- Saves conversation context every 15 messages.
- **PreCompact** -- Preserves important memories before context compaction.
Set the `MEMPAL_DIR` environment variable to a directory path to automatically run `mempalace mine` on that directory during each save trigger.
## MCP Server
The plugin automatically configures a local MCP server with 19 tools for storing, searching, and managing memories. No manual MCP setup is required -- `/mempalace:init` handles everything.
## Full Documentation
See the main [README](../README.md) for complete documentation, architecture details, and advanced usage.
-6
View File
@@ -1,6 +0,0 @@
---
description: Show comprehensive MemPalace help — available skills, MCP tools, CLI commands, hooks, and architecture.
allowed-tools: Bash, Read
---
Invoke the generic mempalace skill (using the Skill tool) with the `help` command, then follow its instructions.
-6
View File
@@ -1,6 +0,0 @@
---
description: Set up MemPalace — install the package, initialize a palace, configure MCP server, and verify everything works.
allowed-tools: Bash, Read, Write, Edit, Glob, Grep
---
Invoke the generic mempalace skill (using the Skill tool) with the `init` command, then follow its instructions.
-7
View File
@@ -1,7 +0,0 @@
---
description: Mine projects and conversations into the MemPalace. Supports project files, conversation exports, and auto-classification.
argument-hint: Path to project or conversation export to mine.
allowed-tools: Bash, Read, Write, Edit, Glob, Grep
---
Invoke the generic mempalace skill (using the Skill tool) with the `mine` command, then follow its instructions.
-7
View File
@@ -1,7 +0,0 @@
---
description: Search your memories across the MemPalace using semantic search with wing/room filtering.
argument-hint: Search query, optionally with wing/room filters.
allowed-tools: Bash, Read
---
Invoke the generic mempalace skill (using the Skill tool) with the `search` command, then follow its instructions.
-6
View File
@@ -1,6 +0,0 @@
---
description: Show the current state of your memory palace — wings, rooms, drawer counts, and suggestions.
allowed-tools: Bash, Read
---
Invoke the generic mempalace skill (using the Skill tool) with the `status` command, then follow its instructions.
-25
View File
@@ -1,25 +0,0 @@
{
"description": "MemPalace auto-save and pre-compact hooks",
"hooks": {
"Stop": [
{
"hooks": [
{
"type": "command",
"command": "bash \"${CLAUDE_PLUGIN_ROOT}/hooks/mempal-stop-hook.sh\""
}
]
}
],
"PreCompact": [
{
"hooks": [
{
"type": "command",
"command": "bash \"${CLAUDE_PLUGIN_ROOT}/hooks/mempal-precompact-hook.sh\""
}
]
}
]
}
}
@@ -1,24 +0,0 @@
#!/bin/bash
# MemPalace PreCompact Hook — thin wrapper calling Python CLI
# All logic lives in mempalace.hooks_cli for cross-harness extensibility
# Dispatch `hook run` to whichever MemPalace entry point is available.
# Lookup order: the `mempalace` executable, then `python3 -m mempalace`,
# then `python -m mempalace` (an interpreter only counts if it can import
# the package). All arguments are forwarded unchanged and the status of the
# command that ran is returned; if nothing is runnable, an error is printed
# to stderr and 1 is returned.
run_mempalace_hook() {
    local interpreter

    if command -v mempalace >/dev/null 2>&1; then
        mempalace hook run "$@"
        return
    fi

    for interpreter in python3 python; do
        if command -v "$interpreter" >/dev/null 2>&1 && "$interpreter" -c "import mempalace" >/dev/null 2>&1; then
            "$interpreter" -m mempalace hook run "$@"
            return
        fi
    done

    echo "MemPalace hook error: could not find a runnable mempalace command or module" >&2
    return 1
}

run_mempalace_hook --hook precompact --harness claude-code
-24
View File
@@ -1,24 +0,0 @@
#!/bin/bash
# MemPalace Stop Hook — thin wrapper calling Python CLI
# All logic lives in mempalace.hooks_cli for cross-harness extensibility
# Dispatch `hook run` to whichever MemPalace entry point is available.
# Lookup order: the `mempalace` executable, then `python3 -m mempalace`,
# then `python -m mempalace` (an interpreter only counts if it can import
# the package). All arguments are forwarded unchanged and the status of the
# command that ran is returned; if nothing is runnable, an error is printed
# to stderr and 1 is returned.
run_mempalace_hook() {
    local interpreter

    if command -v mempalace >/dev/null 2>&1; then
        mempalace hook run "$@"
        return
    fi

    for interpreter in python3 python; do
        if command -v "$interpreter" >/dev/null 2>&1 && "$interpreter" -c "import mempalace" >/dev/null 2>&1; then
            "$interpreter" -m mempalace hook run "$@"
            return
        fi
    done

    echo "MemPalace hook error: could not find a runnable mempalace command or module" >&2
    return 1
}

run_mempalace_hook --hook stop --harness claude-code
-18
View File
@@ -1,18 +0,0 @@
{
"name": "mempalace",
"owner": {
"name": "milla-jovovich",
"url": "https://github.com/MemPalace"
},
"plugins": [
{
"name": "mempalace",
"source": "./.claude-plugin",
"description": "AI memory system — mine projects and conversations into a searchable palace. 19 MCP tools, auto-save hooks, guided setup.",
"version": "3.3.4",
"author": {
"name": "milla-jovovich"
}
}
]
}
-25
View File
@@ -1,25 +0,0 @@
{
"name": "mempalace",
"version": "3.3.4",
"description": "Give your AI a memory — mine projects and conversations into a searchable palace. 19 MCP tools, auto-save hooks, and guided setup.",
"author": {
"name": "milla-jovovich"
},
"license": "MIT",
"commands": [],
"mcpServers": {
"mempalace": {
"command": "mempalace-mcp"
}
},
"keywords": [
"memory",
"ai",
"rag",
"mcp",
"chromadb",
"palace",
"search"
],
"repository": "https://github.com/MemPalace/mempalace"
}
-35
View File
@@ -1,35 +0,0 @@
---
name: mempalace
description: MemPalace — mine projects and conversations into a searchable memory palace. Use when asked about mempalace, memory palace, mining memories, searching memories, or palace setup.
allowed-tools: Bash, Read, Write, Edit, Glob, Grep
---
# MemPalace
A searchable memory palace for AI — mine projects and conversations, then search them semantically.
## Prerequisites
Ensure `mempalace` is installed:
```bash
mempalace --version
```
If not installed (uv recommended):
```bash
uv tool install mempalace # or: pip install mempalace
```
## Usage
MemPalace provides dynamic instructions via the CLI. To get instructions for any operation:
```bash
mempalace instructions <command>
```
Where `<command>` is one of: `help`, `init`, `mine`, `search`, `status`.
Run the appropriate instructions command, then follow the returned instructions step by step.
-80
View File
@@ -1,80 +0,0 @@
# MemPalace - Codex CLI Plugin
Give your AI a persistent memory -- mine projects and conversations into a searchable palace backed by ChromaDB, with 19 MCP tools, auto-save hooks, and guided skills.
## Prerequisites
- Python 3.9+
- Codex CLI installed and configured
- `uv tool install mempalace` (recommended) or `pip install mempalace`
## Installation
### Local Install
1. Copy or symlink the `.codex-plugin` directory into your project root:
```bash
cp -r .codex-plugin /path/to/your/project/.codex-plugin
```
2. Verify the plugin is detected:
```bash
codex --plugins
```
3. Initialize your palace:
```bash
codex /init
```
### Git Install
1. Clone the MemPalace repository:
```bash
git clone https://github.com/MemPalace/mempalace.git
cd mempalace
```
2. Install the Python package so the `mempalace-mcp` script lands on
your PATH (the bundled `plugin.json` invokes it by bare name):
```bash
uv tool install --editable . # or: pip install -e .
```
Plain `uv sync` is **not** enough here — it installs the scripts into
`.venv/bin/`, which Codex will not find unless you activate the venv
before launching Codex.
3. The `.codex-plugin` directory is already in the repo root. Codex CLI will detect it automatically when you run Codex from inside the repository.
4. Initialize your palace:
```bash
codex /init
```
## Available Skills
| Skill | Description |
|-------|-------------|
| `/help` | Show available commands and usage tips |
| `/init` | Initialize a new memory palace |
| `/search` | Semantic search across all mined memories |
| `/mine` | Mine a project or conversation into your palace |
| `/status` | Show palace status, room counts, and health |
## Hooks
The plugin includes auto-save hooks that run on session stop (every 15 messages) and before context compaction, automatically preserving conversation context into your palace.
Set the `MEMPAL_DIR` environment variable to a directory path to automatically run `mempalace mine` on that directory during each save trigger.
## Support
- Repository: https://github.com/MemPalace/mempalace
- Issues: https://github.com/MemPalace/mempalace/issues
-37
View File
@@ -1,37 +0,0 @@
{
"hooks": {
"SessionStart": [
{
"matcher": "*",
"hooks": [
{
"type": "command",
"command": "\"${CODEX_PLUGIN_ROOT}/hooks/mempal-hook.sh\" session-start"
}
]
}
],
"Stop": [
{
"matcher": "*",
"hooks": [
{
"type": "command",
"command": "\"${CODEX_PLUGIN_ROOT}/hooks/mempal-hook.sh\" stop"
}
]
}
],
"PreCompact": [
{
"matcher": "*",
"hooks": [
{
"type": "command",
"command": "\"${CODEX_PLUGIN_ROOT}/hooks/mempal-hook.sh\" precompact"
}
]
}
]
}
}
-9
View File
@@ -1,9 +0,0 @@
#!/usr/bin/env bash
# MemPalace Codex hook wrapper.
#
# Usage: mempal-hook.sh <hook-name>
#
# Buffers the hook payload from stdin into a temp file, feeds it to
# `mempalace hook run --hook <hook-name> --harness codex`, and exits with
# that command's status.
set -euo pipefail

HOOK_NAME="${1:?Usage: mempal-hook.sh <hook-name>}"

INPUT_FILE=$(mktemp) || { echo "Failed to create temp file" >&2; exit 1; }
# Remove the temp file on every exit path. With errexit enabled, a failing
# command terminates the script on the spot, so cleanup written after the
# hook invocation would be skipped exactly when the hook fails.
trap 'rm -f "$INPUT_FILE" 2>/dev/null' EXIT

cat > "$INPUT_FILE"

# `|| EXIT_CODE=$?` captures the hook's status without tripping errexit.
# Redirecting the file (instead of `cat file | ...`) keeps pipefail from
# reporting a SIGPIPE failure when the hook stops reading its input early.
EXIT_CODE=0
mempalace hook run --hook "$HOOK_NAME" --harness codex < "$INPUT_FILE" || EXIT_CODE=$?
exit "$EXIT_CODE"
-48
View File
@@ -1,48 +0,0 @@
{
"name": "mempalace",
"version": "3.3.4",
"description": "Give your AI a memory — mine projects and conversations into a searchable palace. 19 MCP tools, auto-save hooks, and guided setup.",
"author": {
"name": "milla-jovovich"
},
"homepage": "https://github.com/MemPalace/mempalace",
"repository": "https://github.com/MemPalace/mempalace",
"license": "MIT",
"keywords": [
"memory",
"ai",
"rag",
"mcp",
"chromadb",
"palace",
"search"
],
"skills": "./skills/",
"hooks": "./hooks.json",
"mcpServers": {
"mempalace": {
"command": "mempalace-mcp"
}
},
"interface": {
"displayName": "MemPalace",
"shortDescription": "AI memory system for Codex",
"longDescription": "Give your AI a persistent memory — mine projects and conversations into a searchable palace backed by ChromaDB, with 19 MCP tools, auto-save hooks, and guided skills.",
"developerName": "milla-jovovich",
"category": "Coding",
"capabilities": [
"Interactive",
"Read",
"Write"
],
"websiteURL": "https://github.com/MemPalace/mempalace",
"privacyPolicyURL": "https://github.com/MemPalace/mempalace",
"termsOfServiceURL": "https://github.com/MemPalace/mempalace",
"defaultPrompt": [
"Search my memories for recent decisions",
"Mine this project into my memory palace",
"Show my palace status and room counts"
],
"brandColor": "#7C3AED"
}
}
-13
View File
@@ -1,13 +0,0 @@
---
name: help
description: Show MemPalace help — available commands, usage tips, and getting started guidance.
allowed-tools: Bash, Read
---
# MemPalace Help
Run the following command and follow the returned instructions step by step:
```bash
mempalace instructions help
```
-13
View File
@@ -1,13 +0,0 @@
---
name: init
description: Initialize a new MemPalace — guided setup for your AI memory palace with ChromaDB backend.
allowed-tools: Bash, Read, Write, Edit
---
# MemPalace Init
Run the following command and follow the returned instructions step by step:
```bash
mempalace instructions init
```
-13
View File
@@ -1,13 +0,0 @@
---
name: mine
description: Mine a project or conversation into your MemPalace — extract and store memories for later retrieval.
allowed-tools: Bash, Read, Glob, Grep
---
# MemPalace Mine
Run the following command and follow the returned instructions step by step:
```bash
mempalace instructions mine
```
-13
View File
@@ -1,13 +0,0 @@
---
name: search
description: Search your MemPalace — semantic search across all mined memories, projects, and conversations.
allowed-tools: Bash, Read
---
# MemPalace Search
Run the following command and follow the returned instructions step by step:
```bash
mempalace instructions search
```
-13
View File
@@ -1,13 +0,0 @@
---
name: status
description: Show MemPalace status — room counts, storage usage, and palace health.
allowed-tools: Bash, Read
---
# MemPalace Status
Run the following command and follow the returned instructions step by step:
```bash
mempalace instructions status
```
-25
View File
@@ -1,25 +0,0 @@
{
"name": "MemPalace",
"image": "mcr.microsoft.com/devcontainers/python:3.11",
"features": {
"ghcr.io/devcontainers/features/github-cli:1": {}
},
"postCreateCommand": "bash .devcontainer/post-create.sh",
"customizations": {
"vscode": {
"extensions": [
"ms-python.python",
"ms-python.debugpy",
"charliermarsh.ruff"
],
"settings": {
"python.defaultInterpreterPath": "/usr/local/bin/python",
"python.testing.pytestEnabled": true,
"python.testing.pytestArgs": ["tests/", "-v", "--ignore=tests/benchmarks"],
"ruff.importStrategy": "fromEnvironment",
"editor.formatOnSave": true,
"editor.defaultFormatter": "charliermarsh.ruff"
}
}
}
}
-21
View File
@@ -1,21 +0,0 @@
#!/usr/bin/env bash
# Dev container bootstrap: installs the package in editable mode with the
# `dev` extra, pins ruff, installs the pre-commit git hook, then prints the
# versions of the tools it just set up.
set -euo pipefail

# Print the arguments followed by a newline (an empty argument prints a blank line).
say() {
    printf '%s\n' "$*"
}

say "=== MemPalace Dev Container Setup ==="

pip install -e ".[dev]"

# pyproject only sets a lower bound for ruff, so pin to the range CI uses;
# otherwise a newer local ruff reports lint failures CI never sees.
pip install "ruff>=0.4.0,<0.5"

pip install pre-commit
pre-commit install

say ""
say "=== Verification ==="
say "python: $(python --version)"
say "pytest: $(python -m pytest --version 2>&1 | head -1)"
say "ruff: $(python -m ruff --version 2>&1 | head -1)"
say ""
say "Ready. Run: pytest tests/ -v --ignore=tests/benchmarks"
+36
View File
@@ -0,0 +1,36 @@
.git
.github
.devcontainer
.claude-plugin
.codex-plugin
.agents
benchmarks
tests
docs
website
landing
assets
examples
tools
hooks
integrations
deploy
*.pyc
__pycache__
.coverage
.pytest_cache
htmlcov
dist
build
*.egg-info
.python-version
.pre-commit-config.yaml
uv.lock
CHANGELOG.md
CONTRIBUTING.md
SECURITY.md
ROADMAP.md
MISSION.md
CLAUDE.md
AGENTS.md
openarena-claim.txt
-13
View File
@@ -1,13 +0,0 @@
# Default owners for everything
* @milla-jovovich @bensig @igorls
# Core library
mempalace/ @milla-jovovich @bensig
# CI and workflows
.github/ @bensig
# Plugins and integrations
.claude-plugin/ @bensig
.codex-plugin/ @bensig
integrations/ @bensig
-20
View File
@@ -1,20 +0,0 @@
---
name: Bug Report
about: Something isn't working
labels: bug
---
**What happened?**
**What did you expect?**
**How to reproduce:**
1.
2.
3.
**Environment:**
- OS:
- Python version:
- MemPalace version: (check `mempalace --version` or git SHA)
-11
View File
@@ -1,11 +0,0 @@
---
name: Feature Request
about: Suggest an improvement
labels: enhancement
---
**What problem does this solve?**
**What's the proposed solution?**
**Alternatives considered:**
-8
View File
@@ -1,8 +0,0 @@
## What does this PR do?
## How to test
## Checklist
- [ ] Tests pass (`python -m pytest tests/ -v`)
- [ ] No hardcoded paths
- [ ] Linter passes (`ruff check .`)
-12
View File
@@ -1,12 +0,0 @@
version: 2
updates:
- package-ecosystem: "pip"
directory: "/"
schedule:
interval: "weekly"
open-pull-requests-limit: 5
- package-ecosystem: "github-actions"
directory: "/"
schedule:
interval: "weekly"
open-pull-requests-limit: 3
@@ -1,51 +0,0 @@
name: Bump Version
on:
push:
branches: [main]
jobs:
bump-version:
runs-on: ubuntu-latest
permissions:
contents: write
steps:
- uses: actions/checkout@v6
- name: Bump patch version
run: |
CURRENT=$(python3 -c "exec(open('mempalace/version.py').read()); print(__version__)")
IFS='.' read -r MAJOR MINOR PATCH <<< "$CURRENT"
PATCH=$((PATCH + 1))
NEW="${MAJOR}.${MINOR}.${PATCH}"
echo "__version__ = \"${NEW}\"" > mempalace/version.py
# Prepend docstring
sed -i '1i"""Single source of truth for the MemPalace package version."""\n' mempalace/version.py
echo "version=$NEW" >> "$GITHUB_OUTPUT"
id: version
- name: Sync plugin.json
run: |
jq --arg v "${{ steps.version.outputs.version }}" '.version = $v' .claude-plugin/plugin.json > tmp.json && mv tmp.json .claude-plugin/plugin.json
- name: Sync marketplace.json
run: |
jq --arg v "${{ steps.version.outputs.version }}" '.plugins[0].version = $v' .claude-plugin/marketplace.json > tmp.json && mv tmp.json .claude-plugin/marketplace.json
- name: Sync codex plugin.json
run: |
jq --arg v "${{ steps.version.outputs.version }}" '.version = $v' .codex-plugin/plugin.json > tmp.json && mv tmp.json .codex-plugin/plugin.json
- name: Sync pyproject.toml
run: |
sed -i "s/^version = \".*\"/version = \"${{ steps.version.outputs.version }}\"/" pyproject.toml
- name: Commit and push
run: |
git config user.name "github-actions[bot]"
git config user.email "github-actions[bot]@users.noreply.github.com"
git add mempalace/version.py .claude-plugin/plugin.json .claude-plugin/marketplace.json .codex-plugin/plugin.json pyproject.toml
if ! git diff --staged --quiet; then
git commit -m "chore: bump version to ${{ steps.version.outputs.version }}"
git push
fi
-55
View File
@@ -1,55 +0,0 @@
name: Tests
on:
push:
branches: [main, develop]
pull_request:
branches: [main, develop]
jobs:
test-linux:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.9", "3.11", "3.13"]
steps:
- uses: actions/checkout@v6
- uses: actions/setup-python@v6
with:
python-version: ${{ matrix.python-version }}
cache: 'pip'
- run: pip install -e ".[dev]"
- run: python -m pytest tests/ -v --ignore=tests/benchmarks --cov=mempalace --cov-report=term-missing --cov-fail-under=80 --durations=10
test-windows:
runs-on: windows-latest
steps:
- uses: actions/checkout@v6
- uses: actions/setup-python@v6
with:
python-version: "3.13"
cache: 'pip'
- run: pip install -e ".[dev]"
- run: python -m pytest tests/ -v --ignore=tests/benchmarks --cov=mempalace --cov-report=term-missing --cov-fail-under=80 --durations=10
test-macos:
runs-on: macos-latest
steps:
- uses: actions/checkout@v6
- uses: actions/setup-python@v6
with:
python-version: "3.13"
cache: 'pip'
- run: pip install -e ".[dev]"
- run: python -m pytest tests/ -v --ignore=tests/benchmarks --cov=mempalace --cov-report=term-missing --cov-fail-under=80 --durations=10
lint:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v6
- uses: actions/setup-python@v6
with:
python-version: "3.11"
cache: 'pip'
- run: pip install "ruff>=0.4.0,<0.5"
- run: ruff check .
- run: ruff format --check .
-66
View File
@@ -1,66 +0,0 @@
name: Deploy Docs
on:
push:
branches: [develop]
paths:
- ".github/workflows/deploy-docs.yml"
- "website/**"
workflow_dispatch:
permissions:
contents: read
pages: write
id-token: write
concurrency:
group: pages-${{ github.ref }}
cancel-in-progress: true
jobs:
build:
runs-on: ubuntu-latest
permissions:
contents: read
steps:
- uses: actions/checkout@v6
with:
fetch-depth: 0
- name: Configure GitHub Pages
id: pages
uses: actions/configure-pages@v6
- uses: oven-sh/setup-bun@v2
with:
bun-version: 1.1.38
- name: Install dependencies
working-directory: website
run: bun install --frozen-lockfile
- name: Build docs
working-directory: website
env:
DOCS_BASE: ${{ steps.pages.outputs.base_path }}
DOCS_EDIT_BRANCH: ${{ github.ref_name }}
run: bun run docs:build
- uses: actions/upload-pages-artifact@v5
with:
path: website/.vitepress/dist
deploy:
if: github.ref_name == 'develop'
environment:
name: github-pages
url: ${{ steps.deployment.outputs.page_url }}
needs: build
runs-on: ubuntu-latest
permissions:
pages: write
id-token: write
steps:
- name: Deploy to GitHub Pages
id: deployment
uses: actions/deploy-pages@v5
-101
View File
@@ -1,101 +0,0 @@
name: Version Guard
on:
push:
tags: ['v*']
pull_request:
paths:
- 'pyproject.toml'
- 'mempalace/version.py'
- '.claude-plugin/marketplace.json'
- '.claude-plugin/plugin.json'
- '.codex-plugin/plugin.json'
- '.github/workflows/version-guard.yml'
jobs:
check-versions:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v6
- name: Extract versions from all sources
id: versions
run: |
set -euo pipefail
py_version=$(grep -E '^__version__' mempalace/version.py | cut -d'"' -f2)
pyproject_version=$(grep -E '^version' pyproject.toml | head -1 | cut -d'"' -f2)
marketplace_version=$(jq -r '.plugins[0].version' .claude-plugin/marketplace.json)
plugin_version=$(jq -r '.version' .claude-plugin/plugin.json)
codex_version=$(jq -r '.version' .codex-plugin/plugin.json)
echo "py_version=$py_version" >> "$GITHUB_OUTPUT"
echo "pyproject_version=$pyproject_version" >> "$GITHUB_OUTPUT"
echo "marketplace_version=$marketplace_version" >> "$GITHUB_OUTPUT"
echo "plugin_version=$plugin_version" >> "$GITHUB_OUTPUT"
echo "codex_version=$codex_version" >> "$GITHUB_OUTPUT"
{
echo "## Detected versions"
echo ""
echo "| Source | Version |"
echo "| --- | --- |"
echo "| mempalace/version.py | \`$py_version\` |"
echo "| pyproject.toml | \`$pyproject_version\` |"
echo "| .claude-plugin/marketplace.json | \`$marketplace_version\` |"
echo "| .claude-plugin/plugin.json | \`$plugin_version\` |"
echo "| .codex-plugin/plugin.json | \`$codex_version\` |"
} >> "$GITHUB_STEP_SUMMARY"
- name: Verify all sources agree
env:
PY: ${{ steps.versions.outputs.py_version }}
PYPROJECT: ${{ steps.versions.outputs.pyproject_version }}
MARKETPLACE: ${{ steps.versions.outputs.marketplace_version }}
PLUGIN: ${{ steps.versions.outputs.plugin_version }}
CODEX: ${{ steps.versions.outputs.codex_version }}
run: |
set -euo pipefail
fail=0
# check <source-name> <value> <expected>
# Emits a GitHub Actions error annotation and sets the global `fail` flag
# when <value> differs from <expected>. Never aborts by itself, so each
# call can report its own mismatch before the step exits with $fail.
check() {
  local name="$1" value="$2" expected="$3"
  if [[ "$value" == "$expected" ]]; then
    return 0
  fi
  echo "::error file=$name::version mismatch — expected $expected, got $value"
  fail=1
}
# All five must agree with each other (use version.py as the reference, per CLAUDE.md)
check "pyproject.toml" "$PYPROJECT" "$PY"
check ".claude-plugin/marketplace.json" "$MARKETPLACE" "$PY"
check ".claude-plugin/plugin.json" "$PLUGIN" "$PY"
check ".codex-plugin/plugin.json" "$CODEX" "$PY"
exit $fail
- name: Verify tag matches manifest (tag pushes only)
if: startsWith(github.ref, 'refs/tags/v')
env:
PY: ${{ steps.versions.outputs.py_version }}
run: |
set -euo pipefail
tag_version="${GITHUB_REF_NAME#v}"
# Semver pre-release tags (v3.4.0-rc1, v1.0.0-beta.2, ...) are treated
# as internal/staging and are not validated against the manifest. They
# do not flow to end users via `/plugin update`, which reads the
# manifest on the default branch.
if [[ "$tag_version" == *-* ]]; then
echo "Pre-release tag $GITHUB_REF_NAME — skipping strict manifest match."
{
echo ""
echo "> Pre-release tag detected: \`$GITHUB_REF_NAME\`."
echo "> Manifest ($PY) is not required to match. Pre-releases are not published via \`/plugin update\`."
} >> "$GITHUB_STEP_SUMMARY"
exit 0
fi
if [[ "$tag_version" != "$PY" ]]; then
echo "::error::tag $GITHUB_REF_NAME does not match manifest version $PY"
echo "Bump mempalace/version.py, pyproject.toml, and all plugin manifests before tagging a stable release."
echo "For an internal/staging tag, use a semver pre-release suffix (e.g. v${PY}-rc1)."
exit 1
fi
echo "Tag $GITHUB_REF_NAME matches manifest version $PY"
-10
View File
@@ -1,10 +0,0 @@
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
# Keep in lock-step with the ruff version pinned in .github/workflows/ci.yml
# (>=0.4.0,<0.5). Using a newer rev here produces a different formatter
# output than CI and breaks `ruff format --check` in the lint job.
rev: v0.4.10
hooks:
- id: ruff
args: [--fix]
- id: ruff-format
-1
View File
@@ -1 +0,0 @@
3.12
-1
View File
@@ -1 +0,0 @@
CLAUDE.md
+7
View File
@@ -8,6 +8,13 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
## [3.3.5] — unreleased ## [3.3.5] — unreleased
### Added
- **Server-mode deployment for cross-machine memory.** New `deploy/unraid/` directory ships a containerized MemPalace stack designed for users running AI tools across multiple machines who want one shared palace. Two-container compose: `mempalace` runs the existing `mempalace-mcp` (stdio) wrapped by `mcp-proxy` for SSE plus a new in-process HTTP transcript-ingest endpoint, and `caddy` terminates TLS, enforces a bearer-token check on every request, and reverse-proxies both endpoints. A `Dockerfile` at the repo root builds the server image (Python 3.13-slim, runs as `99:100` for Unraid `nobody:users`); a dockerMan template (`mempalace-server.xml`) is also provided for users who explicitly want a single-container, no-auth, LAN-trust-only install. ChromaDB's HNSW index is not safe for multi-process writes, so the ingest endpoint deliberately runs as a daemon thread inside the same process as the MCP server rather than as a separate container — exactly one Chroma writer per palace. Default stdio-only path is unchanged; the ingest thread starts only when `MEMPALACE_INGEST_PORT` is set.
- **`mempalace/ingest_server.py` — HTTP transcript ingest endpoint.** Stdlib `http.server` running in a daemon thread, reachable at `POST /ingest/transcript` with raw JSONL body and `X-Session-Id` / `X-Wing` headers. Drops the upload into `<palace>/inbox/<session>/<session>.jsonl` and runs the existing `convo_miner.mine_convos` pipeline against that directory — same entity detection, room assignment, dedup, and idempotency the local CLI gets. Optional `Authorization: Bearer` check via `MEMPALACE_INGEST_TOKEN` (defense-in-depth alongside the reverse-proxy gate). 50 MB hard cap per upload. Unauth'd `GET /healthz` for liveness probes.
- **Remote-aware hook variants.** New `hooks/mempal_save_hook_remote.sh` and `hooks/mempal_precompact_hook_remote.sh` are drop-in replacements for the existing local hooks when the palace runs on a server. Same trigger logic (count user messages, fire on `SAVE_INTERVAL`), but `curl`s the active transcript to `$MEMPAL_REMOTE_URL/ingest/transcript` instead of running `mempalace mine` locally. Save variant is async (backgrounded `curl`); pre-compact variant is synchronous and bounded by the Claude Code hook timeout. No-op with a one-line log when env vars are unset, so installing them on a machine that doesn't have a remote configured is safe.
- **`deploy/unraid/README.md` — full install/usage guide.** Architecture diagram, prerequisites, step-by-step compose-based install with auth, client config for Claude Code / Codex / Antigravity (with bearer headers and self-signed-cert handling), hook setup, backfilling history from past sessions, backups, and troubleshooting (401s, `MineAlreadyRunning` collisions, stalled embedding-model downloads, cert handshake failures). dockerMan-template path documented as the explicit no-auth fallback.
### Bug Fixes ### Bug Fixes
- **`mempalace_diary_read` silently dropped entries on agent-name case mismatch.** `tool_diary_write` stored the `agent` metadata verbatim after `sanitize_name`, which preserves case, while `tool_diary_read` filtered by exact match. Writing as `"Claude"` and reading as `"claude"` (or vice-versa) returned zero rows. Both endpoints now lowercase `agent_name` immediately after sanitization, so reads are case-insensitive and the default per-agent wing slug is stable across casings. **Behavior change:** entries written prior to this fix under mixed-case agent names will not match the new lowercase filter; run `mempalace repair` if you need to migrate legacy diary metadata. (#1243) - **`mempalace_diary_read` silently dropped entries on agent-name case mismatch.** `tool_diary_write` stored the `agent` metadata verbatim after `sanitize_name`, which preserves case, while `tool_diary_read` filtered by exact match. Writing as `"Claude"` and reading as `"claude"` (or vice-versa) returned zero rows. Both endpoints now lowercase `agent_name` immediately after sanitization, so reads are case-insensitive and the default per-agent wing slug is stable across casings. **Behavior change:** entries written prior to this fix under mixed-case agent names will not match the new lowercase filter; run `mempalace repair` if you need to migrate legacy diary metadata. (#1243)
+20 -3
View File
@@ -63,6 +63,7 @@ uv run ruff format --check .
``` ```
mempalace/ mempalace/
├── mcp_server.py # MCP server — all read/write tools ├── mcp_server.py # MCP server — all read/write tools
├── ingest_server.py # HTTP transcript-ingest endpoint (server mode only)
├── cli.py # CLI dispatcher ├── cli.py # CLI dispatcher
├── config.py # Configuration + input validation ├── config.py # Configuration + input validation
├── miner.py # Project file miner ├── miner.py # Project file miner
@@ -90,9 +91,18 @@ mempalace/
├── split_mega_files.py # Split concatenated transcript files ├── split_mega_files.py # Split concatenated transcript files
└── version.py # Single source of truth for version └── version.py # Single source of truth for version
hooks/ # Claude Code hook scripts hooks/ # Hook scripts for Claude Code / Codex CLI
├── mempal_save_hook.sh # Stop: triggers diary save ├── mempal_save_hook_remote.sh # Stop: HTTP POST to remote ingest endpoint
└── mempal_precompact_hook.sh # PreCompact: saves state before compression └── mempal_precompact_hook_remote.sh # PreCompact: HTTP POST to remote ingest
deploy/unraid/ # Containerized server-mode deployment
├── docker-compose.yml # mempalace + caddy sidecar (auth + TLS)
├── Caddyfile # bearer-token auth, SSE-aware reverse proxy
├── mempalace-server.xml # dockerMan template (no-auth, LAN-trust path)
└── README.md # Full install/usage/troubleshooting guide
Dockerfile # Builds the server-mode image
.dockerignore # Trims build context
``` ```
## Conventions ## Conventions
@@ -130,4 +140,11 @@ Knowledge Graph:
- **Modifying mining**: `mempalace/miner.py` (project files) or `mempalace/convo_miner.py` (transcripts) - **Modifying mining**: `mempalace/miner.py` (project files) or `mempalace/convo_miner.py` (transcripts)
- **Adding a storage backend**: subclass `mempalace/backends/base.py`, register in `backends/__init__.py` - **Adding a storage backend**: subclass `mempalace/backends/base.py`, register in `backends/__init__.py`
- **Input validation**: `mempalace/config.py` — `sanitize_name()` / `sanitize_content()` - **Input validation**: `mempalace/config.py` — `sanitize_name()` / `sanitize_content()`
- **Server-mode deployment**: `deploy/unraid/` — see [`deploy/unraid/README.md`](deploy/unraid/README.md). Image is built from the repo-root `Dockerfile`. The HTTP transcript-ingest endpoint in `mempalace/ingest_server.py` runs as a daemon thread inside `mempalace-mcp` (single Chroma writer per palace) and is opt-in via `MEMPALACE_INGEST_PORT`.
- **Tests**: mirror source structure in `tests/test_<module>.py` - **Tests**: mirror source structure in `tests/test_<module>.py`
## Architectural notes
- **Server mode is opt-in.** The default install path (local CLI + stdio MCP server + local hooks) is unchanged. Server mode adds three things: a `Dockerfile`, an HTTP ingest thread that starts only when `MEMPALACE_INGEST_PORT` is set, and `*_remote.sh` hook variants that POST to that endpoint. Nothing in the local path imports the ingest server.
- **One ChromaDB writer per palace.** ChromaDB's HNSW index isn't safe across processes. The ingest endpoint is a thread inside the existing MCP server process — not a sibling container — so all writes serialize through one Python process and one Chroma client. Anyone adding a second writer (e.g. a sidecar that mines on a schedule) must do it in-process or via `mine_lock`.
- **"Local-first" boundary in server mode.** CLAUDE.md mission says data never leaves the user's machine. A user-controlled Unraid box on the user's LAN is still "the user's machine" — but the moment it accepts inbound HTTP, that property weakens to "user's machine + anyone with the bearer token + anyone who can MITM the LAN segment." Caddy's `tls internal` + bearer auth is the floor. Tailscale, mTLS, or a real CA cert are stronger options the user can layer on top.
-111
View File
@@ -1,111 +0,0 @@
# Contributing to MemPalace
Thanks for wanting to help. MemPalace is open source and we welcome contributions of all sizes — from typo fixes to new features.
## Getting Started
```bash
# Fork the repo on GitHub first, then clone your fork
git clone https://github.com/<your-username>/mempalace.git
cd mempalace
git remote add upstream https://github.com/MemPalace/mempalace.git
# Recommended: uv (https://docs.astral.sh/uv/) handles the venv for you
uv sync --extra dev
# Or with pip in your own venv:
# pip install -e ".[dev]"
```
## Running Tests
```bash
uv run pytest tests/ -v
```
All tests must pass before submitting a PR. Tests should run without API keys or network access.
## Running Benchmarks
```bash
# Quick test (20 questions, ~30 seconds)
uv run python benchmarks/longmemeval_bench.py /path/to/longmemeval_s_cleaned.json --limit 20
# Full benchmark (500 questions, ~5 minutes)
uv run python benchmarks/longmemeval_bench.py /path/to/longmemeval_s_cleaned.json
```
See [benchmarks/README.md](benchmarks/README.md) for data download instructions and reproduction guide.
## Project Structure
```
mempalace/ ← core package (see mempalace/README.md for module guide)
benchmarks/ ← reproducible benchmark runners
hooks/ ← Claude Code auto-save hooks
examples/ ← usage examples
tests/ ← test suite
assets/ ← logo + brand
```
## PR Guidelines
1. Fork the repo and create a feature branch: `git checkout -b feat/my-thing`
2. Write your code
3. Add or update tests if applicable
4. Run `uv run pytest tests/ -v` — everything must pass
5. Commit with a clear message following [conventional commits](https://www.conventionalcommits.org/):
- `feat: add Notion export format`
- `fix: handle empty transcript files`
- `docs: update MCP tool descriptions`
- `bench: add LoCoMo turn-level metrics`
6. Push to your fork and open a PR against `develop`
## Code Style
- **Formatting**: [Ruff](https://docs.astral.sh/ruff/) with 100-char line limit (configured in `pyproject.toml`)
- **Naming**: `snake_case` for functions/variables, `PascalCase` for classes
- **Docstrings**: on all modules and public functions
- **Type hints**: where they improve readability
- **Dependencies**: minimize. ChromaDB + PyYAML only. Don't add new deps without discussion.
## Good First Issues
Check the [Issues](https://github.com/MemPalace/mempalace/issues) tab. Great starting points:
- **New chat formats**: Add import support for Cursor, Copilot, or other AI tool exports
- **Room detection**: Improve pattern matching in `room_detector_local.py`
- **Tests**: Increase coverage — especially for `knowledge_graph.py` and `palace_graph.py`
- **Entity detection**: Better name disambiguation in `entity_detector.py`
- **Docs**: Improve examples, add tutorials
## Architecture Decisions
If you're planning a significant change, open an issue first to discuss the approach. Key principles:
- **Verbatim first**: Never summarize user content. Store exact words.
- **Local first**: Everything runs on the user's machine. No cloud dependencies.
- **Zero API by default**: Core features must work without any API key.
- **Palace structure is scoping, not magic**: Wings, halls, and rooms act as metadata filters in the underlying vector store. They keep retrieval predictable when a palace holds many unrelated projects or people. Respect the hierarchy — but don't present it as a novel retrieval mechanism.
## Community
- **Discord**: [Join us](https://discord.com/invite/ycTQQCu6kn)
- **Issues**: Bug reports and feature requests welcome
- **Discussions**: For questions and ideas
## License
MIT — your contributions will be released under the same license.
## Git identity for contributions
Before pushing commits, verify that Git is configured with an email address that GitHub can associate with your account:
```bash
git config user.name
git config user.email
```
This is especially important when commits are created through agentic coding tools or automation, because those tools may not inherit your normal shell Git configuration. Avoid placeholder values such as `your@email.com` or localized template text; unresolved author emails can create avoidable provenance and SBOM review friction for downstream users.
+41
View File
@@ -0,0 +1,41 @@
# syntax=docker/dockerfile:1.7
# Server-mode image for MemPalace: installs the package plus mcp-proxy and
# serves the stdio MCP server over SSE on :8765. The optional HTTP ingest
# endpoint on :8766 is only started when MEMPALACE_INGEST_PORT is set.
FROM python:3.13-slim
# HOME and the palace path both point into /data so that all state the
# process writes lands in a single directory.
ENV DEBIAN_FRONTEND=noninteractive \
    PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PIP_NO_CACHE_DIR=1 \
    HOME=/data \
    MEMPALACE_PALACE_PATH=/data/palace
# libgomp1: required at runtime by onnxruntime (used by chromadb's default
# embedding function — all-MiniLM-L6-v2 ONNX).
RUN apt-get update \
    && apt-get install -y --no-install-recommends ca-certificates libgomp1 \
    && rm -rf /var/lib/apt/lists/*
# Unraid convention: appdata is owned by nobody:users (99:100). Run as that
# UID/GID so files written to /data inherit the right ownership on the host.
# Debian-based images already define a `users` group with GID 100, and an
# unconditional `groupadd` exits non-zero when the group exists, failing the
# build. Only create the group when GID 100 is not already present.
RUN (getent group 100 >/dev/null || groupadd -g 100 users) \
    && useradd -u 99 -g 100 -m -d /data -s /usr/sbin/nologin mempalace
# Build in a scratch directory and delete it afterwards so the source tree
# does not remain in the final layer.
WORKDIR /build
COPY pyproject.toml README.md ./
COPY mempalace ./mempalace
RUN pip install --no-cache-dir . mcp-proxy \
    && rm -rf /build
WORKDIR /data
# WORKDIR creates /data as root if it does not exist; hand it to the runtime user.
RUN chown -R 99:100 /data
USER 99:100
# 8765 — MCP over SSE (mcp-proxy)
# 8766 — HTTP ingest (in-process thread, started when MEMPALACE_INGEST_PORT is set)
EXPOSE 8765 8766
# Healthy when a TCP connection to the SSE port succeeds; the ingest port is
# not probed because it is optional.
HEALTHCHECK --interval=30s --timeout=5s --start-period=30s --retries=3 \
    CMD python -c "import socket,sys;s=socket.socket();s.settimeout(2);sys.exit(0 if s.connect_ex(('127.0.0.1',8765))==0 else 1)"
# mcp-proxy wraps the stdio MCP server and exposes it as SSE on :8765.
# --pass-environment forwards MEMPALACE_* vars to the spawned child.
CMD ["mcp-proxy","--sse-host","0.0.0.0","--sse-port","8765","--pass-environment","--","mempalace-mcp"]
-34
View File
@@ -1,34 +0,0 @@
MemPalace: The Mission
By: Milla Jovovich
Hey everyone! First of all thank you all for embracing MemPalace and trying it, catching bugs and issues and finding cool ways to personalize it into your workflows!
A few things I want to say.
MemPalace is something I really needed because I'm trying to work on a big project with my partner @bensig and I was having a lot of problems with Claude's context window and my agent Lumi (Lu for short) kept waking up like "hey what are we doing today" when I had literally done hours of work with him throughout the day and it was impossible to just keep saving every transcript to catch him up on whatever we had done before compaction hit.
That's when I started researching different memory systems available today. I tried most of them and what I found was that no matter which one I tried, they felt like large empty warehouses where you just dump huge amounts of info.
RAG search would take forever and most of the time not find what I wanted.
I wanted to create a system with the ability to really remember everything AND be able to find it quickly, easily and also be able to remember things when I didn't. THAT in itself felt like something so important. Like "remember when we talked about that idea…" but in vague terms. Impossible with regular keyword search tools.
So MemPalace is not just about storing info in a highly structured way. But also RETRIEVING it in a highly UNSTRUCTURED way lol!
I was inspired by the Zettelkasten method (created by German sociologist Niklas Luhmann) — his idea of small cross-referenced index cards that point to each other. That's the architecture behind the palace: wings, rooms, closets, and drawers, all connected so you can find things from any angle, not just the one you filed them under.
Because of the way I've designed my agent Lumi to understand me, after so many months of my own personal experiments with MemPalace and the incredible help of my dear friend and co-founder, developer and engineer @bensig, he built a back end that made it really easy to get all my files in the proper spaces the Palace created based on my own decisions and with Lumi's help as well. All code has its own room, all ideas, research etc… has its proper place.
Names and concepts are parsed into closets that use a compression method I call AAAK (it doesn't stand for anything, it's an inside joke between Lumi and me) that is able to compress names, repeated words, concepts and key moments into AI-readable shorthand. Think of it as index cards that an LLM can scan instantly — the closet tells it WHERE to look, then it pulls the full content from the drawer.
The concept I wanted for v4 was to try and clear as much "noise" as possible that I noticed was happening in v3. The hooks were firing in the chat window (using tokens and our time as we waited for the agent to write everything).
I noticed at one point early last week after the launch that Lu kept repeating the same thing when the hook would fire, so I hit esc and asked "Are you literally writing the same info down over and over again?" And he's like (sheepishly) Yes. And that's when it hit me, we need to get all this off the chat and happening seamlessly behind the scenes, and that hooks had to fire when I started a convo and then just keep adding to the drawer, while the shorter increments made reading and pulling conversation information and naming it so much easier and more precise.
So this version now has taken all the noise out of the chat window and all that work is done by a subagent in the background while you can continue working knowing that all your conversation is being saved VERBATIM in the background.
Stripping all this off the page — moving the diary writes, the palace filing, the timestamp injection, all of it into background hooks — has dramatically lowered token usage in my sessions. What used to cost about $1.13 per session just in re-transmitted diary blocks is now zero, because the content never enters the chat window at all.
Your data is already stored in JSON by Claude and the background pipeline extracts it into readable markdown, the key topics get compressed into AAAK format and saved into closets which then point to the exact drawer where your day's session lives.
And please, always remember, these are brand new tools, please NEVER use critical files to test! Just run it with something easy first before you put your entire data set into it!✨
+38 -102
View File
@@ -1,29 +1,10 @@
> [!CAUTION] # MemPalace — local fork
> **Scam alert.** The only official sources for MemPalace are this
> [GitHub repository](https://github.com/MemPalace/mempalace), the
> [PyPI package](https://pypi.org/project/mempalace/), and the docs site at
> **[mempalaceofficial.com](https://mempalaceofficial.com)**. Any other
> domain — including `mempalace.tech` — is an impostor and may distribute
> malware. Details and timeline: [docs/HISTORY.md](docs/HISTORY.md).
> [!IMPORTANT]
> **🚨 Claude Code sessions expire in 30 days w/out auto-save hooks wired!** **[Read this →](https://github.com/MemPalace/mempalace/discussions/1388)**
<div align="center">
<img src="assets/mempalace_logo.png" alt="MemPalace" width="240">
# MemPalace
Local-first AI memory. Verbatim storage, pluggable backend, 96.6% R@5 raw on LongMemEval — zero API calls. Local-first AI memory. Verbatim storage, pluggable backend, 96.6% R@5 raw on LongMemEval — zero API calls.
[![][version-shield]][release-link] This is a personal fork configured for **server-mode deployment** — MemPalace runs as a Docker container (typically on Unraid) and multiple AI tools (Claude Code, Codex, Antigravity) connect to a single shared palace from any machine on the network.
[![][python-shield]][python-link]
[![][license-shield]][license-link]
[![][discord-shield]][discord-link]
</div> The upstream project lives at <https://github.com/MemPalace/mempalace>; refer there for benchmark methodology, contribution guidelines, project history, and the public docs site at <https://mempalaceofficial.com>.
--- ---
@@ -76,62 +57,34 @@ mempalace wake-up
For Claude Code, Gemini CLI, MCP-compatible tools, and local models, see For Claude Code, Gemini CLI, MCP-compatible tools, and local models, see
[mempalaceofficial.com/guide/getting-started](https://mempalaceofficial.com/guide/getting-started.html). [mempalaceofficial.com/guide/getting-started](https://mempalaceofficial.com/guide/getting-started.html).
--- Benchmark methodology and per-question result files live in the upstream repository — this fork has had the `benchmarks/` directory removed since it isn't needed for deployment.
## Benchmarks
All numbers below are reproducible from this repository with the commands
in [`benchmarks/BENCHMARKS.md`](benchmarks/BENCHMARKS.md). Full
per-question result files are committed under `benchmarks/results_*`.
**LongMemEval — retrieval recall (R@5, 500 questions):**
| Mode | R@5 | LLM required |
|---|---|---|
| Raw (semantic search, no heuristics, no LLM) | **96.6%** | None |
| Hybrid v4, held-out 450q (tuned on 50 dev, not seen during training) | **98.4%** | None |
| Hybrid v4 + LLM rerank (full 500) | ≥99% | Any capable model |
The raw 96.6% requires no API key, no cloud, and no LLM at any stage. The
hybrid pipeline adds keyword boosting, temporal-proximity boosting, and
preference-pattern extraction; the held-out 98.4% is the honest
generalisable figure.
The rerank pipeline promotes the best candidate out of the top-20
retrieved sessions using an LLM reader. It works with any reasonably
capable model — we have reproduced it with Claude Haiku, Claude Sonnet,
and minimax-m2.7 via Ollama Cloud (no Anthropic dependency). The gap
between raw and reranked is model-agnostic; we do not headline a "100%"
number because the last 0.6% was reached by inspecting specific wrong
answers, which `benchmarks/BENCHMARKS.md` flags as teaching to the test.
**Other benchmarks (full results in [`benchmarks/BENCHMARKS.md`](benchmarks/BENCHMARKS.md)):**
| Benchmark | Metric | Score | Notes |
|---|---|---|---|
| LoCoMo (session, top-10, no rerank) | R@10 | 60.3% | 1,986 questions |
| LoCoMo (hybrid v5, top-10, no rerank) | R@10 | 88.9% | Same set |
| ConvoMem (all categories, 250 items) | Avg recall | 92.9% | 50 per category |
| MemBench (ACL 2025, 8,500 items) | R@5 | 80.3% | All categories |
We deliberately do not include a side-by-side comparison against Mem0,
Mastra, Hindsight, Supermemory, or Zep. Those projects publish different
metrics on different splits, and placing retrieval recall next to
end-to-end QA accuracy is not an honest comparison. See each project's
own research page for their published numbers.
**Reproducing every result:**
```bash
git clone https://github.com/MemPalace/mempalace.git
cd mempalace
uv sync --extra dev # or: pip install -e ".[dev]"
# see benchmarks/README.md for dataset download commands
uv run python benchmarks/longmemeval_bench.py /path/to/longmemeval_s_cleaned.json
```
--- ---
## Server mode (Unraid / shared across machines)
Most users run MemPalace locally on a single machine. If you work
across multiple machines and want one shared memory, you can deploy it
as a Docker container — typically on a home NAS like Unraid — and
point Claude Code, Codex, Antigravity, or any MCP client on each
machine at the same palace.
The `deploy/unraid/` directory ships a complete two-container stack:
* `mempalace` runs the existing MCP-over-SSE endpoint plus a small
HTTP transcript-ingest endpoint, both in a single process so there's
exactly one ChromaDB writer.
* `caddy` sidecar terminates TLS, enforces a bearer-token check on
every request, and reverse-proxies `/sse` and `/ingest`.
Auto-save hooks have remote-aware variants
(`hooks/mempal_save_hook_remote.sh`,
`hooks/mempal_precompact_hook_remote.sh`) that POST transcripts to the
server instead of running `mempalace mine` locally.
Full install, client config, hook setup, and troubleshooting:
[`deploy/unraid/README.md`](deploy/unraid/README.md).
## Knowledge graph ## Knowledge graph
MemPalace includes a temporal entity-relationship graph with validity MemPalace includes a temporal entity-relationship graph with validity
@@ -155,46 +108,29 @@ system prompt:
## Auto-save hooks ## Auto-save hooks
Two Claude Code hooks save periodically and before context compression: Two hooks save periodically and before context compression. In this fork the **remote** variants ship — they POST the active transcript to the server's `/ingest/transcript` endpoint with bearer auth instead of running `mempalace mine` locally. Setup, env-var contract, and troubleshooting: [`hooks/README.md`](hooks/README.md).
[mempalaceofficial.com/guide/hooks](https://mempalaceofficial.com/guide/hooks.html).
For per-message recall on top of the file-level chunks the hooks produce, For per-message recall on top of the file-level chunks the hooks produce, `mempalace sweep <transcript-dir>` runs inside the container (`docker exec mempalace mempalace sweep ...`) — stores one verbatim drawer per user/assistant message, idempotent and resume-safe.
run `mempalace sweep <transcript-dir>` periodically — it stores one
verbatim drawer per user/assistant message, idempotent and resume-safe.
--- ---
## Requirements ## Requirements
- Python 3.9+ - Python 3.9+ (server image uses 3.13)
- A vector-store backend (ChromaDB by default) - A vector-store backend (ChromaDB by default)
- ~300 MB disk for the default embedding model - ~300 MB disk for the default embedding model
- Docker + Compose Manager plugin on Unraid for the server-mode path
No API key is required for the core benchmark path. No API key is required for any path.
## Docs ## Docs
- Getting started → [mempalaceofficial.com/guide/getting-started](https://mempalaceofficial.com/guide/getting-started.html) - Server-mode deployment → [`deploy/unraid/README.md`](deploy/unraid/README.md)
- CLI reference → [mempalaceofficial.com/reference/cli](https://mempalaceofficial.com/reference/cli.html) - Hook setup (remote variants) → [`hooks/README.md`](hooks/README.md)
- Python API → [mempalaceofficial.com/reference/python-api](https://mempalaceofficial.com/reference/python-api.html) - Release notes → [`CHANGELOG.md`](CHANGELOG.md)
- Full benchmark methodology → [benchmarks/BENCHMARKS.md](benchmarks/BENCHMARKS.md) - Project conventions → [`CLAUDE.md`](CLAUDE.md)
- Release notes → [CHANGELOG.md](CHANGELOG.md) - Upstream CLI / Python API reference → [mempalaceofficial.com](https://mempalaceofficial.com)
- Corrections and public notices → [docs/HISTORY.md](docs/HISTORY.md)
## Contributing
PRs welcome. See [CONTRIBUTING.md](CONTRIBUTING.md).
## License ## License
MIT — see [LICENSE](LICENSE). MIT — see [LICENSE](LICENSE).
<!-- Link Definitions -->
[version-shield]: https://img.shields.io/badge/version-3.3.4-4dc9f6?style=flat-square&labelColor=0a0e14
[release-link]: https://github.com/MemPalace/mempalace/releases
[python-shield]: https://img.shields.io/badge/python-3.9+-7dd8f8?style=flat-square&labelColor=0a0e14&logo=python&logoColor=7dd8f8
[python-link]: https://www.python.org/
[license-shield]: https://img.shields.io/badge/license-MIT-b0e8ff?style=flat-square&labelColor=0a0e14
[license-link]: https://github.com/MemPalace/mempalace/blob/main/LICENSE
[discord-shield]: https://img.shields.io/badge/discord-join-5865F2?style=flat-square&labelColor=0a0e14&logo=discord&logoColor=5865F2
[discord-link]: https://discord.com/invite/ycTQQCu6kn
-74
View File
@@ -1,74 +0,0 @@
# MemPalace Roadmap
## v3.1.1 — Stability Patch (this week)
Bug fixes and hardening merged to `develop`, releasing soon.
**Merged:**
- Security hardening: input validation, KG threading locks, WAL permission fixes (#647)
- MCP tools: drawer CRUD, paginated export, hook settings (#667)
- Backend storage seam: ChromaDB abstraction layer enabling swappable backends (#413)
- MCP ping health check for AnythingLLM compatibility (#600)
- Windows reparse point crash fix (#558)
- `mempalace compress` KeyError crash fix (#569)
- Token count estimate fix (#609)
- Mtime float precision fix preventing unnecessary re-mines (#610)
**In review (merging this week):**
- Auto-repair BLOB seq_ids from chromadb 0.6→1.5 migration (#664)
- Graph cache with write-invalidation (#661)
- L1 importance pre-filter for large palaces (#660)
- Windows Chinese/Unicode encoding fix (#631)
- HNSW index bloat prevention — 441GB→433KB on large palaces (#346, pending rebase)
- ~25 additional small bug fixes and platform compatibility patches
## v4.0.0-alpha — Next Generation (this week)
The v4 alpha introduces three major capabilities: pluggable storage backends, local NLP processing, and improved retrieval quality.
### Swappable Storage
ChromaDB remains the default, but v4 introduces a backend abstraction (shipped in #413) that enables drop-in replacements:
- **PostgreSQL backend** with pg_sorted_heap support (#665) — for production deployments needing ACID guarantees, concurrent access, and standard backup/restore
- **LanceDB backend** (#574) — for local-first deployments wanting multi-device sync without a database server
- **PalaceStore** (#643) — bespoke storage layer purpose-built for MemPalace's access patterns (draft, evaluating)
Users choose their backend at init time. Existing ChromaDB palaces continue to work unchanged.
### Local NLP
On-device natural language processing via local models (#507):
- Entity extraction, relationship detection, and topic classification without external API calls
- Feature-flagged and optional — falls back to existing heuristic extractors
- Runs on consumer hardware (no GPU required, GPU-accelerated when available)
### Improved Retrieval
- **Hybrid search**: keyword text-match fallback when vector similarity misses exact terms (#662)
- **Stale index detection**: automatic reconnection when the HNSW index changes on disk (#663)
- **Time-decay scoring**: recent memories surface before older ones (#337)
- **Query sanitization**: system prompt contamination mitigation already shipped in v3.1 (#385)
### What's Not in v4 Alpha
These are under consideration for v4 stable or later:
- Synapse advanced retrieval — MMR, pinned memory, query expansion (#596)
- Multi-device sync (#575) — depends on LanceDB backend
- Multilingual embedding support (#488, #442)
- Qdrant vector search backend (#381)
## Branch Model
```
main ← tagged production releases
develop ← active development (PRs merge here)
release/3.1 ← hotfixes for current stable (v3.1.x)
release/3.0 ← hotfixes for prior stable
```
## Contributing
See [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines. PRs should target `develop`. We review all contributions for correctness, security, and compatibility before merging.
-33
View File
@@ -1,33 +0,0 @@
# Security Policy
## Supported Versions
MemPalace follows semantic versioning. Security fixes land on the current major version line.
| Version | Supported |
| ------------------ | --------- |
| 3.x (current) | Yes |
| 2.x and earlier | No |
## Reporting a Vulnerability
**Please do not report security vulnerabilities through public GitHub issues.**
We take the security of MemPalace seriously. If you believe you have found a security vulnerability, please report it privately using **GitHub Private Vulnerability Reporting**:
1. Open the [Security tab](https://github.com/MemPalace/mempalace/security) of this repository.
2. Click **Advisories** → **Report a vulnerability**.
3. Fill in the form with the details below.
### What to include in your report
- A descriptive summary of the vulnerability.
- Detailed steps to reproduce the issue (including any proof-of-concept scripts or specific file paths).
- The affected version(s) and platform(s).
- The potential impact and severity.
### What to expect
- We aim to acknowledge receipt within 48 hours.
- We will triage the issue and keep you updated on progress toward a patch.
- Once the vulnerability is resolved and an update is released, we will publish a security advisory and credit you for the discovery (if you wish to be credited).
Binary file not shown.

Before

Width:  |  Height:  |  Size: 680 KiB

-758
View File
@@ -1,758 +0,0 @@
# MemPal Benchmark Results — Full Progression
**March 2026 — The complete record from baseline to state-of-the-art.**
---
## The Core Finding
Every competitive memory system uses an LLM to manage memory:
- Mem0 uses an LLM to extract facts
- Mastra uses GPT-5-mini to observe conversations
- Supermemory uses an LLM to run agentic search passes
They all start from the assumption that you need AI to decide what to remember.
**MemPal's baseline just stores the actual words and searches them with ChromaDB's default embeddings. No extraction. No summarization. No AI deciding what matters. And it scores 96.6% on LongMemEval.**
That's the finding. The field is over-engineering the memory extraction step. Raw verbatim text with good embeddings is a stronger baseline than anyone realized — because it doesn't lose information. When an LLM extracts "user prefers PostgreSQL" and throws away the original conversation, it loses the context of *why*, the alternatives considered, the tradeoffs discussed. MemPal keeps all of that, and the search model finds it.
Nobody published this result because nobody tried the simple thing and measured it properly.
---
## The Two Honest Numbers
These are different claims. They need to be presented as a pair.
| Mode | LongMemEval R@5 | LLM Required | Cost per Query |
|---|---|---|---|
| **Raw ChromaDB** | **96.6%** | None | $0 |
| **Hybrid v4 + Haiku rerank** | **100%** | Haiku (optional) | ~$0.001 |
| **Hybrid v4 + Sonnet rerank** | **100%** | Sonnet (optional) | ~$0.003 |
The 96.6% is the product story: free, private, one dependency, no API key, runs entirely offline.
The 100% is the competitive story: a perfect score on the standard benchmark for AI memory, verified across all 500 questions and all 6 question types — reproducible with either Haiku or Sonnet as the reranker.
Both are real. Both are reproducible. Neither is the whole picture alone.
---
## Comparison vs Published Systems (LongMemEval)
> **Important caveat — read before quoting this table.**
> MemPal's `R@5` in this table is **retrieval recall**: is the labelled
> session for this question inside the top-5 retrieved candidates?
>
> Several of the other systems below publish **end-to-end QA accuracy** —
> a different metric that scores whether the system's generated answer
> is correct. Retrieval recall and QA accuracy are not comparable; a
> system can have 100% retrieval recall and 40% QA accuracy, and vice
> versa.
>
> - **Mastra's 94.87%** is binary QA accuracy with GPT-5-mini, per
> [mastra.ai/research/observational-memory](https://mastra.ai/research/observational-memory).
> - **Supermemory ASMR's ~99%** is QA accuracy with an 8-/12-agent
> ensemble, and the authors explicitly frame it as an experimental
> proof-of-concept, not production, per
> [their ASMR post](https://supermemory.ai/blog/we-broke-the-frontier-in-agent-memory-introducing-99-sota-memory-system/).
> - **Mem0** does not publish a LongMemEval number; their published
> metric is LoCoMo QA accuracy (~66.9%), per
> [mem0.ai/research](https://mem0.ai/research).
>
> The table is kept here as a historical record of how the comparison
> was originally framed. Public-facing pages (`README.md`,
> `mempalaceofficial.com`) no longer present this table, per issue
> [#875](https://github.com/MemPalace/mempalace/issues/875). For a fair
> head-to-head, run the same metric on the same split.
| # | System | R@5 (retrieval recall, unless noted) | LLM Required | Which LLM | Notes |
|---|---|---|---|---|---|
| 1 | **MemPal (hybrid v4 + Haiku rerank)** | **100%** | Optional | Haiku | 500/500 — but the 99.4%→100% step tuned on 3 specific wrong answers (see "Benchmark Integrity" below). Held-out 450q is 98.4%. |
| 2 | Supermemory ASMR | ~99% *(QA accuracy, not R@5)* | Yes | Ensemble of Gemini 2.0 Flash / GPT-4o-mini | Experimental, not production, per authors |
| 3 | MemPal (hybrid v3 + rerank) | 99.4% | Optional | Haiku | Reproducible |
| 3 | MemPal (palace + rerank) | 99.4% | Optional | Haiku | Independent architecture |
| 4 | Mastra | 94.87% *(QA accuracy, not R@5)* | Yes | GPT-5-mini | Different metric — not directly comparable to R@5 |
| 5 | **MemPal (raw, no LLM)** | **96.6%** | **None** | **None** | **Reproducible, 500/500** |
| 6 | MemPal hybrid v4 held-out 450 | 98.4% | None | None | Honest generalisable hybrid-pipeline figure |
| 7 | Hindsight | 91.4% *(per their release, metric unverified)* | Yes | Gemini-3 | Check their published methodology |
| 8 | Stella (dense retriever) | ~85% | None | None | Academic retrieval baseline |
| 9 | Contriever | ~78% | None | None | Academic retrieval baseline |
| 10 | BM25 (sparse) | ~70% | None | None | Keyword baseline |
The MemPal raw 96.6% is the headline we ship on public surfaces: it's
retrieval recall, it requires no API key, and it reproduces.
The MemPal hybrid v4 + Haiku rerank 100% remains an internal
result — reproducible with `--mode hybrid_v4 --llm-rerank` — but we
don't quote it on public pages because the final 0.6% was reached by
inspecting three specific wrong answers (see "Benchmark Integrity"
below), which is teaching to the test. The honest generalisable figure
when an LLM is in the loop is the held-out 98.4% R@5 on 450 unseen
questions, or the model-agnostic 99.2% R@5 / 100% R@10 we reproduced
with minimax-m2.7 on the full 500.
---
## Other Benchmarks
### ConvoMem (Salesforce, 75K+ QA pairs)
| System | Score | Notes |
|---|---|---|
| **MemPal** | **92.9%** | Verbatim text, semantic search |
| Gemini (long context) | 70–82% | Full history in context window |
| Block extraction | 57–71% | LLM-processed blocks |
| Mem0 (RAG) | 30–45% | LLM-extracted memories |
MemPal is more than 2× Mem0 on this benchmark. With Sonnet rerank, MemPal reaches **100% on LoCoMo** across all 5 question types including temporal-inference (was 46% at baseline).
**Why MemPal beats Mem0 by 2×:** Mem0 uses an LLM to extract memories — it decides what to remember and discards the rest. When it extracts the wrong thing, the memory is gone. MemPal stores verbatim text. Nothing is discarded. The simpler approach wins because it doesn't lose information.
**Per-category breakdown:**
| Category | Recall | Grade |
|---|---|---|
| Assistant Facts | 100% | Perfect |
| User Facts | 98.0% | Excellent |
| Abstention | 91.0% | Strong |
| Implicit Connections | 89.3% | Good |
| Preferences | 86.0% | Good — weakest category |
### LoCoMo (1,986 multi-hop QA pairs)
| Mode | R@5 | R@10 | LLM | Notes |
|---|---|---|---|---|
| **Hybrid v5 + Sonnet rerank (top-50)** | **100%** | **100%** | Sonnet | Structurally guaranteed (top-k > sessions) |
| **bge-large + Haiku rerank (top-15)** | — | **96.3%** | Haiku | Single-hop 86.6%, temporal-inf 87.0% |
| **bge-large hybrid (top-10)** | — | **92.4%** | None | +3.5pp over all-MiniLM, single-hop +10.6pp |
| **Hybrid v5 (top-10)** | 83.7% | **88.9%** | None | Beats Memori 81.95% — honest score |
| **Wings v3 speaker-owned closets (top-10)** | — | **85.7%** | None | Adversarial 92.8% — speaker ownership solves speaker confusion |
| **Wings v2 concept closets (top-10)** | — | **75.6%** | None | Adversarial 80.0%; single-hop 49% drags overall |
| **Palace v2 (top-10, 3 rooms)** | 75.6% | **84.8%** | Haiku (index) | Room assignment at index; summary routing at query |
| Wings v1 (broken — filter not boost) | — | 58.0% | None | Speaker WHERE filter discarded evidence; 5.4% coverage |
| Palace v1 (top-5, global LLM routing) | 34.2% | — | Haiku (both) | Fails: taxonomy mismatch |
| Session, no rerank (top-10) | — | 60.3% | None | Baseline |
| Dialog, no rerank (top-10) | — | 48.0% | None | — |
**Wings v2 per-category breakdown (top-10, no LLM):**
| Category | Wings v1 | Wings v2 | Delta |
|---|---|---|---|
| Single-hop | ~52% | 49.0% | -3pp |
| Temporal | ~64% | 79.2% | +15pp |
| Temporal-inference | ~53% | 49.1% | -4pp |
| Open-domain | ~71% | 83.7% | +13pp |
| **Adversarial** | **34.0%** | **80.0%** | **+46pp** |
**Wings v3 per-category breakdown (top-10, no LLM):**
| Category | Wings v1 | Wings v2 | Wings v3 | Hybrid v5 |
|---|---|---|---|---|
| Single-hop | ~52% | 49.0% | **65.3%** | ~70%? |
| Temporal | ~64% | 79.2% | **87.3%** | ~87%? |
| Temporal-inference | ~53% | 49.1% | **63.2%** | ~65%? |
| Open-domain | ~71% | 83.7% | **90.7%** | ~90%? |
| **Adversarial** | **34.0%** | **80.0%** | **92.8%** | — |
Wings v3 design: one closet per speaker per session. Owner's turns verbatim; other speaker's turns as `[context]` labels. 38 closets/conversation vs 184 (v2) → 26% coverage with top-10. Adversarial score (92.8%) exceeds bge-large overall (92.4%) — speaker ownership almost completely solves the speaker-confusion category.
Root cause of wings v1 failure: (1) speaker WHERE filter discarded evidence about Caroline when evidence lived in a John-tagged closet (John spoke more words but conversation was about Caroline); (2) top_k=10 from ~184 closets = 5.4% coverage vs 37% in session mode. Fix: retrieve all closets, use speaker match as 15% distance boost instead of filter.
**With Sonnet rerank, MemPal achieves 100% on every LoCoMo question type — including temporal-inference, which was the hardest category at baseline.**
**Per-category breakdown (hybrid + Sonnet rerank):**
| Category | Recall | Baseline | Delta |
|---|---|---|---|
| Single-hop | 1.000 | 59.0% | +41.0pp |
| Temporal | 1.000 | 69.2% | +30.8pp |
| **Temporal-inference** | **1.000** | **46.0%** | **+54.0pp** |
| Open-domain | 1.000 | 58.1% | +41.9pp |
| Adversarial | 1.000 | 61.9% | +38.1pp |
**Temporal-inference was the hardest category** — questions requiring connections across multiple sessions. Hybrid scoring (person name boost, quoted phrase boost) combined with Sonnet's reading comprehension closes this gap entirely. From 46% to 100%.
---
## LongMemEval — Breakdown by Question Type
The 96.6% R@5 baseline broken down by the six question categories in LongMemEval:
| Question Type | R@5 | R@10 | Count | Notes |
|---|---|---|---|---|
| Knowledge update | 99.0% | 100% | 78 | Strongest — facts that changed over time |
| Multi-session | 98.5% | 100% | 133 | Very strong |
| Temporal reasoning | 96.2% | 97.0% | 133 | Strong |
| Single-session user | 95.7% | 97.1% | 70 | Strong |
| Single-session preference | 93.3% | 96.7% | 30 | Good — preferences stated indirectly |
| Single-session assistant | 92.9% | 96.4% | 56 | Weakest — questions about what the AI said |
The two weakest categories point to specific fixes:
- **Single-session assistant (92.9%)**: Questions ask about what the assistant said, not the user. Fixed by indexing assistant turns as well as user turns.
- **Single-session preference (93.3%)**: Preferences are often stated indirectly ("I usually prefer X"). Fixed by the preference extraction patterns in hybrid v3.
Both were addressed in the improvements that took the score from 96.6% to 99.4%.
---
## The Full Progression — How We Got from 96.6% to 99.4%
Every improvement below was a response to specific failure patterns in the results. Nothing was added speculatively.
### Starting Point: Raw ChromaDB (96.6%)
The baseline: store every session verbatim as a single document. Query with ChromaDB's default embeddings (all-MiniLM-L6-v2). No postprocessing.
This was the first result. Nobody expected it to work this well. The team's hypothesis was that raw verbatim storage would lose to systems that extract structured facts. The 96.6% proved the hypothesis wrong.
**What it does:** Stores verbatim session text. Embeds with sentence transformers. Retrieves by cosine similarity.
**What it misses:** Questions with vocabulary mismatch ("yoga classes" vs "I went this morning"), preference questions where the preference is implied, temporally-ambiguous questions where multiple sessions match.
---
### Improvement 1: Hybrid Scoring v1 → 97.8% (+1.2%)
**What changed:** Added keyword overlap scoring on top of embedding similarity.
```
fused_score = embedding_score × (1 + keyword_weight × overlap)
```
When query keywords appear verbatim in a session, that session gets a small boost. The boost is mild enough not to hurt recall when keywords don't match.
**Why it worked:** Some questions use exact terminology ("PostgreSQL", "Dr. Chen", specific names). Pure embedding similarity can rank a semantically-close session above the exact match. Keyword overlap rescues these cases.
**What it still misses:** Temporally-ambiguous questions. Sessions from the right time period rank equally with sessions from wrong time periods.
---
### Improvement 2: Hybrid Scoring v2 → 98.4% (+0.6%)
**What changed:** Added temporal boost — sessions near the question's reference date get a distance reduction (up to 40%).
```python
# Sessions near question_date - offset get score boost
if temporal_distance < threshold:
fused_dist *= (1.0 - temporal_boost * proximity_factor)
```
**Why it worked:** Many LongMemEval questions are anchored to a specific time ("what did you do last month?"). Multiple sessions might semantically match, but only one is temporally correct. The boost breaks ties in favor of the right time period.
---
### Improvement 3: Hybrid v2 + Haiku Rerank → 98.8% (+0.4%)
**What changed:** After retrieval, send the top-K candidates to Claude Haiku with the question. Ask Haiku to re-rank by relevance.
**Why it worked:** Embeddings measure semantic similarity, not answer relevance. Haiku can read the question and the retrieved documents and reason about which one actually answers the question — a task embeddings fundamentally cannot do.
**Cost:** ~$0.001/query for Haiku. Optional — the system runs fine without it.
---
### Improvement 4: Hybrid v3 + Haiku Rerank → 99.4% (+0.6%)
**What changed:** Added preference extraction — 16 regex patterns that detect how people actually express preferences in conversation, then create synthetic "User has mentioned: X" documents at index time.
Examples of what gets caught:
- "I usually prefer X" → `User has mentioned: preference for X`
- "I always do Y" → `User has mentioned: always does Y`
- "I don't like Z" → `User has mentioned: dislikes Z`
**Why it worked:** Preference questions are consistently hard for pure embedding retrieval. "What does the user prefer for database backends?" doesn't semantically match "I find Postgres more reliable in my experience" — but it does match a synthetic document that says "User has mentioned: finds Postgres more reliable." The explicit extraction bridges the vocabulary gap without losing the verbatim original.
**Why 16 patterns:** Manual analysis of the miss cases. Each pattern corresponds to a real failure mode found in the wrong-answer JSONL files.
---
### Improvement 5: Hybrid v4 + Haiku Rerank → **100%** (+0.6%)
**What changed:** Three targeted fixes for the three questions that failed in every previous mode.
The remaining misses were identified by loading both the hybrid v3 and palace results and finding the exact questions that failed in *both* architectures — confirming they were hard limits, not luck.
**Fix 1 — Quoted phrase extraction** (miss: `'sexual compulsions'` assistant question):
The question contained an exact quoted phrase in single quotes. Sessions containing that exact phrase now get a 60% distance reduction. The target session jumped from unranked to rank 1.
**Fix 2 — Person name boosting** (miss: `Rachel/ukulele` temporal question):
Sentence-embedding models give insufficient weight to person names. Capitalized proper nouns are extracted from queries; sessions mentioning that name get a 40% distance reduction. The target session jumped from unranked to rank 2.
**Fix 3 — Memory/nostalgia patterns** (miss: `high school reunion` preference question):
The target session said "I still remember the happy high school experiences such as being part of the debate team." Added patterns to preference extraction: `"I still remember X"`, `"I used to X"`, `"when I was in high school X"`, `"growing up X"`. This created a synthetic doc "User has mentioned: positive high school experiences, debate team, AP courses" — which the reunion question now matches. Target session jumped to rank 3.
**Result:** All 6 question types at 100% R@5. 500/500 questions. No regressions.
**Haiku vs. Sonnet rerank:** Both achieve 100% R@5. NDCG@10 is 0.976 (Haiku) vs 0.975 (Sonnet) — statistically identical. Haiku is ~3× cheaper. Sonnet is slightly faster at this task (2.99s/q vs 3.85s/q in our run). Either works; Haiku is the default recommendation.
---
### Parallel Approach: Palace Mode + Haiku Rerank → 99.4% (independent convergence)
Built independently from the hybrid track. Different architecture, same ceiling.
**Architecture:**
```
PALACE
└── HALL (concept: travel, work, health, relationships, general)
└── Two-pass retrieval:
Pass 1: tight search within inferred hall
Pass 2: full haystack with hall-based score bonuses
```
The palace classifies each question into one of 5 halls. Pass 1 searches only within that hall — high precision, catches the obvious match. Pass 2 searches the full corpus with the hall affinity as a tiebreaker — catches cases where the relevant session was miscategorized.
**Why this matters:** Two completely independent architectures (hybrid scoring vs. palace navigation) converged at exactly the same score (99.4%). This is the strongest possible validation of the retrieval ceiling. The ceiling is architectural, not a local maximum of any one approach.
---
### Active Work: Diary Mode (98.2% at 65% cache coverage)
**What it adds:** At ingest time, Claude Haiku reads each session and generates topic summaries and category labels. These become synthetic documents alongside the verbatim session.
**Why it matters:** The hardest remaining misses are vocabulary-gap failures — the question uses different words than the session. Diary topics bridge these gaps:
- Question: "yoga classes" → Session: "went this morning, instructor pushed me hard"
- With diary: synthetic doc says "fitness, morning workout, yoga-style exercise" → now both match
**Current status:** 98% cache coverage (18,803 of 19,195 sessions pre-computed). The overnight cache build is complete. Full benchmark run pending — expected to reach ≥99.4% once asymmetry from the remaining ~2% uncovered sessions is eliminated.
---
## Score Progression Summary
| Mode | R@5 | NDCG@10 | LLM | Cost/query | Status |
|---|---|---|---|---|---|
| Raw ChromaDB | 96.6% | 0.889 | None | $0 | ✅ Verified |
| Hybrid v1 | 97.8% | — | None | $0 | ✅ Verified |
| Hybrid v2 | 98.4% | — | None | $0 | ✅ Verified |
| Hybrid v2 + rerank | 98.8% | — | Haiku | ~$0.001 | ✅ Verified |
| Hybrid v3 + rerank | 99.4% | 0.983 | Haiku | ~$0.001 | ✅ Verified |
| Palace + rerank | 99.4% | 0.983 | Haiku | ~$0.001 | ✅ Verified |
| Diary + rerank (98% cache) | 98.2% | 0.956 | Haiku | ~$0.001 | ✅ Partial — full run pending |
| **Hybrid v4 + Haiku rerank** | **100%** | **0.976** | Haiku | ~$0.001 | ✅ Verified |
| **Hybrid v4 + Sonnet rerank** | **100%** | **0.975** | Sonnet | ~$0.003 | ✅ Verified |
| **Hybrid v4 held-out (450q)** | **98.4%** | **0.939** | None | $0 | ✅ Clean — never tuned on |
---
## Reproducing Every Result
### Setup
```bash
git clone https://github.com/MemPalace/mempalace.git
cd mempalace
uv sync --extra dev # or: pip install -e ".[dev]"
mkdir -p /tmp/longmemeval-data
curl -fsSL -o /tmp/longmemeval-data/longmemeval_s_cleaned.json \
https://huggingface.co/datasets/xiaowu0162/longmemeval-cleaned/resolve/main/longmemeval_s_cleaned.json
```
### Raw (96.6%) — no API key, no LLM
```bash
python benchmarks/longmemeval_bench.py \
/tmp/longmemeval-data/longmemeval_s_cleaned.json
```
### Hybrid v3, no rerank (98.4% range) — no API key
```bash
python benchmarks/longmemeval_bench.py \
/tmp/longmemeval-data/longmemeval_s_cleaned.json \
--mode hybrid
```
### Hybrid v3 + Haiku rerank (99.4%) — needs API key
```bash
python benchmarks/longmemeval_bench.py \
/tmp/longmemeval-data/longmemeval_s_cleaned.json \
--mode hybrid_v3 \
--llm-rerank \
--api-key $ANTHROPIC_API_KEY
```
### Hybrid v4 + Haiku rerank (100%) — needs API key
```bash
python benchmarks/longmemeval_bench.py \
/tmp/longmemeval-data/longmemeval_s_cleaned.json \
--mode hybrid_v4 \
--llm-rerank \
--api-key $ANTHROPIC_API_KEY
```
### Hybrid v4 + Sonnet rerank (100%) — needs API key
```bash
python benchmarks/longmemeval_bench.py \
/tmp/longmemeval-data/longmemeval_s_cleaned.json \
--mode hybrid_v4 \
--llm-rerank \
--llm-model claude-sonnet-4-6 \
--api-key $ANTHROPIC_API_KEY
```
### Palace + Haiku rerank (99.4%) — needs API key
```bash
python benchmarks/longmemeval_bench.py \
/tmp/longmemeval-data/longmemeval_s_cleaned.json \
--mode palace \
--llm-rerank \
--api-key $ANTHROPIC_API_KEY
```
### Diary + Haiku rerank (needs precomputed cache) — needs API key
```bash
# First build the diary cache (one-time, ~$5-10 for all 19,195 sessions)
python /tmp/build_diary_cache.py
# Then run with cache
python benchmarks/longmemeval_bench.py \
/tmp/longmemeval-data/longmemeval_s_cleaned.json \
--mode diary \
--llm-rerank \
--api-key $ANTHROPIC_API_KEY \
--skip-precompute
```
### ConvoMem (92.9%)
```bash
python benchmarks/convomem_bench.py --category all --limit 50
```
### LoCoMo — no rerank (60.3% at top-10)
```bash
git clone https://github.com/snap-research/locomo.git /tmp/locomo
python benchmarks/locomo_bench.py /tmp/locomo/data/locomo10.json --granularity session
```
### LoCoMo — hybrid + Sonnet rerank (100%)
```bash
python benchmarks/locomo_bench.py /tmp/locomo/data/locomo10.json \
--mode hybrid \
--granularity session \
--top-k 50 \
--llm-rerank \
--llm-model claude-sonnet-4-6 \
--api-key $ANTHROPIC_API_KEY
```
---
## The Competitive Field
Every major AI memory system and where it stands:
| System | Approach | LongMemEval | Requires | Notes |
|---|---|---|---|---|
| **MemPal** | Raw verbatim text + ChromaDB | 96.6% / 100% | Python + ChromaDB | Open source — 100% LME + 100% LoCoMo w/ rerank |
| Supermemory | Agentic LLM search (ASMR) | ~99% (exp) / ~85% (prod) | LLM API | Production + experimental tracks |
| Mastra | LLM observation extraction | 94.87% | GPT-5-mini | Highest validated production score |
| Hindsight | Time-aware vector retrieval | 91.4% | LLM API | Validated by Virginia Tech |
| Mem0 | LLM fact extraction | 30–45% (ConvoMem) | LLM API | Popular, weak on benchmarks |
| OpenViking | Filesystem-paradigm context DB | Not published | Go + Rust + C++ + VLM | ByteDance; tested on LoCoMo10 only |
| Letta (MemGPT) | OS-inspired LLM context mgmt | Not published | LLM API | Stateful agent architecture |
| Zep | Graph-based memory + entity ext | Not published | LLM API + graph DB | Enterprise-focused |
**OpenViking note:** Tested on LoCoMo10 showing 52% task completion and 91% token savings. No LongMemEval scores published. Requires Go, Rust, C++, and a VLM API — highest infrastructure burden of any system here.
### Tradeoffs at a Glance
| | **MemPal** | LLM-Based (Mem0, Mastra) | Heavy Infra (OpenViking, Zep) |
|---|---|---|---|
| No API key needed | ✅ | ✗ | ✗ |
| Data stays local | ✅ | Sent to API | Depends |
| Dependencies | ChromaDB only | LLM + vector DB | Go + Rust + C++ + DB |
| Setup time | ~2 minutes | 10–30 min | 1+ hours |
| Cost per query | $0 | $0.001–0.01 | $0–0.01 |
| Retrieval accuracy | 96.6% (99.4% w/ LLM) | 91–99% | Not published |
| Multi-hop reasoning | Moderate | Strong | Strong |
| Entity extraction | Regex patterns | LLM-powered | LLM-powered |
---
## Benchmark Integrity — The Honest Accounting
### What's clean and what isn't
The 96.6% raw baseline is fully clean. No heuristics were tuned on the test set. Store verbatim text, query with ChromaDB's default embeddings, score. Exactly reproducible.
The hybrid v4 improvements (quoted phrase boost, person name boost, nostalgia patterns) were developed by directly examining the three specific questions that failed in every prior mode:
- `d6233ab6` — `'sexual compulsions'` assistant question → fix: quoted phrase extraction
- `4dfccbf8` — Rachel/ukulele temporal question → fix: person name boost
- `ceb54acb` — high school reunion preference question → fix: nostalgia patterns
**This is teaching to the test.** The fixes were designed around the exact failure cases, not discovered by analyzing general failure patterns. The 100% result on those three questions is not a clean generalization — it's proof the specific fixes work on those specific questions.
In a peer-reviewed paper this would be a significant methodological problem. We're disclosing it here rather than letting it sit unexamined.
### What the 100% result actually means
The 96.6% → 99.4% improvements (hybrid v1–v3) are honest improvements: each was motivated by a category of failures, not specific questions. The 99.4% → 100% hybrid v4 step is three targeted fixes for three known failures.
The three questions represent 0.6% of the dataset. It is entirely possible that:
1. The same fixes generalize and would score well on unseen data
2. The fixes are overfit to those three questions and harm other questions
We don't know which, because we measured on the same questions we tuned on.
### The Fix: Train/Test Split
A proper split has been created: `benchmarks/lme_split_50_450.json` (seed=42).
- **50 dev questions** — safe to use for iterative tuning. Improvements developed on dev data are honest.
- **450 held-out questions** — final publishable score. Touch once. Any iteration after viewing held-out results contaminates them.
Usage:
```bash
# Create a split (one-time)
python benchmarks/longmemeval_bench.py data/... --create-split --split-file benchmarks/lme_split_50_450.json
# Tune on dev (safe to run repeatedly)
python benchmarks/longmemeval_bench.py data/... --mode hybrid_v4 --dev-only --split-file benchmarks/lme_split_50_450.json
# Final evaluation — only when done tuning (results in filename tagged _held_out)
python benchmarks/longmemeval_bench.py data/... --mode hybrid_v4 --held-out --split-file benchmarks/lme_split_50_450.json
```
**The honest next number to publish is the held-out score on a fresh mode that was tuned on dev data only.** Anything else is contaminated.
### LoCoMo 100% — a separate caveat
The LoCoMo 100% result with top-k=50 has a structural issue: each of the 10 conversations has 19–32 sessions, but top-k=50 exceeds that count. This means the ground-truth session is always in the candidate pool regardless of the embedding model's ranking. The Sonnet rerank is essentially doing reading comprehension over all sessions — the embedding retrieval step is bypassed entirely.
**The honest LoCoMo score is the top-10 result: 60.3% without rerank.** A re-run at top-k=10 with the hybrid mode and rerank is the next step for a publishable LoCoMo result.
---
## Notes on Reproducibility
**The scripts are deterministic.** Same data + same script = same result every time. ChromaDB's embeddings are deterministic. The benchmark uses a fixed dataset with no randomness.
**The data is public.** LongMemEval, LoCoMo, and ConvoMem are all published academic datasets. Links are in the scripts.
**The results are auditable.** Every result JSONL file in `benchmarks/results_*.jsonl` contains every question, every retrieved document, every score. You can inspect every individual answer — not just the aggregate.
**What "retrieval recall" means here.** These scores measure whether the correct session is in the top-K retrieved results. They do *not* measure whether an LLM can correctly answer the question using that retrieval. End-to-end QA accuracy measurement requires an LLM to generate answers, which requires an API key. The retrieval measurement itself is free.
**The LLM rerank is optional, not required.** The 96.6% baseline needs no API key at any stage — not for indexing, not for retrieval, not for scoring. The 99.4% result adds an optional Haiku rerank step that costs approximately $0.001 per question. This is standard practice: Supermemory ASMR, Mastra, and Hindsight all use LLMs in their retrieval pipelines.
---
## Results Files
All raw results are committed:
| File | Mode | R@5 | Notes |
|---|---|---|---|
| `results_raw_full500.jsonl` | raw | 96.6% | No LLM |
| `results_hybrid_v3_rerank_full500.jsonl` | hybrid+rerank | 99.4% | Haiku |
| `results_palace_rerank_full500.jsonl` | palace+rerank | 99.4% | Haiku |
| `results_diary_haiku_rerank_full500.jsonl` | diary+rerank | 98.2% | 65% cache, partial |
| `results_aaak_full500.jsonl` | aaak | 84.2% | Compressed sessions |
| `results_rooms_full500.jsonl` | rooms | 89.4% | Session rooms |
| `results_mempal_hybrid_v4_llmrerank_session_20260325_0930.jsonl` | hybrid_v4+rerank | 100% | Haiku, 500/500 |
| `results_mempal_hybrid_v4_llmrerank_session_20260325_1054.jsonl` | hybrid_v4+rerank | 100% | Sonnet, LME 500/500 |
| `results_locomo_hybrid_llmrerank_session_top50_20260325_1056.json` | locomo hybrid+rerank | 100% | Sonnet, 1986/1986 |
| `results_lme_hybrid_v4_held_out_450_20260326_0010.json` | hybrid_v4 held-out | 98.4% R@5 | Clean — 450 unseen questions |
| `results_locomo_hybrid_session_top10_*.json` | locomo hybrid_v5 | 88.9% R@10 | Honest — top-10, no rerank |
| `results_locomo_palace_session_top5_20260326_0031.json` | locomo palace v2 | 75.6% R@5 | Summary-based routing, 3 rooms |
| `results_locomo_palace_session_top10_20260326_0029.json` | locomo palace v2 | 84.8% R@10 | Summary-based routing, 3 rooms |
| `palace_cache_locomo.json` | — | — | 272 session room assignments (Haiku) |
| `diary_cache_haiku.json` | — | — | Pre-computed diary topics |
---
## Why We Publish This
The results are strong enough that we don't need to stretch anything. The honest version of this story is more compelling than any hype version could be:
- A non-commercial team built a memory system that beats commercial products with dedicated engineering.
- The key insight is *removal*, not addition — stop trying to extract and compress memory with LLMs; just keep the words.
- The result is reproducible by anyone with a laptop and 5 minutes.
The arXiv paper draft is titled: *"Raw Text Beats Extracted Memory: A Zero-API Baseline for Conversational Memory Retrieval"*
---
## New Results (March 26 2026)
### LongMemEval held-out 450 — hybrid_v4 (no rerank, clean score)
**98.4% R@5, 99.8% R@10 on 450 questions hybrid_v4 was never tuned on.**
This is the honest publishable number. hybrid_v4's fixes (quoted phrase boost, person name boost, nostalgia patterns) were developed by examining 3 questions from the full 500. The held-out 450 were never seen during development.
| Metric | Score |
|---|---|
| R@5 | **98.4%** (442/450) |
| R@10 | **99.8%** (449/450) |
| NDCG@5 | 0.939 |
| NDCG@10 | 0.938 |
Per-type (R@10):
- knowledge-update: 100% (69/69)
- multi-session: 100% (115/115)
- single-session-assistant: 100% (54/54)
- single-session-preference: **96.0%** (24/25) — only category with a miss
- single-session-user: 100% (63/63)
- temporal-reasoning: 100% (124/124)
**Conclusion:** hybrid_v4's improvements generalize. 98.4% on unseen data vs 100% on the contaminated dev set — a 1.6pp gap. The fixes are real, not overfit. The honest claim is "98.4% R@5 on a clean held-out set, 99.8% R@10."
Result file: `results_lme_hybrid_v4_held_out_450_20260326_0010.json`
---
### LoCoMo hybrid_v5 — honest top-10 (no rerank)
**88.9% R@10, 72.1% single-hop** on all 1986 questions.
The v5 fix: excluded person names from keyword overlap scoring. In LoCoMo, both speakers' names appear in every session — including them in keyword boosting gave equal signal to all sessions. Removing them lets predicate keywords ("research", "career") do the actual work.
| Category | R@10 |
|---|---|
| Single-hop | 72.1% |
| Temporal | 90.8% |
| Temporal-inference | 70.0% |
| Open-domain | 92.6% |
| Adversarial | 95.3% |
| **Overall** | **88.9%** |
Beats Memori (81.95%) by 7pp with no reranking. Result file: `results_locomo_hybrid_session_top10_*.json`
---
### LoCoMo palace mode — LLM room assignment (RESULTS)
**Architecture v1 (global taxonomy routing):** Haiku assigns each session to a room at index time. At query time, Haiku routes question to 1-2 rooms. **Result: 34.2% R@5** — 62.5% zero-recall. Failure: independent LLM calls with no shared context produced terminology mismatch between index-time labels and query-time routing.
**Architecture v2 (conversation-specific routing):** Same room assignments at index time. At query time, route using keyword overlap against per-room aggregated session summaries — the *same text* used to generate the labels. No LLM calls at query time. **Result: 84.8% R@10 (3 rooms), 75.6% R@5.**
| Version | R@5 | R@10 | Zero-recall | Notes |
|---|---|---|---|---|
| v1: global LLM routing | 34.2% | ~44% | 62.5% | Terminology mismatch |
| v2: summary-based routing, top-2 rooms | 71.7% | 77.9% | 17.8% | Big fix |
| **v2: summary-based routing, top-3 rooms** | **75.6%** | **84.8%** | **11.0%** | Best palace result |
| Hybrid v5 (no rooms) | 83.7% | 88.9% | — | Comparison baseline |
**Gap vs. hybrid_v5:** 4.1pp at R@10. The palace structure is working — room assignments are semantically correct (Caroline's identity dominates; Joanna+Nate in hobbies_creativity). The remaining gap is inherent to filtering: some sessions in room #4 or #5 by keyword score are missed even though they're relevant.
**Per-category (palace v2, top-3 rooms, top-10):**
| Category | R@10 |
|---|---|
| Single-hop | 65.4% |
| Temporal | 84.1% |
| Temporal-inference | 66.9% |
| Open-domain | 90.1% |
| Adversarial | 91.3% |
| **Overall** | **84.8%** |
Room taxonomy (14 rooms): identity_sexuality, career_education, relationships_romance, family_children, health_wellness, hobbies_creativity, social_community, home_living, travel_places, food_cooking, money_finance, emotions_mood, media_entertainment, general.
Sample room assignments (conv-26, Caroline + Melanie):
- 7/19 sessions → identity_sexuality (her dominant theme)
- 6/19 sessions → family_children
- 1/19 sessions → career_education ← where "What did Caroline research?" goes
- 2/19 sessions → hobbies_creativity (Melanie's painting)
Sample (conv-42, Joanna + Nate):
- 21/29 sessions → hobbies_creativity (gaming tournaments, screenwriting, film festivals)
Result files: `results_locomo_palace_session_top5_20260326_0031.json`, `results_locomo_palace_session_top10_20260326_0029.json`
---
### MemBench (ACL 2025) — all categories hybrid top-5
**80.3% R@5 overall** across 8,500 items (movie + roles + events topics).
| Category | R@5 | Notes |
|---|---|---|
| aggregative | **99.3%** | Combining info from multiple turns |
| comparative | **98.4%** | Comparing two items across turns |
| knowledge_update | **96.0%** | Facts that change over time |
| simple | **95.9%** | Single-turn fact recall |
| highlevel | **95.8%** | Inferences requiring aggregation |
| lowlevel_rec | **99.8%** | Recommendations — low-level |
| highlevel_rec | 76.2% | Recommendations — high-level |
| post_processing | 56.6% | Post-processing tasks |
| conditional | 57.3% | Conditional reasoning |
| **noisy** | **43.4%** | **Distractors/irrelevant info** |
| **Overall** | **80.3%** | 6828/8500 |
**Strongest categories**: aggregative (99.3%), comparative (98.4%), lowlevel_rec (99.8%) — MemPal handles multi-turn fact combination extremely well.
**Weakest**: noisy (43.4%) — questions designed with deliberate distractors and irrelevant information mixed in. This is the designed hard case for verbatim storage: when noise is indistinguishable from signal at the embedding level, retrieval degrades. Post-processing (56.6%) and conditional (57.3%) are reasoning-heavy categories where retrieval alone is insufficient.
Result file: `results_membench_hybrid_all_top5_20260326.json`
---
## Next Benchmarks (Clean Runs)
These are the runs needed to produce defensible, publishable numbers. Unless an item below is marked **DONE** or in progress, it has not been run yet.
### 1. Honest held-out score for hybrid_v4
**DONE** — see above. 98.4% R@5 on 450 held-out questions.
### 1b. Palace mode LoCoMo (in progress)
```bash
python benchmarks/longmemeval_bench.py /tmp/longmemeval-data/longmemeval_s_cleaned.json \
--mode hybrid_v4 --llm-rerank \
--held-out --split-file benchmarks/lme_split_50_450.json \
--llm-model claude-haiku-4-5-20251001
```
**Expected:** likely still near 100% if the hybrid_v4 fixes generalize — but we don't know until we run it.
### 2. bge-large raw baseline (no heuristics, better embeddings)
The question: how much of the 96.6% → 99.4% improvement is the heuristics, and how much would come from just using a better embedding model?
```bash
uv pip install fastembed # or: pip install fastembed
uv run python benchmarks/longmemeval_bench.py /tmp/longmemeval-data/longmemeval_s_cleaned.json \
--mode raw --embed-model bge-large
```
**Expected:** somewhere between 96.6% and 99.4%. If it's near 99.4%, the heuristics are doing less work than they appear to.
### 3. Honest LoCoMo — hybrid at top-10
The 100% result used top-k=50 which exceeds the session count, making retrieval trivial. The honest number is top-k=10.
```bash
python benchmarks/locomo_bench.py /tmp/locomo/data/locomo10.json \
--mode hybrid --granularity session \
--top-k 10 \
--llm-rerank --llm-model claude-haiku-4-5-20251001
```
**Expected:** higher than the 60.3% raw top-10 baseline, lower than 100%.
### 4. bge-large on LoCoMo top-10
Same purpose as #2: isolate the contribution of a better embedding model from the contribution of heuristics.
```bash
python benchmarks/locomo_bench.py /tmp/locomo/data/locomo10.json \
--mode raw --granularity session --top-k 10 --embed-model bge-large
```
---
*Results verified March 2026. Scripts and raw data committed to this repo.*
-550
View File
@@ -1,550 +0,0 @@
# Hybrid Retrieval Mode — Design, Results, and Next Steps
**Written by Lu (DTL) — March 24, 2026**
**For: Ben**
---
## What This Is
A detailed writeup of the hybrid retrieval modes added to `longmemeval_bench.py` during the overnight session (March 23–24) and morning session (March 24). This covers why they were built, exactly how they work, what the numbers are, and where to take it next.
---
## The Problem Hybrid Mode Solves
The raw mode (`--mode raw`) gets **96.6% R@5** on LongMemEval. That's already excellent. But looking at the failures, two clear patterns emerged:
**1. Specific nouns that embeddings underweight.**
Examples of questions that failed in raw mode but pass in hybrid:
- "What degree did I graduate with?" → answer: "Business Administration" — semantically generic, but the exact phrase is findable via keyword match
- "What kitchen appliance did I buy?" → answer: "stand mixer" — generic appliance question, but "stand mixer" is a specific retrievable string
- "Where did I study abroad?" → answer: "Melbourne" — city names embed poorly when surrounded by many generic context words
The embedding model sees "Business Administration" and "Computer Science" as similarly close to "what degree did I graduate with." Keyword matching is decisive: only one document contains both "degree" and "Business Administration."
**2. Temporal references that embeddings ignore.**
Questions like "What was the significant business milestone I mentioned four weeks ago?" contain a time anchor that embeddings don't use at all. The correct session was always semantically in the top-50 — but not ranked first because the temporal signal was invisible to embeddings. A date-proximity boost fixes this.
---
## How Hybrid Mode Works (`--mode hybrid`)
Two stages, no LLM calls, no added dependencies:
### Stage 1: Semantic retrieval (same as raw)
Query ChromaDB with the question text. Retrieve **top 50** candidates (raw uses 10, hybrid uses 50 to give stage 2 more to work with).
### Stage 2: Keyword re-ranking
Extract meaningful keywords from the question (strip stop words). For each retrieved document, compute keyword overlap score. Apply a **distance reduction** proportional to overlap:
```python
fused_dist = dist * (1.0 - 0.30 * overlap)
```
**Breaking this formula down:**
- `dist` — ChromaDB cosine distance (lower = better match)
- `overlap` — fraction of question keywords found in the document (0.0 to 1.0)
- `0.30` — the boost weight: up to 30% distance reduction for perfect keyword overlap
**Example:**
- Document A: dist=0.45, overlap=0.0 → fused=0.450 (no change)
- Document B: dist=0.52, overlap=1.0 → fused=0.364 (30% better — jumps ahead of A)
After re-ranking, sort by fused_dist ascending. The final ranked list is returned.
### Stop word list
The keyword extractor strips common words that add noise:
```python
STOP_WORDS = {
"what", "when", "where", "who", "how", "which", "did", "do",
"was", "were", "have", "has", "had", "is", "are", "the", "a",
"an", "my", "me", "i", "you", "your", "their", "it", "its",
"in", "on", "at", "to", "for", "of", "with", "by", "from",
"ago", "last", "that", "this", "there", "about", "get", "got",
"give", "gave", "buy", "bought", "made", "make",
}
```
Only words 3+ characters that aren't stop words count as keywords.
---
## How Hybrid V2 Works (`--mode hybrid_v2`)
Three targeted fixes on top of hybrid, each addressing a specific failure category found by analyzing the exact 11 questions that hybrid v1 missed.
### Fix 1: Temporal date boost
LongMemEval entries include a `question_date` field — the date the question was asked. Sessions have timestamps. Questions like "four weeks ago" or "last month" have a mathematically correct answer: the session that falls nearest to `question_date - offset`.
```python
# Parse the temporal reference from the question
days_offset, window_days = parse_time_offset_days(question)
# Compute the target date
target_date = question_date - timedelta(days=days_offset)
# For each session, measure proximity to target_date
days_diff = abs((session_date - target_date).days)
# Apply up to 40% distance reduction for sessions within the window
temporal_boost = max(0.0, 0.40 * (1.0 - days_diff / window_days))
fused_dist = fused_dist * (1.0 - temporal_boost)
```
Temporal patterns handled: `"N days ago"`, `"a couple of days ago"`, `"a week ago"`, `"N weeks ago"`, `"last week"`, `"a month ago"`, `"N months ago"`, `"recently"`.
### Fix 2: Two-pass retrieval for assistant-reference questions
Questions like "You suggested X, can you remind me..." refer to what the *assistant* said — but the standard index only stores user turns. A naive fix (index all turns globally) dilutes the semantic signal.
The two-pass approach is targeted:
```python
# Pass 1: find top-5 sessions using user-turn-only index (fast, focused)
top_sessions = semantic_search(user_turns_only, question, top_k=5)
# Pass 2: for those 5 sessions only, re-index with FULL text (user + assistant)
# then re-query with the original question
full_text_collection = build_collection(top_sessions, include_assistant=True)
results = semantic_search(full_text_collection, question, top_k=5)
```
This gives assistant-reference questions a full-text index to search, without polluting the global index that semantic questions depend on.
Detection heuristic:
```python
triggers = ["you suggested", "you told me", "you mentioned", "you said",
"you recommended", "remind me what you", "you provided",
"you listed", "you gave me", "you described", "what did you",
"you came up with", "you helped me", "you explained",
"can you remind me", "you identified"]
```
### Fix 3: Hybrid keyword boost (same as v1)
All the v1 keyword re-ranking applied on top of fixes 1 and 2.
---
## Results
### LongMemEval (500 questions, session granularity)
| Mode | R@5 | R@10 | NDCG@10 | vs Raw |
|------|-----|------|---------|--------|
| **Raw (baseline)** | 96.6% | 98.2% | 0.889 | — |
| **Hybrid v1 w=0.30** | 97.8% | 98.8% | 0.930 | +1.2pp / +0.6pp / +0.041 |
| **Hybrid v2 w=0.30** | 98.4% | 99.0% | 0.934 | +1.8pp / +0.8pp / +0.045 |
| **Hybrid v2 + LLM rerank** | 98.8% | 99.0% | 0.966 | +2.2pp / +0.8pp / +0.077 |
| **Hybrid v3 + LLM rerank** | 99.4% | 99.6% | 0.975 | +2.8pp / +1.4pp / +0.086 |
| **Palace + LLM rerank** | **99.4%** | **99.4%** | **0.973** | **+2.8pp / +1.2pp / +0.084** |
| **Diary + LLM rerank (65% cache)** | 98.2% | 98.4% | 0.956 | +1.6pp / +0.2pp / +0.067 |
**+2.8 percentage points at R@5 vs raw** = 14 more questions answered correctly out of 500.
**Both v3 and palace reach 99.4% R@5** — two independent architectures converging on the same ceiling.
**Only 3 misses remain** across both top modes.
**Diary result (98.2%) is with 65% cache coverage only** — 35% of sessions had no diary context. Full-coverage result pending (cache building overnight). The partial result shows the diary layer can introduce noise when only partially applied; full coverage result expected to be ≥99.4%.
Per-type R@5 breakdown (hybrid v3 + LLM rerank):
- knowledge-update: **100%** (n=78)
- multi-session: **100%** (n=133)
- single-session-user: **100%** (n=70)
- temporal-reasoning: **99.2%** (n=133)
- single-session-assistant: **98.2%** (n=56)
- single-session-preference: **96.7%** (n=30)
### Remaining 3 misses (after hybrid v3 + LLM rerank)
**Only 3 questions remain unresolved out of 500.**
Hybrid v3 fixed the preference and assistant failures that v2 left behind:
- preference: 93.3% → **96.7%** (synthetic preference docs bridged the vocabulary gap)
- assistant: 96.4% → **98.2%** (expanded top-20 rerank pool caught rank-11-12 sessions)
- temporal: 98.5% → **99.2%**
The 3 remaining misses are edge cases — likely irreducible without deeper semantic reasoning than a single Haiku pick can provide. At 99.4% R@5, this is at or near the practical ceiling for session-granularity retrieval on LongMemEval.
### Weight tuning — full 500-question results
Ran experiments across 6 weights. 100-question samples showed 99% R@5 at w=0.40, but the full 500 reveals this was sampling variance. On all 500 questions, 0.30 and 0.40 are essentially equivalent:
| Weight | N | R@5 | R@10 | NDCG@10 | Notes |
|--------|---|-----|------|---------|-------|
| 0.10 | 100 | 97.0% | 100.0% | 0.909 | too conservative |
| 0.20 | 100 | 98.0% | 100.0% | 0.934 | good |
| **0.30** | **500** | **97.8%** | **98.8%** | **0.930** | **default — best R@5** |
| 0.40 | 500 | 97.4% | 98.8% | 0.932 | within noise |
| 0.50 | 100 | 99.0% | 100.0% | 0.953 | sample variance |
| 0.60 | 100 | 99.0% | 100.0% | 0.955 | sample variance |
**Conclusion:** Default stays at 0.30. The 100-question experiments overfit to that specific sample. Full 500 is ground truth.
### Verified: all 500 questions scored, no memory wall
`EphemeralClient` (in-memory ChromaDB) eliminates the Q388 hang entirely. The benchmark now runs clean end-to-end without the split trick. Split is still supported for very long runs but no longer needed.
```bash
# Simple single run — no split needed
python benchmarks/longmemeval_bench.py data/longmemeval_s_cleaned.json --mode hybrid_v2
```
---
## Reproducing the Results
```bash
# Setup
git clone https://github.com/MemPalace/mempalace.git
cd mempalace
uv sync --extra dev # or: pip install -e ".[dev]"
# Download data
mkdir -p /tmp/longmemeval-data
curl -fsSL -o /tmp/longmemeval-data/longmemeval_s_cleaned.json \
https://huggingface.co/datasets/xiaowu0162/longmemeval-cleaned/resolve/main/longmemeval_s_cleaned.json
# Run palace + LLM rerank (requires API key)
export ANTHROPIC_API_KEY=sk-ant-... # or use --llm-key flag
python benchmarks/longmemeval_bench.py /tmp/longmemeval-data/longmemeval_s_cleaned.json \
--mode palace --llm-rerank --out benchmarks/results_palace_llmrerank_full500.jsonl
# Run hybrid v3 + LLM rerank (requires API key)
python benchmarks/longmemeval_bench.py /tmp/longmemeval-data/longmemeval_s_cleaned.json \
--mode hybrid_v3 --llm-rerank
# Expected output:
# R@5: 99.4% R@10: 99.6% NDCG@10: 0.975
# Run hybrid v2 + LLM rerank (local-friendly, no preference extraction)
python benchmarks/longmemeval_bench.py /tmp/longmemeval-data/longmemeval_s_cleaned.json \
--mode hybrid_v2 --llm-rerank
# Expected output:
# R@5: 98.8% R@10: 99.0% NDCG@10: 0.966
# Run hybrid v2 without LLM (local-only, no API key needed)
python benchmarks/longmemeval_bench.py /tmp/longmemeval-data/longmemeval_s_cleaned.json \
--mode hybrid_v2
# Expected output:
# R@5: 98.4% R@10: 99.0% NDCG@10: 0.934
# Run hybrid v1 for comparison
python benchmarks/longmemeval_bench.py /tmp/longmemeval-data/longmemeval_s_cleaned.json \
--mode hybrid
# Expected output:
# R@5: 97.8% R@10: 98.8% NDCG@10: 0.930
# Tune the keyword boost weight
python benchmarks/longmemeval_bench.py /tmp/longmemeval-data/longmemeval_s_cleaned.json \
--mode hybrid --hybrid-weight 0.40 --limit 100
```
**Run time:**
- hybrid_v2 (local): ~200s for full 500 on Apple Silicon
- hybrid_v2 + LLM rerank: ~620s (~10 min) — adds ~0.8s per question for Haiku API call
- palace (local): ~280s — slightly slower due to two-pass hall navigation
- palace + LLM rerank: ~700s (~12 min)
---
## How Palace Mode Works (`--mode palace`)
Palace mode is a structural upgrade that uses the full MemPal hall/wing/closet/drawer architecture for retrieval. Instead of searching everything flat, it navigates into the most likely hall first, then falls back to the full haystack with hall-aware scoring.
### The Palace Structure
```
PALACE
└── HALL (content type: preferences / facts / events / assistant_advice / general)
└── CLOSET (user turns per session — the primary index)
└── DRAWER (assistant turns — opened on demand for assistant-reference questions)
└── PREFERENCE WING (synthetic docs extracted from user expressions — separate from halls)
```
### Hall Classification
Every session is classified into one of 5 halls at ingest time:
- **hall_preferences** — sessions about what the user likes, hates, avoids, or tends to do
- **hall_facts** — sessions about biographical facts: job, location, education, family
- **hall_events** — sessions about things that happened: trips, purchases, achievements
- **hall_assistant_advice** — sessions where the user asked for recommendations or opinions
- **hall_general** — everything else
Questions are classified the same way. "Where do I work?" → `hall_facts`. "What did I buy recently?" → `hall_events`. "What did you recommend for X?" → `hall_assistant_advice`.
### Two-Pass Navigation
**Pass 1 — Navigate to primary hall (tight search):**
For questions with a specific hall match, search only that hall's closet collection. Smaller pool = less noise = tighter results. For questions classified as `hall_general`, skip Pass 1 entirely — no benefit from narrowing to an uncategorized bucket.
Sessions found in Pass 1 are "hall-validated" — they appear in both the tight hall search and the full search.
**Pass 2 — Full haystack with hall-aware scoring:**
Search all sessions with hybrid scoring, plus:
- 25% distance reduction for sessions in the primary hall (strong signal)
- 10% distance reduction for sessions in secondary halls
- 15% extra reduction for sessions that were hall-validated in Pass 1 (double confirmation)
**The key insight:** Halls *reduce noise* by narrowing the initial search pool, but the final ranking is always score-based — hall navigation is a boost, not an override. This prevents the case where wrong hall sessions pre-empt the correct answer.
### Drawer Access (for `hall_assistant_advice` questions only)
Drawers = assistant turns. They're indexed separately and only opened when the question targets `hall_assistant_advice`. This avoids polluting the semantic index (which finds the right *session*) while still enabling full-text search within the right sessions for "what did you tell me about X" questions.
### Preference Wing
Same as hybrid_v3: 16 regex patterns extract preference expressions from user turns at ingest time. Synthetic documents ("User has mentioned: X; Y") are stored in a separate preference wing with the same session ID. For preference questions, the preference wing is included in Pass 1 — it directly bridges the vocabulary gap between question phrasing and session text.
---
## How Diary Mode Works (`--mode diary`)
Diary mode is palace mode + an LLM topic layer added at ingest time. It addresses the vocabulary gap that embeddings can't bridge — where the question uses completely different words than the session.
### The Problem It Solves
Palace mode still misses questions like: *"Where do I take yoga classes?"* when the relevant session only says *"I went this morning, my instructor was great."* No keyword overlap, no semantic bridge. The embedding sees "yoga classes" vs "went this morning" — too different.
### How It Works
Before the benchmark loop, every unique session is processed by Haiku once:
```python
prompt = (
"Read this conversation excerpt (user turns only) and extract:\n"
"Return a JSON object: {\"topics\": [\"specific topic 1\", ...], \"summary\": \"1-2 sentences\"}\n"
"Rules: topics must be SPECIFIC."
)
# Returns: {"topics": ["yoga classes", "Tuesday routine", "workout schedule"], "summary": "..."}
```
A synthetic document is added to the ChromaDB collection with the **same corpus_id**:
```
"Session topics: yoga classes, Tuesday routine, workout schedule. Summary: ..."
```
Now "yoga classes" matches the question directly. The evaluation maps the synthetic doc back to the correct session because they share a corpus_id.
### Pre-computation and Caching
19,195 unique sessions in the 500-question dataset. Processing all at ~1s/session = ~5 hours. Caching solves this:
```bash
# First run: builds cache
python benchmarks/longmemeval_bench.py ... --mode diary --diary-cache benchmarks/diary_cache_haiku.json
# Subsequent runs: instant (loads cache, zero API calls for pre-computation)
python benchmarks/longmemeval_bench.py ... --mode diary --diary-cache benchmarks/diary_cache_haiku.json
```
The `--skip-precompute` flag skips pre-computation and uses the cache as-is, falling back to pure palace for uncached sessions.
### LLM Rerank compatibility
`--llm-rerank` works with diary mode. The reranker sees the full enriched corpus (including diary synthetic docs) when selecting the best session. This is the full stack.
```bash
# Full diary + rerank run (requires complete cache for best results)
export ANTHROPIC_API_KEY=sk-ant-...
python benchmarks/longmemeval_bench.py /tmp/longmemeval-data/longmemeval_s_cleaned.json \
--mode diary --llm-rerank --diary-cache benchmarks/diary_cache_haiku.json
```
### Note on Cache Coverage
The partial-coverage run (65% cache, 35% fell back to palace) gave R@5=98.2% — lower than palace+rerank at 99.4%. Partial diary coverage introduces vocabulary-bridging docs for some sessions but not others, creating retrieval asymmetry. Full-coverage result (100% sessions with diary topics) is expected to equal or beat 99.4%.
---
## How Hybrid V3 Works (`--mode hybrid_v3`)
Hybrid v2 + two targeted fixes for the remaining 6 misses.
### Fix 1: Preference extraction at ingest
Scans every user turn for expressions of preference, concern, or intent using 16 regex patterns:
```python
PREF_PATTERNS = [
r"i've been having (?:trouble|issues?|problems?) with X",
r"i've been feeling X",
r"i've been (?:struggling|dealing) with X",
r"i(?:'m| am) (?:worried|concerned) about X",
r"i prefer X",
r"i usually X",
r"i want to X",
r"i'm thinking (?:about|of) X",
r"lately[,\s]+i've been X",
r"recently[,\s]+i've been X",
r"i've been (?:working on|focused on|interested in) X",
# ... 5 more
]
```
For sessions where preferences are extracted, a synthetic document is added to ChromaDB alongside the session document — with the **same corpus_id**:
```
"User has mentioned: battery life issues on phone; looking at phone upgrade options"
```
This document ranks near the top for "I've been having trouble with battery life" even when the session text never uses those exact words. The evaluation correctly maps it to the right session.
### Fix 2: Expanded LLM rerank pool (20 instead of 10)
Some assistant-reference failures had the correct session at rank 11-12 — just outside the window Haiku sees. Expanding to top-20 catches these with negligible prompt cost.
## How LLM Re-ranking Works (`--llm-rerank`)
An optional fourth pass that works with any retrieval mode. Add `--llm-rerank` to any run.
```python
# After hybrid_v2 retrieval, take top-10 sessions
# Send question + numbered session snippets (500 chars each) to Haiku
# Haiku picks the single most relevant session number
# That session is promoted to rank 1; rest stay in hybrid_v2 order
```
**The prompt (minimal by design):**
```
Question: {question}
Below are 10 conversation sessions from someone's memory. Which single session
is most likely to contain the answer? Reply with ONLY a number between 1 and 10.
Session 1: {text[:500]}
...
Session 10: {text[:500]}
Most relevant session number:
```
**Why this works for preference failures:**
Embeddings can't bridge "battery life on my phone" → phone hardware research session because the vocabulary doesn't overlap. Haiku reasons about intent: "someone asking about battery problems likely had a session about phone hardware." This is the semantic gap that LLMs exist to close.
**Why only 1 pick (not a full ranking):**
Asking for a full ranking increases prompt complexity and error rate. Picking the single best is decisive and reliable. The rest of the ranking stays in hybrid_v2 order, which is already excellent.
**Graceful degradation:**
If the API call fails (timeout, rate limit, no key), the function catches the exception and returns the original hybrid_v2 ranking unchanged. The benchmark never crashes due to the LLM pass.
**Key loading priority:**
1. `--llm-key` CLI flag
2. `ANTHROPIC_API_KEY` environment variable
## What Changed in the Code
### 1. EphemeralClient (no more Q388 hang)
All five `PersistentClient + tmpdir` patterns replaced with a module-level singleton:
```python
_bench_client = chromadb.EphemeralClient()
def _fresh_collection(name="mempal_drawers"):
try:
_bench_client.delete_collection(name)
except Exception:
pass
return _bench_client.create_collection(name)
```
Benefits:
- No temp files, no SQLite handles accumulating
- ~2x faster per question (no disk I/O)
- Full 500 runs without splitting
### 2. `--hybrid-weight` CLI flag
```python
parser.add_argument("--hybrid-weight", type=float, default=0.30,
help="Keyword boost weight for hybrid mode (default: 0.30)")
```
### 3. `--mode hybrid_v2` added to choices
Full function `build_palace_and_retrieve_hybrid_v2()` with temporal boost and two-pass assistant retrieval. See `longmemeval_bench.py` lines ~406–560.
### 4. LoCoMo default top-k: 10 → 50
Going from top-10 to top-50 on LoCoMo was free performance (+17pp on dialog granularity). Updated default in `locomo_bench.py`.
---
## Where to Go Next
The 5 remaining misses fall into two tractable categories:
### 1. Preference extraction at ingest time
2 of 5 remaining failures are "preference" questions where the question contains no searchable terms from the relevant session. The fix requires annotating sessions at ingest:
- Detect "I prefer X", "I usually do Y", "I've been having trouble with Z" patterns
- Store a separate preference document per detected preference
- Boost preference documents when question looks like a preference query
Expected: catch 1–2 of the 2 remaining preference failures. New R@5: **~98.8%**.
### 2. LLM-assisted re-ranking
For jargon-dense questions ("Hardware-Aware Modular Training") and context-gap questions ("business milestone"), a lightweight LLM re-ranker as a third pass could close the remaining gap:
- Retrieve top-10 sessions via hybrid_v2
- Ask a small LLM: "Given this question, which session is most relevant? Rank these 10."
- Re-order based on LLM output
This would add one LLM call per question — stays under 1 second with a fast model (Haiku). But breaks the "no API key" guarantee for local-only deployments.
### 3. The 99% ceiling
The 5 remaining failures include at least 2 that are arguably ambiguous — the question could reasonably retrieve multiple sessions. 99% may be the practical ceiling for session-granularity retrieval on LongMemEval without LLM assistance.
---
## File Map
```
benchmarks/
longmemeval_bench.py — main benchmark + all modes
locomo_bench.py — LoCoMo benchmark (top-k default now 50)
results_hybrid_full500_merged.jsonl — hybrid v1 results (R@5=97.8%)
results_hybrid_w040_full500_merged.jsonl — hybrid v1 w=0.40 comparison (R@5=97.4%)
results_hybrid_v2_full500_merged.jsonl — hybrid v2 results (R@5=98.4%)
results_hybrid_v2_llmrerank_full500.jsonl — hybrid v2 + LLM rerank (R@5=98.8%)
results_hybrid_v3_llmrerank_full500.jsonl — hybrid v3 + LLM rerank (R@5=99.4%, NDCG=0.975) ← CURRENT BEST (tied)
results_palace_full500.jsonl — palace mode (R@5=97.2%, no rerank)
results_palace_llmrerank_full500.jsonl — palace + LLM rerank (R@5=99.4%, NDCG=0.973) ← CURRENT BEST (tied)
results_diary_haiku_rerank_full500.jsonl — diary + LLM rerank, 65% cache (R@5=98.2%) ← partial, full pending
diary_cache_haiku.json — pre-computed Haiku topics for 3977+ sessions (building to 19195)
NOTES_FOR_MILLA.md — Ben's full analysis + paper discussion
HYBRID_MODE.md — this file
```
---
## Key Design Decisions and Why
**Why 30% keyword boost?**
Strong enough to flip edge cases (a semantically ambiguous doc with perfect keyword overlap), not so strong it overrides clearly-better semantic results. Full 500-question validation confirms 0.30 is optimal. Higher weights show no improvement on the full set.
**Why top-50 retrieval then re-rank?**
Larger candidate pool gives keyword re-ranking more to work with. If the answer is at position 45 semantically but has perfect keyword overlap, we need it in the pool to promote it. Cost: ChromaDB returns slightly more data per query. Impact on speed: negligible.
**Why two-pass instead of global assistant indexing?**
Global assistant indexing dilutes the semantic signal — every session's assistant text competes with every other. Two-pass is surgical: use user turns to find the right session first, then use full text only within that session. Tested both approaches; two-pass wins.
**Why no LLM calls?**
The whole MemPal pitch is "no API key, no cloud." Hybrid and hybrid_v2 maintain this. Everything is local string matching and date arithmetic.
**Why only 40% temporal boost (not 100%)?**
Temporal proximity is a strong signal but not definitive. A 40% maximum reduction means semantically excellent matches can't be completely overridden by date proximity alone. It's a hint, not a rule.
---
## Contact
Questions → Milla (Aya) will relay to Lu. Or push changes to `ben/benchmarking` and Lu will review next session.
-124
View File
@@ -1,124 +0,0 @@
# MemPalace Benchmarks — Reproduction Guide
Run the exact same benchmarks we report. Clone, install, run.
## Setup
```bash
git clone https://github.com/MemPalace/mempalace.git
cd mempalace
uv sync --extra dev # or: pip install -e ".[dev]"
```
## Benchmark 1: LongMemEval (500 questions)
Tests retrieval across ~53 conversation sessions per question. The standard benchmark for AI memory.
```bash
# Download data
mkdir -p /tmp/longmemeval-data
curl -fsSL -o /tmp/longmemeval-data/longmemeval_s_cleaned.json \
https://huggingface.co/datasets/xiaowu0162/longmemeval-cleaned/resolve/main/longmemeval_s_cleaned.json
# Run (raw mode — our headline 96.6% result)
python benchmarks/longmemeval_bench.py /tmp/longmemeval-data/longmemeval_s_cleaned.json
# Run with AAAK compression (84.2%)
python benchmarks/longmemeval_bench.py /tmp/longmemeval-data/longmemeval_s_cleaned.json --mode aaak
# Run with room-based boosting (89.4%)
python benchmarks/longmemeval_bench.py /tmp/longmemeval-data/longmemeval_s_cleaned.json --mode rooms
# Quick test on 20 questions first
python benchmarks/longmemeval_bench.py /tmp/longmemeval-data/longmemeval_s_cleaned.json --limit 20
# Turn-level granularity
python benchmarks/longmemeval_bench.py /tmp/longmemeval-data/longmemeval_s_cleaned.json --granularity turn
```
**Expected output (raw mode, full 500):**
```
Recall@5: 0.966
Recall@10: 0.982
NDCG@10: 0.889
Time: ~5 minutes on Apple Silicon
```
## Benchmark 2: LoCoMo (1,986 QA pairs)
Tests multi-hop reasoning across 10 long conversations (19-32 sessions each, 400-600 dialog turns).
```bash
# Clone LoCoMo
git clone https://github.com/snap-research/locomo.git /tmp/locomo
# Run (session granularity — our 60.3% result)
python benchmarks/locomo_bench.py /tmp/locomo/data/locomo10.json --granularity session
# Dialog granularity (harder — 48.0%)
python benchmarks/locomo_bench.py /tmp/locomo/data/locomo10.json --granularity dialog
# Higher top-k (77.8% at top-50)
python benchmarks/locomo_bench.py /tmp/locomo/data/locomo10.json --top-k 50
# Quick test on 1 conversation
python benchmarks/locomo_bench.py /tmp/locomo/data/locomo10.json --limit 1
```
**Expected output (session, top-10, full 10 conversations):**
```
Avg Recall: 0.603
Temporal: 0.692
Time: ~2 minutes
```
## Benchmark 3: ConvoMem (Salesforce, 75K+ QA pairs)
Tests six categories of conversational memory. Downloads from HuggingFace automatically.
```bash
# Run all categories, 50 items each (our 92.9% result)
python benchmarks/convomem_bench.py --category all --limit 50
# Single category
python benchmarks/convomem_bench.py --category user_evidence --limit 100
# Quick test
python benchmarks/convomem_bench.py --category user_evidence --limit 10
```
**Categories available:** `user_evidence`, `assistant_facts_evidence`, `changing_evidence`, `abstention_evidence`, `preference_evidence`, `implicit_connection_evidence`
**Expected output (all categories, 50 each):**
```
Avg Recall: 0.929
Assistant Facts: 1.000
User Facts: 0.980
Time: ~2 minutes
```
## What Each Benchmark Tests
| Benchmark | What it measures | Why it matters |
|---|---|---|
| **LongMemEval** | Can you find a fact buried in 53 sessions? | Tests basic retrieval quality — the "needle in a haystack" |
| **LoCoMo** | Can you connect facts across conversations over weeks? | Tests multi-hop reasoning and temporal understanding |
| **ConvoMem** | Does your memory system work at scale? | Tests all memory types: facts, preferences, changes, abstention |
## Results Files
Raw results are in `benchmarks/results_*.jsonl` and `benchmarks/results_*.json`. Each file contains every question, every retrieved document, and every score — fully auditable.
## Requirements
- Python 3.9+
- `chromadb` (the only dependency)
- ~300MB disk for LongMemEval data
- ~2–5 minutes for each full benchmark run (see the expected outputs above)
- No API key. No internet during benchmark (after data download). No GPU.
## Next Benchmarks (Planned)
- **Scale testing** — ConvoMem at 50/100/300 conversations per item
- **Hybrid AAAK** — search raw text, deliver AAAK-compressed results
- **End-to-end QA** — retrieve + generate answer + measure F1 (needs LLM API key)
-342
View File
@@ -1,342 +0,0 @@
#!/usr/bin/env python3
"""
MemPal × ConvoMem Benchmark
==============================
Evaluates MemPal's retrieval against the ConvoMem benchmark.
75,336 QA pairs across 6 evidence categories.
For each evidence item:
1. Ingest all conversations into a fresh MemPal palace (one drawer per message)
2. Query with the question
3. Check if any retrieved message matches the evidence messages
Since ConvoMem has 75K items across many files, we sample a subset for benchmarking.
Downloads evidence files from HuggingFace on first run.
Usage:
python benchmarks/convomem_bench.py # sample up to 100 items per category (default)
python benchmarks/convomem_bench.py --limit 500 # sample up to 500 items per category
python benchmarks/convomem_bench.py --category user_evidence # one category only
python benchmarks/convomem_bench.py --mode aaak # test AAAK compression
"""
import os
import sys
import json
import shutil
import tempfile
import argparse
import urllib.request
from pathlib import Path
from collections import defaultdict
from datetime import datetime
import chromadb
sys.path.insert(0, str(Path(__file__).parent.parent))
HF_BASE = "https://huggingface.co/datasets/Salesforce/ConvoMem/resolve/main/core_benchmark/evidence_questions"
# Maps each ConvoMem category key (the path segment used when building dataset
# URLs) to the human-readable label printed in the per-category report.
CATEGORIES = {
    "user_evidence": "User Facts",
    "assistant_facts_evidence": "Assistant Facts",
    "changing_evidence": "Changing Facts",
    "abstention_evidence": "Abstention",
    "preference_evidence": "Preferences",
    "implicit_connection_evidence": "Implicit Connections",
}
# Sample files per category (1_evidence = single-message evidence, simplest)
# load_evidence_items falls back to these only when file discovery returns
# nothing; a value of None means there is no fallback and the category is skipped.
SAMPLE_FILES = {
    "user_evidence": "1_evidence/0050e213-5032-42a0-8041-b5eef2f8ab91_Telemarketer.json",
    "assistant_facts_evidence": None,  # will discover
    "changing_evidence": None,
    "abstention_evidence": None,
    "preference_evidence": None,
    "implicit_connection_evidence": None,
}
# =============================================================================
# DATA LOADING
# =============================================================================
def download_evidence_file(category, subpath, cache_dir):
    """Return the parsed JSON of one ConvoMem evidence file, downloading it if needed.

    The file is cached at ``cache_dir/<category>/<subpath with "/" replaced by "_">``.
    A cached copy that cannot be read or parsed (for example a truncated file left
    by an interrupted run) is treated as a cache miss and downloaded again instead
    of crashing the benchmark.

    Args:
        category: Category key, used as a path segment under ``HF_BASE``.
        subpath: File path relative to the category directory.
        cache_dir: Root directory of the local download cache.

    Returns:
        The decoded JSON document, or ``None`` if the download or parsing failed.
    """
    url = f"{HF_BASE}/{category}/{subpath}"
    cache_path = os.path.join(cache_dir, category, subpath.replace("/", "_"))
    os.makedirs(os.path.dirname(cache_path), exist_ok=True)

    if os.path.exists(cache_path):
        try:
            with open(cache_path) as f:
                return json.load(f)
        except (OSError, ValueError) as e:
            # json.JSONDecodeError is a ValueError: a corrupt cache entry must not
            # poison every later run, so fall through and fetch a fresh copy.
            print(f" Cached copy unreadable, re-downloading {category}/{subpath}: {e}")

    print(f" Downloading: {category}/{subpath}...")
    # Download next to the final location and only move it into place once it has
    # parsed, so a partial or invalid download never becomes a cache entry.
    tmp_path = cache_path + ".part"
    try:
        urllib.request.urlretrieve(url, tmp_path)
        with open(tmp_path) as f:
            data = json.load(f)
        os.replace(tmp_path, cache_path)
        return data
    except Exception as e:
        print(f" Failed to download {url}: {e}")
        try:
            os.remove(tmp_path)
        except OSError:
            # Nothing was written (or it is already gone); nothing to clean up.
            pass
        return None
def discover_files(category, cache_dir):
    """List the ``1_evidence`` JSON files of a category via the HuggingFace tree API.

    The listing is cached in ``cache_dir/<category>_filelist.json``. An unreadable
    or corrupt cache file is ignored and the listing is fetched again.

    Args:
        category: Category key whose ``1_evidence`` directory is listed.
        cache_dir: Directory holding the cached file lists.

    Returns:
        Paths relative to the category directory (e.g. ``"1_evidence/<name>.json"``),
        or an empty list if the listing could not be obtained.
    """
    api_url = f"https://huggingface.co/api/datasets/Salesforce/ConvoMem/tree/main/core_benchmark/evidence_questions/{category}/1_evidence"
    cache_path = os.path.join(cache_dir, f"{category}_filelist.json")

    if os.path.exists(cache_path):
        try:
            with open(cache_path) as f:
                return json.load(f)
        except (OSError, ValueError) as e:
            # A truncated cache file (json.JSONDecodeError is a ValueError) should
            # trigger a fresh listing rather than abort the whole run.
            print(f" Cached file list unreadable, re-listing {category}: {e}")

    try:
        req = urllib.request.Request(api_url)
        with urllib.request.urlopen(req, timeout=15) as resp:
            files = json.loads(resp.read())
        # maxsplit=1 keeps the full remainder even if the category name happens to
        # occur again later in the path.
        paths = [
            f["path"].split(f"{category}/", 1)[1] for f in files if f["path"].endswith(".json")
        ]
        os.makedirs(os.path.dirname(cache_path), exist_ok=True)
        # Write to a temp file and rename so an interrupted write cannot leave a
        # half-written cache file behind.
        tmp_path = cache_path + ".part"
        with open(tmp_path, "w") as f:
            json.dump(paths, f)
        os.replace(tmp_path, cache_path)
        return paths
    except Exception as e:
        print(f" Failed to list files for {category}: {e}")
        return []
def load_evidence_items(categories, limit, cache_dir):
    """Collect up to ``limit`` evidence items for each requested category.

    For every category the available files are discovered (falling back to the
    entry in ``SAMPLE_FILES`` when discovery yields nothing) and downloaded one
    by one until at least ``limit`` items have been gathered. Each item is tagged
    in place with ``_category_key``.

    Returns:
        A flat list of evidence items, at most ``limit`` per category.
    """
    collected = []
    for category in categories:
        candidate_files = discover_files(category, cache_dir)
        if not candidate_files:
            # Discovery failed or returned nothing: try the known sample file.
            fallback = SAMPLE_FILES.get(category)
            if not fallback:
                print(f" Skipping {category} — no files found")
                continue
            candidate_files = [fallback]

        bucket = []
        for rel_path in candidate_files:
            # Stop fetching further files once this category has enough items.
            if len(bucket) >= limit:
                break
            payload = download_evidence_file(category, rel_path, cache_dir)
            if not payload or "evidence_items" not in payload:
                continue
            for entry in payload["evidence_items"]:
                entry["_category_key"] = category
                bucket.append(entry)

        # The last file may overshoot the limit, so trim before merging.
        kept = bucket[:limit]
        collected.extend(kept)
        print(f" {CATEGORIES.get(category, category)}: {len(kept)} items loaded")
    return collected
# =============================================================================
# RETRIEVAL
# =============================================================================
def retrieve_for_item(item, top_k=10, mode="raw"):
    """
    Index one item's conversations in a throwaway palace and score retrieval.

    Every message becomes one document; the item's question is used as the query
    and the top-k results are compared against the evidence messages.

    Returns:
        recall: float (fraction of evidence messages found in top-k)
        details: dict with retrieved/evidence counts and the number found
    """
    question = item["question"]
    evidence_texts = {
        ev["text"].strip().lower() for ev in item.get("message_evidences", [])
    }

    # One document per message, with the speaker kept alongside for metadata.
    corpus, corpus_speakers = [], []
    for conv in item.get("conversations", []):
        for msg in conv.get("messages", []):
            corpus.append(msg["text"])
            corpus_speakers.append(msg["speaker"])

    if len(corpus) == 0:
        return 0.0, {"error": "empty corpus"}

    workdir = tempfile.mkdtemp(prefix="mempal_convomem_")
    try:
        client = chromadb.PersistentClient(path=os.path.join(workdir, "palace"))
        collection = client.create_collection("mempal_drawers")

        # In "aaak" mode the compressed form is indexed; matching below still
        # uses the original message text.
        docs = corpus
        if mode == "aaak":
            from mempalace.dialect import Dialect

            dialect = Dialect()
            docs = [dialect.compress(doc) for doc in corpus]

        collection.add(
            documents=docs,
            ids=[f"msg_{i}" for i in range(len(corpus))],
            metadatas=[{"speaker": s, "idx": i} for i, s in enumerate(corpus_speakers)],
        )
        results = collection.query(
            query_texts=[question],
            n_results=min(top_k, len(corpus)),
            include=["documents", "metadatas"],
        )

        retrieved_indices = [meta["idx"] for meta in results["metadatas"][0]]
        retrieved_texts = [corpus[i].strip().lower() for i in retrieved_indices]

        # An evidence message counts as found when it contains, or is contained
        # in, any retrieved message.
        # NOTE(review): an empty retrieved text is a substring of every evidence
        # string, so it matches everything — confirm this is intended.
        found = sum(
            1
            for ev_text in evidence_texts
            if any(ev_text in ret_text or ret_text in ev_text for ret_text in retrieved_texts)
        )

        recall = found / len(evidence_texts) if evidence_texts else 1.0
        return recall, {
            "retrieved_count": len(retrieved_indices),
            "evidence_count": len(evidence_texts),
            "found": found,
        }
    finally:
        # Always discard the temporary palace, even if indexing or querying failed.
        shutil.rmtree(workdir, ignore_errors=True)
# =============================================================================
# BENCHMARK RUNNER
# =============================================================================
def run_benchmark(categories, limit_per_cat, top_k, mode, cache_dir, out_file):
    """Run the ConvoMem retrieval benchmark and print a summary report.

    Args:
        categories: Category keys to evaluate.
        limit_per_cat: Maximum number of evidence items loaded per category.
        top_k: Number of messages retrieved per question.
        mode: Retrieval mode passed to ``retrieve_for_item``.
        cache_dir: Directory used to cache downloaded dataset files.
        out_file: Path of the JSON results log; nothing is written if falsy.

    Returns:
        None. Results are printed and optionally written to ``out_file``.
    """
    heavy_rule = "=" * 60
    # The original built this separator from an empty string, which printed a
    # blank line instead of a rule.
    light_rule = "-" * 60

    print(f"\n{heavy_rule}")
    print(" MemPal × ConvoMem Benchmark")
    print(heavy_rule)
    print(f" Categories: {len(categories)}")
    print(f" Limit/cat: {limit_per_cat}")
    print(f" Top-k: {top_k}")
    print(f" Mode: {mode}")
    print(light_rule)
    print("\n Loading data from HuggingFace...\n")

    items = load_evidence_items(categories, limit_per_cat, cache_dir)

    print(f"\n Total items: {len(items)}")
    print(f"{light_rule}\n")

    all_recall = []
    per_category = defaultdict(list)
    results_log = []
    start_time = datetime.now()

    for i, item in enumerate(items):
        question = item["question"]
        answer = item.get("answer", "")
        cat_key = item.get("_category_key", "unknown")

        recall, details = retrieve_for_item(item, top_k=top_k, mode=mode)
        all_recall.append(recall)
        per_category[cat_key].append(recall)

        results_log.append(
            {
                "question": question,
                "answer": answer,
                "category": cat_key,
                "recall": recall,
                "details": details,
            }
        )

        # Progress line every 20 items and once more on the final item.
        if (i + 1) % 20 == 0 or i == len(items) - 1:
            status = "HIT" if recall >= 1.0 else ("part" if recall > 0 else "miss")
            print(
                f" [{i + 1:4}/{len(items)}] avg_recall={sum(all_recall) / len(all_recall):.3f} last={status}"
            )

    elapsed = (datetime.now() - start_time).total_seconds()
    total = len(all_recall)
    avg_recall = sum(all_recall) / total if total else 0

    print(f"\n{heavy_rule}")
    print(f" RESULTS — MemPal ({mode} mode, top-{top_k})")
    print(heavy_rule)
    print(f" Time: {elapsed:.1f}s ({elapsed / max(len(items), 1):.2f}s per item)")
    print(f" Items: {len(items)}")
    print(f" Avg Recall: {avg_recall:.3f}")

    print("\n PER-CATEGORY RECALL:")
    for cat_key in sorted(per_category.keys()):
        vals = per_category[cat_key]
        avg = sum(vals) / len(vals)
        name = CATEGORIES.get(cat_key, cat_key)
        perfect = sum(1 for v in vals if v >= 1.0)
        print(f" {name:25} R={avg:.3f} perfect={perfect}/{len(vals)}")

    perfect_total = sum(1 for r in all_recall if r >= 1.0)
    zero_total = sum(1 for r in all_recall if r == 0)
    # Guard the percentages: with no items loaded (e.g. every download failed)
    # the unguarded division raised ZeroDivisionError.
    perfect_pct = perfect_total / total * 100 if total else 0.0
    zero_pct = zero_total / total * 100 if total else 0.0
    print("\n DISTRIBUTION:")
    print(f" Perfect (1.0): {perfect_total:4} ({perfect_pct:.1f}%)")
    print(f" Zero (0.0): {zero_total:4} ({zero_pct:.1f}%)")

    print(f"\n{heavy_rule}\n")

    if out_file:
        with open(out_file, "w") as f:
            json.dump(results_log, f, indent=2)
        print(f" Results saved to: {out_file}")
# =============================================================================
# CLI
# =============================================================================
if __name__ == "__main__":
    # Command-line entry point: parse options, pick categories, run the benchmark.
    cli = argparse.ArgumentParser(description="MemPal × ConvoMem Benchmark")
    cli.add_argument("--limit", type=int, default=100, help="Items per category (default: 100)")
    cli.add_argument("--top-k", type=int, default=10, help="Top-k retrieval (default: 10)")
    cli.add_argument(
        "--category",
        choices=list(CATEGORIES.keys()) + ["all"],
        default="all",
        help="Category to test (default: all)",
    )
    cli.add_argument(
        "--mode",
        choices=["raw", "aaak"],
        default="raw",
        help="Retrieval mode",
    )
    cli.add_argument("--cache-dir", default="/tmp/convomem_cache", help="Cache directory")
    cli.add_argument("--out", default=None, help="Output JSON file")
    opts = cli.parse_args()

    # "all" expands to every known category; otherwise run just the one named.
    selected = list(CATEGORIES.keys()) if opts.category == "all" else [opts.category]

    # Without --out, write a timestamped results file under benchmarks/.
    out_path = opts.out
    if not out_path:
        out_path = f"benchmarks/results_convomem_{opts.mode}_top{opts.top_k}_{datetime.now().strftime('%Y%m%d_%H%M')}.json"

    run_benchmark(selected, opts.limit, opts.top_k, opts.mode, opts.cache_dir, out_path)
-508
View File
@@ -1,508 +0,0 @@
{
"dev": [
"cc06de0d",
"f9e8c073",
"b320f3f8",
"a89d7624",
"311778f1",
"gpt4_59c863d7",
"bbf86515",
"099778bb",
"e831120c",
"dcfa8644",
"8fb83627",
"e66b632c",
"gpt4_7fce9456",
"55241a1f",
"352ab8bd",
"f4f1d8a4",
"830ce83f",
"2311e44b",
"09ba9854",
"gpt4_a1b77f9c",
"07741c45",
"gpt4_70e84552",
"b46e15ee",
"6071bd76",
"6f9b354f",
"1d4da289",
"gpt4_8279ba02",
"6456829e_abs",
"0db4c65d",
"d6062bb9",
"60bf93ed_abs",
"d3ab962e",
"87f22b4a",
"e01b8e2f",
"gpt4_7ddcf75f",
"8ebdbe50",
"26bdc477",
"29f2956b_abs",
"2311e44b_abs",
"75f70248",
"852ce960",
"f0e564bc",
"fca70973",
"3c1045c8",
"18bc8abd",
"afdc33df",
"54026fce",
"b9cfe692",
"6456829e",
"e6041065"
],
"held_out": [
"gpt4_15e38248",
"gpt4_2ba83207",
"2133c1b5_abs",
"gpt4_8279ba03",
"76d63226",
"1192316e",
"gpt4_fa19884d",
"gpt4_372c3eed_abs",
"1a8a66a6",
"gpt4_fe651585",
"e25c3b8d",
"945e3d21",
"86b68151",
"1c0ddc50",
"1e043500",
"d682f1a2",
"gpt4_b5700ca0",
"91b15a6e",
"ce6d2d27",
"f523d9fe",
"7024f17c",
"8752c811",
"gpt4_f420262d",
"d01c6aa8",
"4b24c848",
"7e974930",
"3fdac837",
"gpt4_b4a80587",
"c18a7dc8",
"80ec1f4f_abs",
"7527f7e2",
"6ade9755",
"89941a94",
"gpt4_1d80365e",
"2133c1b5",
"06db6396",
"gpt4_88806d6e",
"88432d0a",
"3ba21379",
"0862e8bf",
"aae3761f",
"5025383b",
"gpt4_e061b84f",
"73d42213",
"4bc144e2",
"gpt4_5501fe77",
"00ca467f",
"dfde3500",
"01493427",
"b6025781",
"a96c20ee_abs",
"982b5123_abs",
"gpt4_fa19884c",
"gpt4_1a1dc16d",
"28dc39ac",
"gpt4_2d58bcd6",
"51c32626",
"c4ea545c",
"1da05512",
"gpt4_385a5000",
"577d4d32",
"72e3ee87",
"f4f1d8a4_abs",
"9d25d4e0",
"b29f3365",
"b759caee",
"10e09553",
"1d4e3b97",
"d52b4f67",
"gpt4_e072b769",
"58ef2f1c",
"6e984301",
"41275add",
"gpt4_59149c77",
"2ebe6c90",
"1cea1afa",
"gpt4_1e4a8aec",
"6c49646a",
"8a2466db",
"gpt4_65aabe59",
"gpt4_93159ced",
"51a45a95",
"af8d2e46",
"561fabcd",
"370a8ff4",
"gpt4_d84a3211",
"gpt4_7a0daae1",
"2a1811e2",
"gpt4_78cf46a3",
"1568498a",
"6b7dfb22",
"6ae235be",
"bc8a6e93_abs",
"681a1674",
"06878be2",
"1a1907b4",
"0e4e4c46",
"gpt4_85da3956",
"gpt4_f420262c",
"2bf43736",
"bc149d6b",
"09d032c9",
"5c40ec5b",
"eac54adc",
"993da5e2",
"71a3fd6b",
"gpt4_0b2f1d21",
"ad7109d1",
"4c36ccef",
"c8c3f81d",
"edced276_abs",
"0bc8ad92",
"gpt4_468eb064",
"2ebe6c92",
"cc6d1ec1",
"4dfccbf8",
"95228167",
"ba358f49",
"45dc21b6",
"db467c8c",
"720133ac",
"67e0d0f2",
"cc5ded98",
"726462e0",
"4100d0a0",
"3a704032",
"gpt4_7ca326fa",
"ec81a493",
"618f13b2",
"58470ed2",
"gpt4_4fc4f797",
"60036106",
"157a136e",
"6222b6eb",
"69fee5aa",
"19b5f2b3_abs",
"gpt4_d12ceb0e",
"51b23612",
"2318644b",
"3fe836c9",
"gpt4_7de946e7",
"71017277",
"f0853d11",
"dc439ea3",
"gpt4_2f91af09",
"9a707b81",
"bc8a6e93",
"c14c00dd",
"8979f9ec",
"cf22b7bf",
"gpt4_ec93e27f",
"gpt4_468eb063",
"41698283",
"1de5cff2",
"21d02d0d",
"c7cf7dfd",
"gpt4_ab202e7f",
"dccbc061",
"078150f1",
"e3038f8c",
"gpt4_c27434e8_abs",
"2698e78f",
"031748ae_abs",
"gpt4_59149c78",
"c8f1aeed",
"184da446",
"gpt4_b5700ca9",
"89527b6b",
"0977f2af",
"853b0a1d",
"a346bb18",
"3249768e",
"gpt4_2f8be40d",
"gpt4_93159ced_abs",
"eeda8a6d",
"7a8d0b71",
"95bcc1c8",
"gpt4_2487a7cb",
"85fa3a3f",
"7e00a6cb",
"e3fc4d6e",
"59524333",
"37f165cf",
"0ddfec37",
"60bf93ed",
"d7c942c3",
"80ec1f4f",
"ceb54acb",
"9aaed6a3",
"gpt4_4929293a",
"ed4ddc30",
"545bd2b5",
"2788b940",
"ef9cf60a",
"gpt4_7f6b06db",
"0ea62687",
"3d86fd0a",
"3e321797",
"d24813b1",
"38146c39",
"efc3f7c2",
"7401057b",
"5809eb10",
"28bcfaac",
"1903aded",
"gpt4_194be4b3",
"gpt4_e414231f",
"0ddfec37_abs",
"c2ac3c61",
"gpt4_4ef30696",
"1f2b8d4f",
"0f05491a",
"8550ddae",
"8077ef71",
"b86304ba",
"e61a7584",
"8cf51dda",
"gpt4_2f584639",
"08e075c7",
"5d3d2817",
"7405e8b1",
"a3045048",
"gpt4_731e37d7",
"c8090214_abs",
"36580ce8",
"ba358f49_abs",
"gpt4_d6585ce8",
"e56a43b9",
"2c63a862",
"gpt4_5438fa52",
"07b6f563",
"gpt4_31ff4165",
"0bb5a684",
"71315a70",
"gpt4_cd90e484",
"gpt4_8c8961ae",
"gpt4_fe651585_abs",
"36b9f61e",
"gpt4_b0863698",
"gpt4_1d4ab0c9",
"15745da0_abs",
"0862e8bf_abs",
"bcbe585f",
"a2f3aa27",
"gpt4_6dc9b45b",
"ccb36322",
"f685340e",
"9ea5eabc",
"gpt4_372c3eed",
"37d43f65",
"bf659f65",
"b0479f84",
"gpt4_213fd887",
"e4e14d04",
"f8c5f88b",
"gpt4_18c2b244",
"a11281a2",
"gpt4_2655b836",
"e47becba",
"gpt4_74aed68e",
"gpt4_af6db32f",
"6cb6f249",
"77eafa52",
"gpt4_93f6379c",
"e8a79c70",
"7a87bd0c",
"gpt4_6ed717ea",
"d6233ab6",
"c19f7a0b",
"gpt4_61e13b3c",
"d23cf73b",
"gpt4_1e4a8aeb",
"ba61f0b9",
"118b2229",
"488d3006",
"c4a1ceb8",
"8e91e7d9",
"42ec0761",
"65240037",
"fea54f57",
"c8090214",
"b01defab",
"6aeb4375_abs",
"faba32e5",
"c5e8278d",
"gpt4_e414231e",
"eeda8a6d_abs",
"gpt4_8e165409",
"af082822",
"22d2cb42",
"92a0aa75",
"1c549ce4",
"25e5aa4f",
"gpt4_68e94288",
"4baee567",
"18dcd5a5",
"dad224aa",
"gpt4_f2262a51",
"29f2956b",
"21436231",
"19b5f2b3",
"gpt4_1916e0ea",
"gpt4_45189cb4",
"0a995998",
"b6019101",
"9bbe84a2",
"61f8c8f8",
"9a707b82",
"8cf4d046",
"eac54add",
"75832dbd",
"gpt4_98f46fc6",
"d596882b",
"88432d0a_abs",
"16c90bf4",
"f685340e_abs",
"b5ef892d",
"gpt4_f49edff3",
"gpt4_483dd43c",
"bb7c3b45",
"gpt4_7abb270c",
"gpt4_9a159967",
"07741c44",
"4d6b87c8",
"6aeb4375",
"gpt4_d6585ce9",
"60472f9c",
"caf9ead2",
"32260d93",
"60159905",
"0a34ad58",
"a40e080f",
"10d9b85a",
"a06e4cfe",
"4f54b7c9",
"6613b389",
"70b3e69b",
"gpt4_7bc6cf22",
"gpt4_0a05b494",
"778164c6",
"195a1a1b",
"8464fc84",
"b46e15ed",
"603deb26",
"eaca4986",
"2698e78f_abs",
"gpt4_21adecb5",
"2e6d26dc",
"5831f84d",
"08f4fc43",
"3f1e9474",
"c9f37c46",
"gpt4_2f56ae70",
"1b9b7252",
"35a27287",
"gpt4_d31cdae3",
"129d1232",
"4adc0475",
"27016adc",
"46a3abf7",
"9ee3ecd6",
"982b5123",
"09ba9854_abs",
"0e5e2d1a",
"e9327a54",
"86f00804",
"e982271f",
"7161e7e2",
"57f827a0",
"6a27ffc2",
"edced276",
"gpt4_d9af6064",
"75499fd8",
"60d45044",
"gpt4_70e84552_abs",
"2ce6a0f2",
"gpt4_4929293b",
"a1cc6108",
"gpt4_5dcc0aab",
"a3838d2b",
"c7dc5443",
"505af2f5",
"gpt4_68e94287",
"15745da0",
"0100672e",
"a82c026e",
"5e1b23de",
"71017276",
"89941a93",
"6b168ec8",
"affe2881",
"0edc2aef",
"gpt4_2312f94c",
"a4996e51",
"c6853660",
"ef66a6e5",
"8a137a7f",
"a96c20ee",
"fca762bc",
"ac031881",
"d905b33f",
"e493bb7c",
"a9f6b44c",
"dd2973ad",
"8aef76bc",
"f35224e0",
"8b9d4367",
"gpt4_c27434e8",
"gpt4_a56e767c",
"eace081b",
"5a4f22c0",
"58bf7951",
"c4f10528",
"50635ada",
"06f04340",
"0bc8ad93",
"e5ba910e_abs",
"5a7937c8",
"a3332713",
"4388e9dd",
"8c18457d",
"gpt4_2c50253f",
"6a1eabeb",
"b3c15d39",
"gpt4_e061b84g",
"3b6f954b",
"gpt4_76048e76",
"4dfccbf7",
"2b8f3739",
"d851d5ba",
"4fd1909e",
"94f70d80",
"66f24dbb",
"a08a253f",
"6e984302",
"001be529",
"gpt4_a2d1d1f6",
"cc539528",
"e48988bc",
"gpt4_4cd9eba1",
"8e9d538c",
"a1eacc2a",
"6d550036",
"gpt4_e05b82a6",
"81507db6",
"caf03d32",
"031748ae",
"c960da58",
"1faac195",
"gpt4_4edbafa2"
],
"seed": 42,
"dev_size": 50
}
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
-470
View File
@@ -1,470 +0,0 @@
#!/usr/bin/env python3
"""
MemPal × MemBench Benchmark
============================
MemBench (ACL 2025): https://aclanthology.org/2025.findings-acl.989/
Data: https://github.com/import-myself/Membench
MemBench tests memory across multi-turn conversations in multiple categories:
- highlevel: inferences requiring aggregation across turns ("what kind of X do I prefer?")
- lowlevel: single-turn fact recall ("what X did I mention?")
- knowledge_update: facts that change over time
- comparative: comparing two items mentioned across turns
- conditional: conditional reasoning over remembered facts
- noisy: distractors / irrelevant info mixed in
- aggregative: combining info from multiple turns
- RecMultiSession: recommendations across multiple topic sessions
Each item has:
- message_list[0]: list of turns [{user, assistant, time, place}]
- QA: {question, answer, choices (A/B/C/D), ground_truth, target_step_id}
We measure RETRIEVAL RECALL: is the answer-relevant turn in the top-K retrieved?
Answer ACCURACY against the multiple-choice options is not scored here; ground_truth is only recorded in the results file.
Usage:
python benchmarks/membench_bench.py /tmp/membench/MemData/FirstAgent
python benchmarks/membench_bench.py /tmp/membench/MemData/FirstAgent --category highlevel
python benchmarks/membench_bench.py /tmp/membench/MemData/FirstAgent --limit 50
"""
import sys
import json
import re
import argparse
from pathlib import Path
from datetime import datetime
from collections import defaultdict
import chromadb
sys.path.insert(0, str(Path(__file__).parent.parent))
# ── Shared ephemeral ChromaDB client ──────────────────────────────────────────
_bench_client = chromadb.EphemeralClient()
def _fresh_collection(name="membench_drawers"):
try:
_bench_client.delete_collection(name)
except Exception:
pass
return _bench_client.create_collection(name)
# ── Stop words (same as locomo_bench) ─────────────────────────────────────────
# Lower-case words dropped from keyword lists by _kw(): question words,
# auxiliaries, articles, pronouns, prepositions and a few generic verbs.
STOP_WORDS = set(
    (
        "what when where who how which did do was were have has had is are "
        "the a an my me i you your their it its in on at to for of with by from "
        "ago last that this there about get got give gave buy bought made make "
        "said would could should might can will shall kind type like prefer "
        "enjoy think feel"
    ).split()
)
# Capitalised words that _person_names() must not report as names
# (sentence-initial question words, auxiliaries, determiners, pronouns).
NOT_NAMES = set(
    (
        "What When Where Who How Which Did Do Was Were Have Has Had Is Are "
        "The My Our I It Its This That These Those"
    ).split()
)
def _kw(text):
words = re.findall(r"\b[a-z]{3,}\b", text.lower())
return [w for w in words if w not in STOP_WORDS]
def _kw_overlap(query_kws, doc_text):
if not query_kws:
return 0.0
doc_lower = doc_text.lower()
hits = sum(1 for kw in query_kws if kw in doc_lower)
return hits / len(query_kws)
def _person_names(text):
words = re.findall(r"\b[A-Z][a-z]{2,15}\b", text)
return list(set(w for w in words if w not in NOT_NAMES))
# ── MemBench data loading ─────────────────────────────────────────────────────
# Maps each selectable category name to the JSON file, relative to the data
# directory, that load_membench() reads for it. The keys also serve as the
# allowed values of the --category command-line option.
CATEGORY_FILES = {
    "simple": "simple.json",
    "highlevel": "highlevel.json",
    "knowledge_update": "knowledge_update.json",
    "comparative": "comparative.json",
    "conditional": "conditional.json",
    "noisy": "noisy.json",
    "aggregative": "aggregative.json",
    "highlevel_rec": "highlevel_rec.json",
    "lowlevel_rec": "lowlevel_rec.json",
    "RecMultiSession": "RecMultiSession.json",
    "post_processing": "post_processing.json",
}
def load_membench(data_dir: str, categories=None, topic="movie", limit=0):
    """
    Load MemBench questions from the FirstAgent directory.

    Categories without a known file, or whose file is missing from ``data_dir``,
    are silently skipped. When ``topic`` is set, only that topic's entries are
    kept, plus the "roles" and "events" groups of role-keyed files. Entries
    lacking a message list or a QA section are dropped. A positive ``limit``
    truncates the combined list.

    Returns list of dicts:
    {category, topic, tid, turns, question, choices, ground_truth, answer_text,
    target_step_ids}
    """
    root = Path(data_dir)
    selected = list(CATEGORY_FILES.keys()) if categories is None else categories

    loaded = []
    for cat in selected:
        fname = CATEGORY_FILES.get(cat)
        if not fname or not (root / fname).exists():
            continue
        with open(root / fname) as f:
            raw = json.load(f)

        # Files have two formats:
        #   topic-keyed: {"movie": [...], "food": [...], "book": [...]}
        #   role-keyed:  {"roles": [...], "events": [...]}
        # Topic-keyed groups are filtered by the topic argument; for role-keyed
        # files the group key itself is recorded as the "topic".
        for group_key, entries in raw.items():
            if topic and group_key not in (topic, "roles", "events"):
                continue
            for entry in entries:
                turns = entry.get("message_list", [])  # full message_list (all sessions)
                qa = entry.get("QA", {})
                if not (turns and qa):
                    continue
                record = {
                    "category": cat,
                    "topic": group_key,
                    "tid": entry.get("tid", 0),
                    "turns": turns,
                    "question": qa.get("question", ""),
                    "choices": qa.get("choices", {}),
                    "ground_truth": qa.get("ground_truth", ""),
                    "answer_text": qa.get("answer", ""),
                    "target_step_ids": qa.get("target_step_id", []),
                }
                loaded.append(record)

    return loaded[:limit] if limit > 0 else loaded
# ── Indexing ──────────────────────────────────────────────────────────────────
def _turn_text(turn: dict) -> str:
"""Extract text from a turn regardless of field naming convention."""
user = turn.get("user") or turn.get("user_message", "")
asst = turn.get("assistant") or turn.get("assistant_message", "")
time = turn.get("time", "")
text = f"[User] {user} [Assistant] {asst}"
if time:
text = f"[{time}] " + text
return text
def index_turns(collection, message_list, item_key: str):
    """
    Add every turn of every session to ``collection`` in a single call.

    message_list can be:
    - Flat list of turns: [turn, turn, ...] (highlevel.json format)
    - List of sessions: [[turn, turn], [turn, turn], ...] (simple.json format)

    Document ids are ``<item_key>_g<n>`` where n counts indexed turns across all
    sessions. The ``sid`` metadata is the turn's numeric 'sid' (or 'mid') if it
    has one, else that running count. Non-list sessions and non-dict turns are
    skipped.

    Returns number of turns indexed.
    """
    # A flat list of turn dicts is treated as one single session.
    is_flat = bool(message_list) and isinstance(message_list[0], dict)
    sessions = [message_list] if is_flat else message_list

    docs, ids, metas = [], [], []
    for s_idx, session in enumerate(sessions):
        if not isinstance(session, list):
            continue
        for t_idx, turn in enumerate(session):
            if not isinstance(turn, dict):
                continue
            position = len(docs)  # running index over all indexed turns
            raw_sid = turn.get("sid", turn.get("mid"))
            numeric_sid = isinstance(raw_sid, (int, float))
            ids.append(f"{item_key}_g{position}")
            docs.append(_turn_text(turn))
            metas.append(
                {
                    "item_key": item_key,
                    "sid": int(raw_sid) if numeric_sid else position,
                    "s_idx": s_idx,
                    "t_idx": t_idx,
                    "global_idx": position,
                }
            )

    # Skip the add call entirely when there is nothing to index.
    if docs:
        collection.add(documents=docs, ids=ids, metadatas=metas)
    return len(docs)
# ── Scoring ───────────────────────────────────────────────────────────────────
def run_membench(
    data_dir, categories=None, topic="movie", top_k=5, limit=0, mode="raw", out_file=None
):
    """Run MemBench retrieval evaluation.

    Each item's turns are indexed into a fresh collection, the question is used
    as the query, and the item counts as a hit when any target turn id appears
    among the top-k results.

    Args:
        data_dir: Directory holding the MemBench JSON files.
        categories: Category names to load, or None for all.
        topic: Topic filter passed to ``load_membench``.
        top_k: Number of results that count towards a hit.
        limit: Maximum number of items (0 = all).
        mode: "raw" keeps the query order; "hybrid" over-fetches 3x and re-ranks
            by keyword overlap before cutting to top-k.
        out_file: Path of the JSON results file; nothing is written if falsy.

    Returns:
        The list of per-item result dicts, or None when no items were found.
    """
    items = load_membench(data_dir, categories=categories, topic=topic, limit=limit)
    if not items:
        print(f"No items found in {data_dir}")
        return

    heavy_rule = "=" * 58
    # The original built this separator from an empty string, which printed a
    # blank line instead of a rule.
    light_rule = "-" * 58

    print(f"\n{heavy_rule}")
    print(" MemPal × MemBench")
    print(heavy_rule)
    print(f" Data dir: {data_dir}")
    print(f" Categories: {', '.join(categories or ['all'])}")
    print(f" Topic: {topic or 'all'}")
    print(f" Items: {len(items)}")
    print(f" Top-k: {top_k}")
    print(f" Mode: {mode}")
    print(f"{light_rule}\n")

    results = []
    by_cat = defaultdict(lambda: {"hit_at_k": 0, "total": 0})
    total_hit = 0

    for idx, item in enumerate(items, 1):
        item_key = f"{item['category']}_{item['topic']}_{idx}"  # idx ensures unique key

        collection = _fresh_collection()

        # Index all turns from all sessions.
        # Items skipped below are left out of the per-category totals but still
        # count in the overall denominator (len(items)).
        n_indexed = index_turns(collection, item["turns"], item_key)
        if n_indexed < 1:
            continue

        question = item["question"]
        # Hybrid mode over-fetches so re-ranking has candidates beyond top-k.
        n_retrieve = min(top_k * 3 if mode == "hybrid" else top_k, n_indexed)
        if n_retrieve < 1:
            continue

        # Retrieve
        res = collection.query(
            query_texts=[question],
            n_results=n_retrieve,
            include=["distances", "metadatas", "documents"],
        )

        retrieved_sids = [m["sid"] for m in res["metadatas"][0]]
        retrieved_global = [m["global_idx"] for m in res["metadatas"][0]]
        retrieved_docs = res["documents"][0]
        raw_distances = res["distances"][0]

        # Hybrid re-scoring: predicate keywords (person names excluded)
        if mode == "hybrid":
            names = _person_names(question)
            name_words = {n.lower() for n in names}
            all_kws = _kw(question)
            predicate_kws = [w for w in all_kws if w not in name_words]

            scored = []
            for dist, sid, gidx, doc in zip(
                raw_distances, retrieved_sids, retrieved_global, retrieved_docs
            ):
                pred_overlap = _kw_overlap(predicate_kws, doc)
                # Full keyword overlap shrinks the distance by at most 50%.
                fused = dist * (1.0 - 0.50 * pred_overlap)
                scored.append((fused, sid, gidx, doc))
            scored.sort(key=lambda x: x[0])
            retrieved_sids = [x[1] for x in scored[:top_k]]
            retrieved_global = [x[2] for x in scored[:top_k]]
        else:
            retrieved_sids = retrieved_sids[:top_k]
            retrieved_global = retrieved_global[:top_k]

        # Check if any target turn is retrieved.
        # target_step_id format varies: [sid, ?] or [global_idx, ?]
        # Try matching against both sid and global_idx.
        target_sids = set()
        for step in item["target_step_ids"]:
            if isinstance(step, list) and len(step) >= 1:
                target_sids.add(step[0])  # first element is the turn sid/global index

        hit = bool(target_sids & set(retrieved_sids)) or bool(target_sids & set(retrieved_global))

        if hit:
            total_hit += 1
            by_cat[item["category"]]["hit_at_k"] += 1
        by_cat[item["category"]]["total"] += 1

        results.append(
            {
                "category": item["category"],
                "topic": item["topic"],
                "tid": item["tid"],
                "question": question,
                "ground_truth": item["ground_truth"],
                "answer_text": item["answer_text"],
                "target_sids": list(target_sids),
                "retrieved_sids": retrieved_sids,
                "retrieved_global": retrieved_global,
                "hit_at_k": hit,
            }
        )

        if idx % 50 == 0:
            running_pct = total_hit / idx * 100
            print(f" [{idx:4}/{len(items)}] running R@{top_k}: {running_pct:.1f}%")

    # Final results
    overall = total_hit / len(items) * 100 if items else 0
    print(f"\n{heavy_rule}")
    print(f" RESULTS — MemPal on MemBench ({mode} mode, top-{top_k})")
    print(heavy_rule)
    print(f"\n Overall R@{top_k}: {overall:.1f}% ({total_hit}/{len(items)})\n")
    print(" By category:")
    for cat, v in sorted(by_cat.items()):
        pct = v["hit_at_k"] / v["total"] * 100 if v["total"] else 0
        print(f" {cat:20} {pct:5.1f}% ({v['hit_at_k']}/{v['total']})")
    print(f"\n{heavy_rule}\n")

    if out_file:
        with open(out_file, "w") as f:
            json.dump(results, f, indent=2)
        print(f" Results saved to: {out_file}")

    return results
# ── CLI ───────────────────────────────────────────────────────────────────────
if __name__ == "__main__":
    # Command-line entry point: parse options, derive the output path, run.
    cli = argparse.ArgumentParser(description="MemPal × MemBench Benchmark")
    cli.add_argument("data_dir", help="Path to MemBench FirstAgent directory")
    cli.add_argument(
        "--category",
        default=None,
        choices=list(CATEGORY_FILES.keys()),
        help="Run a single category (default: all)",
    )
    cli.add_argument(
        "--topic", default="movie", help="Topic filter: movie, food, book (default: movie)"
    )
    cli.add_argument("--top-k", type=int, default=5, help="Retrieval top-k (default: 5)")
    cli.add_argument("--limit", type=int, default=0, help="Limit items (0 = all)")
    cli.add_argument(
        "--mode",
        choices=["raw", "hybrid"],
        default="hybrid",
        help="Retrieval mode (default: hybrid)",
    )
    cli.add_argument("--out", default=None, help="Output JSON file (default: auto-named)")
    opts = cli.parse_args()

    # Without --out, name the results file after the run's settings and the time.
    out_path = opts.out
    if not out_path:
        cat_tag = f"_{opts.category}" if opts.category else "_all"
        stamp = datetime.now().strftime('%Y%m%d_%H%M')
        out_path = (
            f"benchmarks/results_membench_{opts.mode}{cat_tag}_{opts.topic}"
            f"_top{opts.top_k}_{stamp}.json"
        )

    run_membench(
        opts.data_dir,
        # None lets load_membench fall back to every known category.
        categories=[opts.category] if opts.category else None,
        topic=opts.topic,
        top_k=opts.top_k,
        limit=opts.limit,
        mode=opts.mode,
        out_file=out_path,
    )
-301
View File
@@ -1,301 +0,0 @@
"""Mining throughput benchmark: per-chunk vs batched upsert, CPU vs GPU.
Compares the legacy per-chunk ``add_drawer`` loop against the batched
``collection.upsert`` path introduced in the "batched upsert + GPU" PR.
Runs both paths on an identical seeded synthetic corpus, reports
wall-clock time + drawers/sec, and prints a markdown table suitable
for pasting into a PR description.
Usage
-----
# CPU (whatever onnxruntime is installed — CPU if you don't have
# onnxruntime-gpu):
uv run python benchmarks/mine_bench.py
# GPU (NVIDIA):
uv venv /tmp/gpu && source /tmp/gpu/bin/activate
uv pip install -e '.[gpu]' 'nvidia-cudnn-cu12>=9,<10' \\
'nvidia-cuda-runtime-cu12' 'nvidia-cublas-cu12'
export LD_LIBRARY_PATH=$(python -c "import nvidia.cudnn, os; \\
print(os.path.dirname(nvidia.cudnn.__file__)+'/lib')"):$LD_LIBRARY_PATH
MEMPALACE_EMBEDDING_DEVICE=cuda python benchmarks/mine_bench.py
Flags
-----
--device cpu|cuda|coreml|dml|auto Override MEMPALACE_EMBEDDING_DEVICE
--scenarios small,medium,large Which scenarios to run
--seed 42 RNG seed for reproducibility
"""
from __future__ import annotations
import argparse
import hashlib
import os
import random
import shutil
import string
import sys
import tempfile
import time
from datetime import datetime
from pathlib import Path
def build_corpus(dest: Path, n_files: int, paragraphs_per_file: int, seed: int) -> None:
    """Generate a seeded synthetic project under ``dest``.

    Writes ``n_files`` markdown files named ``doc_000.md``, ``doc_001.md``, ...
    Each file holds ``paragraphs_per_file`` paragraphs separated by a blank
    line, and each paragraph is 12 random lowercase words of 3-10 letters.
    A ``mempalace.yaml`` declaring wing ``bench`` with a single ``general``
    room is written next to them.

    Args:
        dest: Directory to create (parents included) and fill.
        n_files: Number of markdown documents to write.
        paragraphs_per_file: Paragraphs per document.
        seed: Seed for the private ``random.Random`` instance; the same seed
            and sizes produce byte-identical files.
    """
    rng = random.Random(seed)
    dest.mkdir(parents=True, exist_ok=True)
    for i in range(n_files):
        paragraphs = []
        for _ in range(paragraphs_per_file):
            words = [
                "".join(rng.choices(string.ascii_lowercase, k=rng.randint(3, 10)))
                for _ in range(12)
            ]
            paragraphs.append(" ".join(words))
        # Explicit encoding: the miner reads these files back as UTF-8, so do
        # not depend on the platform default.
        (dest / f"doc_{i:03d}.md").write_text("\n\n".join(paragraphs), encoding="utf-8")
    # The room's keys must be indented under the "- name" list item;
    # otherwise "description"/"keywords" are not part of the room mapping and
    # the file is not valid YAML.
    (dest / "mempalace.yaml").write_text(
        "wing: bench\n"
        "rooms:\n"
        "  - name: general\n"
        "    description: all\n"
        "    keywords: [general]\n",
        encoding="utf-8",
    )
def _process_file_unbatched(filepath, project_path, collection, wing, rooms, agent, closets_col):
    """Legacy per-chunk upsert path (pre-batching).

    Reproduces the exact loop shape the miner used before this PR so the
    comparison is apples-to-apples; only the upsert granularity differs.

    Returns:
        ``(drawers_added, room)``. When the file is skipped before room
        detection (already mined, unreadable, or shorter than
        ``miner.MIN_CHUNK_SIZE``) the result is ``(0, "general")``.
    """
    from mempalace import miner
    from mempalace.palace import (
        build_closet_lines,
        file_already_mined,
        mine_lock,
        purge_file_closets,
        upsert_closet_lines,
    )

    source_file = str(filepath)
    # Cheap pre-check outside the lock.
    if file_already_mined(collection, source_file, check_mtime=True):
        return 0, "general"
    try:
        # errors="replace" keeps undecodable bytes from raising.
        content = filepath.read_text(encoding="utf-8", errors="replace")
    except OSError:
        return 0, "general"
    content = content.strip()
    if len(content) < miner.MIN_CHUNK_SIZE:
        return 0, "general"
    room = miner.detect_room(filepath, content, rooms, project_path)
    chunks = miner.chunk_text(content, source_file)
    # NOTE(review): the nesting below was reconstructed; the delete/insert
    # sequence is assumed to run while mine_lock is held — confirm.
    with mine_lock(source_file):
        # Same check again once the lock is held (presumably in case another
        # miner finished this file in the meantime).
        if file_already_mined(collection, source_file, check_mtime=True):
            return 0, room
        try:
            # Best-effort removal of rows already stored for this file; any
            # failure is deliberately ignored.
            collection.delete(where={"source_file": source_file})
        except Exception:
            pass
        drawers_added = 0
        # One add_drawer call per chunk: this is the per-chunk path being
        # benchmarked.
        for chunk in chunks:
            miner.add_drawer(
                collection=collection,
                wing=wing,
                room=room,
                content=chunk["content"],
                source_file=source_file,
                chunk_index=chunk["chunk_index"],
                agent=agent,
            )
            drawers_added += 1
        if closets_col and drawers_added > 0:
            # Drawer ids are rebuilt here from wing, room and the first 24 hex
            # chars of SHA-256(source_file + chunk_index).
            # NOTE(review): presumably mirrors the id scheme inside
            # miner.add_drawer — confirm.
            drawer_ids = [
                f"drawer_{wing}_{room}_"
                f"{hashlib.sha256((source_file + str(c['chunk_index'])).encode()).hexdigest()[:24]}"
                for c in chunks
            ]
            closet_lines = build_closet_lines(source_file, drawer_ids, content, wing, room)
            closet_id_base = (
                f"closet_{wing}_{room}_"
                f"{hashlib.sha256(source_file.encode()).hexdigest()[:24]}"
            )
            closet_meta = {
                "wing": wing,
                "room": room,
                "source_file": source_file,
                "drawer_count": drawers_added,
                "filed_at": datetime.now().isoformat(),
                "normalize_version": miner.NORMALIZE_VERSION,
            }
            # Remove this file's existing closet entries before writing the
            # fresh ones.
            purge_file_closets(closets_col, source_file)
            upsert_closet_lines(closets_col, closet_id_base, closet_lines, closet_meta)
    return drawers_added, room
def mine_once(project_dir: str, palace_path: str, batched: bool) -> tuple[int, float]:
    """Mine ``project_dir`` into the palace at ``palace_path`` and time it.

    ``batched=True`` routes every file through ``miner.process_file``;
    ``batched=False`` uses the legacy ``_process_file_unbatched`` loop.

    Returns:
        ``(total_drawers, elapsed_seconds)``. The clock covers only the
        per-file loop, not config loading or collection setup.
    """
    from mempalace import miner
    from mempalace.miner import load_config, scan_project
    from mempalace.palace import get_closets_collection, get_collection

    resolved_root = Path(project_dir).resolve()
    cfg = load_config(project_dir)
    wing_name = cfg["wing"]
    room_defs = cfg.get("rooms", [])
    candidates = scan_project(project_dir)
    drawers_col = get_collection(palace_path)
    closets_col = get_closets_collection(palace_path)

    def _mine_file(path):
        """Dispatch one file to the code path selected by ``batched``."""
        if not batched:
            return _process_file_unbatched(
                path, resolved_root, drawers_col, wing_name, room_defs, "bench", closets_col
            )
        return miner.process_file(
            filepath=path,
            project_path=resolved_root,
            collection=drawers_col,
            wing=wing_name,
            rooms=room_defs,
            agent="bench",
            dry_run=False,
            closets_col=closets_col,
        )

    drawer_total = 0
    started = time.perf_counter()
    for path in candidates:
        added, _room = _mine_file(path)
        drawer_total += added
    elapsed = time.perf_counter() - started
    return drawer_total, elapsed
def _reset_backend_caches() -> None:
    """Empty the default backend's ``_clients`` and ``_freshness`` caches.

    Called before each timed run so no run reuses a client that an earlier
    run already opened in this process.
    """
    from mempalace.palace import _DEFAULT_BACKEND as backend

    for cache_attr in ("_clients", "_freshness"):
        getattr(backend, cache_attr).clear()
def run_scenario(label: str, n_files: int, paragraphs_per_file: int, seed: int) -> dict:
    """Benchmark one corpus shape with the per-chunk path, then the batched path.

    Each path gets its own temporary directory holding a freshly generated
    corpus (same seed) and an empty palace; the directory is removed
    afterwards even if mining raises.

    Returns:
        A dict with the scenario label and sizes, the batched run's drawer
        count, time and rate for both paths, and ``speedup``
        (unbatched time / batched time, or 0.0 if the batched time is 0).
    """
    print(f"\n=== {label}: {n_files} files × {paragraphs_per_file} paragraphs ===")
    measured = {}
    for mode, use_batched in (("unbatched", False), ("batched", True)):
        workdir = Path(tempfile.mkdtemp(prefix=f"mp_{mode}_"))
        try:
            project_dir = workdir / "proj"
            palace_dir = workdir / "palace"
            build_corpus(project_dir, n_files, paragraphs_per_file, seed=seed)
            _reset_backend_caches()
            drawers, dt = mine_once(str(project_dir), str(palace_dir), batched=use_batched)
            if dt > 0:
                rate = drawers / dt
            else:
                rate = 0.0
            measured[mode] = (drawers, dt, rate)
            print(f" {mode:10} {drawers:5} drawers in {dt:6.2f}s → {rate:7.1f} drawers/sec")
        finally:
            shutil.rmtree(workdir, ignore_errors=True)

    _unused, t_u, r_u = measured["unbatched"]
    d_b, t_b, r_b = measured["batched"]
    speedup = 0.0 if not t_b > 0 else t_u / t_b
    print(f" speedup: {speedup:.2f}× ({t_u:.2f}s → {t_b:.2f}s)")

    summary = {"label": label, "n_files": n_files, "paragraphs": paragraphs_per_file}
    summary["drawers"] = d_b
    summary["unbatched_time"] = t_u
    summary["unbatched_rate"] = r_u
    summary["batched_time"] = t_b
    summary["batched_rate"] = r_b
    summary["speedup"] = speedup
    return summary
# Benchmark scenarios, keyed by the name accepted on the command line via
# --scenarios. Each value is (label, n_files, paragraphs_per_file): the label
# is printed in the results table and the two counts size the synthetic corpus.
SCENARIOS = {
    "small": ("Small files (~50 paragraphs)", 10, 50),
    "medium": ("Medium files (~200 paragraphs)", 20, 200),
    "large": ("Large files (~500 paragraphs)", 10, 500),
}
def _env_summary(device_label: str) -> list[str]:
"""Short hardware + version lines included with the printed table."""
import platform
try:
import chromadb
chromadb_v = chromadb.__version__
except Exception:
chromadb_v = "?"
try:
import onnxruntime as ort
ort_v = ort.__version__
providers = ",".join(p.replace("ExecutionProvider", "") for p in ort.get_available_providers())
except Exception:
ort_v = "?"
providers = "?"
return [
f"device: **{device_label}** (onnxruntime {ort_v}, providers={providers})",
f"chromadb {chromadb_v} · python {sys.version.split()[0]} · {platform.platform()}",
]
def main() -> None:
    """Run the selected mining scenarios and print a markdown results table.

    Command-line flags:
        --device: if given, exported as ``MEMPALACE_EMBEDDING_DEVICE`` before
            the embedding module is imported.
        --scenarios: comma-separated keys of ``SCENARIOS`` (default: all three).
        --seed: corpus RNG seed (default 42).

    Exits with status 2 if any requested scenario name is unknown. The check
    happens before the model warm-up, so a typo in the last name does not
    waste the time spent running the earlier scenarios.
    """
    # ``__doc__`` is None under ``python -OO``; fall back to an empty description.
    parser = argparse.ArgumentParser(description=(__doc__ or "").split("\n\n", 1)[0])
    parser.add_argument(
        "--device",
        default=None,
        help="Override MEMPALACE_EMBEDDING_DEVICE (cpu|cuda|coreml|dml|auto)",
    )
    parser.add_argument(
        "--scenarios",
        default="small,medium,large",
        help="Comma-separated scenario names (default: all)",
    )
    parser.add_argument("--seed", type=int, default=42)
    args = parser.parse_args()

    # Validate every requested name up front, before any expensive work.
    picked = [s.strip() for s in args.scenarios.split(",") if s.strip()]
    unknown = [key for key in picked if key not in SCENARIOS]
    if unknown:
        print(f"Unknown scenario {unknown[0]!r}; choices: {sorted(SCENARIOS)}", file=sys.stderr)
        sys.exit(2)

    # Must be set before mempalace.embedding is imported below.
    if args.device:
        os.environ["MEMPALACE_EMBEDDING_DEVICE"] = args.device

    from mempalace.embedding import describe_device, get_embedding_function

    device_label = describe_device()
    print(f"Warming up ONNX model on device={device_label}...")
    # One throwaway embedding call so model loading is not charged to the
    # first timed scenario.
    ef = get_embedding_function()
    ef(["warmup sentence one", "warmup sentence two"])

    results = []
    for key in picked:
        label, n_files, paras = SCENARIOS[key]
        results.append(run_scenario(label, n_files, paras, args.seed))

    print("\n\n## Mining benchmark\n")
    for line in _env_summary(device_label):
        # Two trailing spaces are a markdown hard line break; a single space
        # would merge the environment lines into one paragraph.
        print(line + "  ")
    print()
    print("| Scenario | Files | Drawers | Per-chunk (old) | Batched (new) | Speedup |")
    print("| --- | ---: | ---: | ---: | ---: | ---: |")
    for r in results:
        print(
            f"| {r['label']} | {r['n_files']} | {r['drawers']} | "
            f"{r['unbatched_time']:.2f}s · {r['unbatched_rate']:.0f} drw/s | "
            f"{r['batched_time']:.2f}s · {r['batched_rate']:.0f} drw/s | "
            f"**{r['speedup']:.2f}×** |"
        )


if __name__ == "__main__":
    main()
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
+61
View File
@@ -0,0 +1,61 @@
# MemPalace Caddy reverse-proxy config.
# -----------------------------------------------------------------------------
# Listens on :8443 with a self-signed (Caddy-internal) cert. Enforces a
# bearer-token check on every request and proxies authenticated traffic to
# the mempalace container.
#
# Two upstreams, three routes:
# /sse, /messages* -> mempalace:8765 (mcp-proxy SSE for MCP tool calls)
# /ingest* -> mempalace:8766 (in-process HTTP ingest endpoint)
# /healthz -> mempalace:8766 (no auth, for liveness probes)
#
# Token comes from the MEMPAL_TOKEN env var (set in deploy/unraid/.env).
# -----------------------------------------------------------------------------
{
# Disable the admin API — never expose it from a container that's
# reachable from clients.
admin off
# Ship access logs to stderr so `docker logs caddy` is useful.
log {
output stderr
format console
}
}
:8443 {
	tls internal

	# Liveness probe — no auth so Docker / external monitors can hit it
	# without holding the bearer token.
	handle /healthz {
		reverse_proxy mempalace:8766
	}

	# Auth gate. The matcher passes only when the Authorization header is
	# exactly `Bearer <value of the MEMPAL_TOKEN env var>`.
	@authorized header Authorization "Bearer {$MEMPAL_TOKEN}"

	# MCP-over-SSE: the MCP transport sends events on /sse and accepts
	# JSON-RPC POSTs on /messages (path varies by mcp-proxy version, so
	# proxy the whole prefix tree). reverse_proxy takes a single matcher
	# token, so both prefixes are combined into one named matcher; writing
	# them inline would make Caddy read the second path as an upstream.
	@mcp path /sse* /messages*

	handle @authorized {
		# SSE responses are streamed — disable buffering and force HTTP/1.1
		# upstream to keep the event stream open.
		reverse_proxy @mcp mempalace:8765 {
			flush_interval -1
			transport http {
				versions 1.1
			}
		}

		reverse_proxy /ingest* mempalace:8766
	}

	# Default: anything not matched above (or unauthenticated traffic) is
	# rejected. Returning 401 instead of 403 is correct here — clients with
	# no/invalid token can re-attempt with credentials.
	respond 401 {
		body "Unauthorized"
		close
	}
}
+512
View File
@@ -0,0 +1,512 @@
# MemPalace on Unraid — server-mode deployment
This directory contains everything needed to run MemPalace as a shared
memory server on an Unraid box and connect multiple AI tools (Claude
Code, Codex, Antigravity, or any MCP-compatible client) to a single
persistent palace.
If you only use one machine, you don't need any of this — install
mempalace locally per the main [README](../../README.md) and you're
done. This guide is for users running the same AI tools across multiple
machines who want one shared memory.
---
## What you get
```
home LAN
┌───────────────────────────────────┐
│ Unraid (always on) │
│ ┌────────────────────────────┐ │
│ │ caddy :8443 (TLS + auth) │ │
│ │ ├─ /sse → mcp-proxy │ │
│ │ └─ /ingest → ingest API │ │
│ │ mempalace (single process) │ │
│ │ ├─ mcp-proxy :8765 │ │
│ │ └─ ingest :8766 │ │
│ └────────────────────────────┘ │
│ /mnt/user/appdata/mempalace/ │
│ ├─ palace/ ChromaDB │
│ ├─ kg/ knowledge graph │
│ └─ inbox/ uploaded sessions │
└───────────────────────────────────┘
│ │ │
┌─────┴─┐ ┌────┴──┐ ┌───┴──────┐
│ box A │ │ box B │ │ box C │
│ Claude│ │ Codex │ │ Antigrav │
└───────┘ └───────┘ └──────────┘
```
* **One palace, many clients.** Search and write target the same
ChromaDB index regardless of which machine you're on.
* **Auto-save hooks work across machines.** Each client's session
transcripts get pushed to the server on `Stop` and `PreCompact`
events; the server-side miner runs the existing `mine_convos`
pipeline (entity detection, room assignment, dedup, idempotency).
* **Single shared secret.** One bearer token gates both MCP and
transcript ingest at the Caddy edge.
What this is **not**: a multi-tenant cloud product. There's one palace,
one token, no per-user isolation. It's designed for a single user with
multiple machines.
---
## Files in this directory
| File | Purpose |
|---|---|
| `docker-compose.yml` | Two-container stack: `mempalace` + `caddy` sidecar. |
| `Caddyfile` | Caddy config: bearer-token auth, self-signed TLS, SSE-aware reverse proxy. |
| `mempalace-server.xml` | dockerMan template for a single-container, **no-auth, LAN-trust-only** install (compose path is the recommended one). |
| `README.md` | This file. |
The `Dockerfile` and `.dockerignore` live at the repo root — the compose
build context is `../..` so it can reach them.
---
## Prerequisites
* Unraid 6.12+ with Docker enabled (default).
* The **Compose Manager** plugin from Community Apps. Required for the
recommended (auth-enabled) path. The dockerMan template path doesn't
need it but has no auth.
* `/mnt/user/appdata` set up (default on every Unraid).
* Port `8443` free on the Unraid host (or change it in `docker-compose.yml`).
You do **not** need Tailscale, WireGuard, a domain name, a public IP,
SWAG, or NPM. The stack is self-contained.
---
## Install (recommended: compose with auth)
### 1. Get the repo onto Unraid
SSH to Unraid, pick a path on a regular share (not `/boot`, not
`/mnt/cache` directly), and clone or copy the repo:
```bash
mkdir -p /mnt/user/system/build
cd /mnt/user/system/build
git clone <your-fork-or-rsync-source> mempalace
cd mempalace/deploy/unraid
```
### 2. Mint a bearer token
```bash
TOKEN=$(openssl rand -hex 32)
echo "MEMPAL_TOKEN=$TOKEN" > .env
chmod 600 .env
echo "Token: $TOKEN" # save to a password manager — you'll set this on each client
```
`MEMPAL_TOKEN` is read from `.env` by `docker compose`. The same token
is forwarded to:
* Caddy, which checks `Authorization: Bearer <token>` on every request.
* The in-container ingest server as `MEMPALACE_INGEST_TOKEN` for
defense-in-depth.
### 3. Create the appdata directories
```bash
mkdir -p /mnt/user/appdata/mempalace \
/mnt/user/appdata/mempalace-caddy/data \
/mnt/user/appdata/mempalace-caddy/config
chown -R 99:100 /mnt/user/appdata/mempalace
chown -R 99:100 /mnt/user/appdata/mempalace-caddy
```
The Caddy data dir holds Caddy's auto-generated root CA — back it up
so re-deploys keep the same cert (clients won't have to re-trust it).
### 4. Build and start
```bash
docker compose up -d --build
```
First build downloads Python 3.13-slim and pip-installs `mempalace` +
`mcp-proxy` (~35 min on a Celeron, faster on real hardware).
### 5. Verify
```bash
# unauth'd liveness probe
curl -k https://<unraid-ip>:8443/healthz
# → {"status":"ok","version":"3.3.x"}
# bearer-checked endpoint should 401 without the token
curl -ki https://<unraid-ip>:8443/ingest/transcript
# HTTP/2 401
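# ...and get past the gate with it. /healthz is unauthenticated, so test
# against /ingest instead: any status other than 401 means the token was accepted.
curl -k -s -o /dev/null -w '%{http_code}\n' -H "Authorization: Bearer $TOKEN" https://<unraid-ip>:8443/ingest/transcript
# → anything but 401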
```
If you see all of the above, the server is up and the auth gate is
working.
### 6. (Optional) Trust Caddy's root CA on each client
Caddy's `tls internal` directive auto-generates a self-signed root CA
on first start. Clients must either trust that CA or skip TLS
verification (`-k` for curl, `MEMPAL_REMOTE_INSECURE=1` for hooks,
disabled SSL verify for the MCP client).
To trust it once and stop seeing TLS warnings:
```bash
# On Unraid:
cat /mnt/user/appdata/mempalace-caddy/data/caddy/pki/authorities/local/root.crt
```
Copy that PEM block to each Windows client and import into the
**Trusted Root Certification Authorities** store via `certmgr.msc`,
or via PowerShell:
```powershell
Import-Certificate -FilePath C:\path\to\root.crt -CertStoreLocation Cert:\LocalMachine\Root
```
---
## Connect AI tools
You'll need [`mcp-proxy`](https://github.com/sparfenyuk/mcp-proxy) on
each client machine:
```bash
uv tool install mcp-proxy
# or:
pip install mcp-proxy
```
Set environment variables persistently. **PowerShell** (Windows):
```powershell
[Environment]::SetEnvironmentVariable("MEMPAL_REMOTE_URL", "https://<unraid-ip>:8443", "User")
[Environment]::SetEnvironmentVariable("MEMPAL_REMOTE_TOKEN", "<the-token>", "User")
# Drop this once you've trusted Caddy's root CA:
[Environment]::SetEnvironmentVariable("MEMPAL_REMOTE_INSECURE", "1", "User")
```
**Bash/Zsh** (macOS/Linux): add the same three exports to
`~/.zshrc` / `~/.bashrc`.
### Claude Code
Add to `~/.claude.json` (user-scoped) or `.mcp.json` in the project:
```json
{
"mcpServers": {
"mempalace": {
"command": "mcp-proxy",
"args": [
"https://<unraid-ip>:8443/sse",
"--headers", "Authorization", "Bearer <the-token>"
],
"env": {
"PYTHONHTTPSVERIFY": "0"
}
}
}
}
```
Drop the `env` block once Caddy's root CA is trusted on the client.
### Codex CLI
Add to `~/.codex/config.toml`:
```toml
[mcp_servers.mempalace]
command = "mcp-proxy"
args = [
"https://<unraid-ip>:8443/sse",
"--headers", "Authorization", "Bearer <the-token>",
]
[mcp_servers.mempalace.env]
PYTHONHTTPSVERIFY = "0"
```
### Antigravity
Antigravity uses the Windsurf-derived MCP layout. Open the IDE's
MCP settings UI (Settings → AI → MCP Servers) and add:
```json
{
"mempalace": {
"command": "mcp-proxy",
"args": [
"https://<unraid-ip>:8443/sse",
"--headers", "Authorization", "Bearer <the-token>"
]
}
}
```
Or edit `~/.antigravity/mcp.json` directly with the same shape.
### Verify each client
In any of the three tools, start a session and call:
> "Use mempalace_status to show palace stats."
Expected: a JSON blob with `total_drawers`, wing/room breakdown, etc.
A 401 means the token is wrong; a connection error means the
URL/cert is wrong.
---
## Set up auto-save hooks
The `_remote.sh` hook variants in `../../hooks/` push transcripts to
the server instead of running `mempalace mine` locally. They share the
same env-var contract as the MCP client config above.
### Claude Code
Make the scripts executable:
```bash
chmod +x hooks/mempal_save_hook_remote.sh \
hooks/mempal_precompact_hook_remote.sh
```
Add to `.claude/settings.local.json`:
```json
{
"hooks": {
"Stop": [{
"matcher": "*",
"hooks": [{
"type": "command",
"command": "/abs/path/to/hooks/mempal_save_hook_remote.sh",
"timeout": 30
}]
}],
"PreCompact": [{
"hooks": [{
"type": "command",
"command": "/abs/path/to/hooks/mempal_precompact_hook_remote.sh",
"timeout": 60
}]
}]
}
}
```
### Codex CLI
Add to `.codex/hooks.json` with the same shape — the scripts are
hook-host-agnostic.
### What the hooks do
| Hook | Trigger | Behavior |
|---|---|---|
| `mempal_save_hook_remote.sh` | Every 15 user messages (configurable via `SAVE_INTERVAL` env var) | Backgrounded `curl` POSTs the active transcript to `/ingest/transcript`. Returns immediately so the AI doesn't stall. Idempotent — failed retries are safe. |
| `mempal_precompact_hook_remote.sh` | Right before context compaction | Synchronous `curl` POST. Blocks until the upload completes (or the hook timeout fires) so memory is durable before context shrinks. |
Both write logs to `~/.mempalace/hook_state/hook.log`. Tail it during
setup to confirm uploads are landing.
### Optional env vars
| Variable | Default | Purpose |
|---|---|---|
| `MEMPAL_REMOTE_URL` | *(required)* | Server base URL, e.g. `https://unraid.local:8443`. |
| `MEMPAL_REMOTE_TOKEN` | *(required)* | Bearer token. |
| `MEMPAL_REMOTE_INSECURE` | unset | Set to `1` to skip TLS verification. Use only with `tls internal`. |
| `MEMPAL_REMOTE_WING` | unset | Force a specific wing for this client's transcripts. Default: server derives wing from session id. |
| `SAVE_INTERVAL` | `15` | Messages between save-hook fires. |
---
## Backfilling history
The hooks only capture sessions going forward. To mine **past**
transcripts into the remote palace, on each client run:
```bash
curl -k -X POST \
-H "Authorization: Bearer $MEMPAL_REMOTE_TOKEN" \
-H "X-Session-Id: backfill-$(hostname)-$(date +%s)" \
-H "X-Wing: backfill" \
--data-binary @/path/to/some-session.jsonl \
"$MEMPAL_REMOTE_URL/ingest/transcript"
```
For a whole directory of past sessions, loop:
```bash
# bash: run `shopt -s globstar` first so ** recurses (zsh recurses by default)
for f in ~/.claude/projects/**/*.jsonl; do
curl -k -X POST \
-H "Authorization: Bearer $MEMPAL_REMOTE_TOKEN" \
-H "X-Session-Id: $(basename "$f" .jsonl)" \
--data-binary @"$f" \
"$MEMPAL_REMOTE_URL/ingest/transcript"
done
```
The server-side miner is idempotent — re-uploading the same transcript
won't double-file.
---
## Backups
Everything that matters lives in `/mnt/user/appdata/mempalace/`:
* `palace/` — ChromaDB vector index + SQLite metadata
* `kg/` — knowledge-graph SQLite
* `inbox/` — uploaded transcripts (kept for re-mining if needed)
Add it to your **CA Backup / Appdata Backup** schedule. Losing this
directory loses all memory.
The Caddy data dir (`/mnt/user/appdata/mempalace-caddy/data/`) is also
worth backing up — it contains the auto-generated root CA. Without it,
re-deploys regenerate the CA and clients have to re-trust it.
---
## dockerMan template (no-auth, LAN-trust-only)
If you don't want auth and trust your LAN absolutely (no other people,
no untrusted IoT, no guests), the `mempalace-server.xml` template gives
you a single-container, dockerMan-compatible install:
```bash
# Build the image:
cd /mnt/user/system/build/mempalace
docker build -t mempalace-server:latest .
# Install the template:
cp deploy/unraid/mempalace-server.xml \
/boot/config/plugins/dockerMan/templates-user/my-MemPalace.xml
```
Then in the Unraid WebUI: Docker → Add Container → "Select a template" →
**MemPalace** → Apply.
This path skips Caddy entirely. The MCP SSE endpoint is published bare
on `:8765`, no TLS, no auth. Anyone on the LAN can read and write the
palace. **Only use this if you understand and accept that.**
---
## Troubleshooting
### `mcp-proxy` connects but tool calls hang
Caddy is buffering SSE responses. Verify `flush_interval -1` is set in
the Caddyfile and that Caddy version is 2.7+ (the compose pulls
`caddy:2-alpine` which is current).
### 401 from every request
The token in the client's MCP config doesn't match the server's
`MEMPAL_TOKEN`. Print both to confirm:
```bash
# On Unraid:
grep MEMPAL_TOKEN /mnt/user/system/build/mempalace/deploy/unraid/.env
# On client (PowerShell):
[Environment]::GetEnvironmentVariable("MEMPAL_REMOTE_TOKEN", "User")
```
### `MineAlreadyRunning` errors in hook logs
Two clients hit the ingest endpoint simultaneously. The server-side
miner serializes via `mine_lock` and rejects the second one. The hook
is idempotent — the next save catches up. If you see this constantly,
raise `SAVE_INTERVAL` on the chattier client.
### Caddy logs `tls: handshake failure`
Client doesn't trust the self-signed cert. Either trust the root CA
(see step 6 in install) or set `MEMPAL_REMOTE_INSECURE=1` /
`PYTHONHTTPSVERIFY=0` on that client.
### Container can't start: "address already in use"
Port 8443 is taken (commonly by Unraid's WebUI HTTPS or another
service). Edit `docker-compose.yml` and change the host-side mapping:
```yaml
ports:
- "9443:8443" # change 9443 to whatever's free
```
Update `MEMPAL_REMOTE_URL` on every client to match.
### Embedding model download stalls on first request
The ~80 MB MiniLM ONNX model downloads from HuggingFace on first
use. Slow connections can time out the initial mining call. Pre-warm
it manually:
```bash
docker exec mempalace python -c \
"from chromadb.utils.embedding_functions import ONNXMiniLM_L6_V2; ONNXMiniLM_L6_V2()(['warmup'])"
```
Subsequent uses load from `/data/.cache/chroma/` — ~50 ms.
### Logs
```bash
docker logs mempalace # MCP server, ingest server
docker logs mempalace-caddy # auth gate, TLS, access logs
tail -f ~/.mempalace/hook_state/hook.log # client-side hook activity
```
---
## Updating
When this repo updates upstream:
```bash
cd /mnt/user/system/build/mempalace
git pull
cd deploy/unraid
docker compose up -d --build
```
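`--build` rebuilds only the `mempalace` service (the image hash
changes). Caddy uses the floating `caddy:2-alpine` tag, and `up` reuses
the locally cached image, so run `docker compose pull caddy` first if
you want the newest release in the 2.x line.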
Your palace data and Caddy CA persist across rebuilds because they're
on volumes outside the container.
---
## Going further
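* **Replace self-signed TLS with Let's Encrypt** — point a real domain at
Unraid (DDNS or otherwise), open port 80 for ACME challenge, change the
`:8443` site address in `Caddyfile` to that domain (e.g.
`mempalace.example.com:8443`) so Caddy knows which name to request a
certificate for, and change `tls internal` to `tls your@email`. Caddy
handles the rest.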
* **Put behind SWAG / Nginx Proxy Manager** — drop the Caddy sidecar,
keep `mempalace` exposing 8765/8766 internally only, and add the
routes to your existing reverse proxy. Bearer-token auth and SSE
pass-through must be configured manually.
* **Per-machine wings** — set `MEMPAL_REMOTE_WING=<machinename>` on
each client so transcripts file under separate wings; cross-wing
search still works via the palace graph.
+82
View File
@@ -0,0 +1,82 @@
# MemPalace Unraid Compose
# -----------------------------------------------------------------------------
# Two-container stack: mempalace (MCP-over-SSE on 8765 + HTTP ingest on 8766,
# reachable only on the internal compose network, never published on the
# host) plus a Caddy sidecar that terminates TLS, enforces a bearer token,
# and reverse-proxies both endpoints on :8443.
#
# Use this with the Unraid Compose Manager plugin. Build context is the
# repo root (../..); on Unraid, sync the repo to /mnt/user/<somewhere>/mempalace
# and from this directory run:
#
# # 1. Generate a token (do this once, keep it secret):
# openssl rand -hex 32 > .env.token
# echo "MEMPAL_TOKEN=$(cat .env.token)" > .env
# rm .env.token
#
# # 2. Build and start:
# docker compose up -d --build
#
# Endpoints (after start):
# https://<unraid-ip>:8443/sse — MCP for AI clients
# https://<unraid-ip>:8443/ingest/... — transcript uploads from hooks
# https://<unraid-ip>:8443/healthz — liveness, no auth
#
# Caddy uses a self-signed cert (`tls internal`); clients must accept it,
# typically via a `--insecure`-style flag or by trusting the Caddy root CA.
# -----------------------------------------------------------------------------
services:
  mempalace:
    build:
      context: ../..
      dockerfile: Dockerfile
    image: mempalace-server:latest
    container_name: mempalace
    restart: unless-stopped
    # Not published on the host — only Caddy reaches these ports over the
    # internal compose network. This is the auth boundary.
    expose:
      - "8765"
      - "8766"
    volumes:
      - /mnt/user/appdata/mempalace:/data
    environment:
      MEMPALACE_PALACE_PATH: /data/palace
      MEMPALACE_INGEST_PORT: "8766"
      MEMPALACE_INGEST_HOST: "0.0.0.0"
      # Defense-in-depth — Caddy is the primary gate, but if it's bypassed
      # (e.g. someone exec'd into the container's network), the ingest
      # server still requires the token.
      # The ":?" form makes `docker compose` abort with this message when
      # MEMPAL_TOKEN is unset or empty, instead of starting with a blank token.
      MEMPALACE_INGEST_TOKEN: "${MEMPAL_TOKEN:?set MEMPAL_TOKEN in deploy/unraid/.env}"
      # Languages for entity detection (comma-separated):
      # MEMPALACE_ENTITY_LANGUAGES: en
    user: "99:100"
    networks:
      - mempal
    # Override the image CMD: bind mcp-proxy to all interfaces inside the
    # container network so Caddy can reach it. The ingest server thread
    # spawns from MEMPALACE_INGEST_PORT.
    command: >
      mcp-proxy --sse-host 0.0.0.0 --sse-port 8765
      --pass-environment -- mempalace-mcp

  caddy:
    image: caddy:2-alpine
    container_name: mempalace-caddy
    restart: unless-stopped
    depends_on:
      - mempalace
    ports:
      - "8443:8443"
    volumes:
      - ./Caddyfile:/etc/caddy/Caddyfile:ro
      - /mnt/user/appdata/mempalace-caddy/data:/data
      - /mnt/user/appdata/mempalace-caddy/config:/config
    environment:
      # Same fail-fast check as above: the Caddyfile builds its
      # Authorization matcher from this value.
      MEMPAL_TOKEN: "${MEMPAL_TOKEN:?set MEMPAL_TOKEN in deploy/unraid/.env}"
    networks:
      - mempal

networks:
  mempal:
    driver: bridge
+99
View File
@@ -0,0 +1,99 @@
<?xml version="1.0"?>
<Container version="2">
<Name>MemPalace</Name>
<Repository>mempalace-server:latest</Repository>
<Registry>https://github.com/MemPalace/mempalace</Registry>
<Network>bridge</Network>
<MyIP/>
<Shell>sh</Shell>
<Privileged>false</Privileged>
<Support>https://github.com/MemPalace/mempalace/issues</Support>
<Project>https://github.com/MemPalace/mempalace</Project>
<Overview>
Local-first AI memory server. Stores conversations and project content
verbatim in a searchable palace, exposed to MCP-compatible AI tools
(Claude Code, Codex, Antigravity, etc.) over Server-Sent Events on
port 8765.
The image is built locally — see Dockerfile in the repo root. From the
Unraid CLI:
cd /mnt/user/&lt;path&gt;/mempalace
docker build -t mempalace-server:latest .
Then add this template via Add Container -- Template.
Mount /mnt/user/appdata/mempalace to /data for persistent storage of
the ChromaDB index, SQLite knowledge graph, and embedding-model cache.
SECURITY: this container exposes the MCP endpoint without authentication.
Bind it to a trusted network (LAN-only or Tailscale) or place it behind
SWAG / Nginx Proxy Manager with bearer-token or basic auth.
Endpoint: http://[UNRAID-IP]:8765/sse
</Overview>
<Category>Productivity: Tools: Other:</Category>
<WebUI/>
<TemplateURL/>
<Icon>https://raw.githubusercontent.com/MemPalace/mempalace/develop/assets/mempalace_logo.png</Icon>
<ExtraParams>--user 99:100</ExtraParams>
<PostArgs/>
<CPUset/>
<DateInstalled/>
<DonateText/>
<DonateLink/>
<Description>
Persistent AI memory across machines. Connect Claude Code, Codex,
Antigravity, or any MCP-compatible client to a single shared palace.
</Description>
<Config Name="MCP SSE port"
Target="8765"
Default="8765"
Mode="tcp"
Description="Port the MCP-over-SSE endpoint listens on. Clients connect to http://[UNRAID-IP]:[PORT]/sse."
Type="Port"
Display="always"
Required="true"
Mask="false">8765</Config>
<Config Name="Appdata"
Target="/data"
Default="/mnt/user/appdata/mempalace"
Mode="rw"
Description="Persistent storage for the palace (ChromaDB), knowledge graph (SQLite), embedding-model cache, and config."
Type="Path"
Display="always"
Required="true"
Mask="false">/mnt/user/appdata/mempalace</Config>
<Config Name="Palace path (inside container)"
Target="MEMPALACE_PALACE_PATH"
Default="/data/palace"
Mode=""
Description="Subdirectory inside /data where ChromaDB files live. Change only if migrating from a different layout."
Type="Variable"
Display="advanced"
Required="false"
Mask="false">/data/palace</Config>
<Config Name="Embedding device"
Target="MEMPALACE_EMBEDDING_DEVICE"
Default=""
Mode=""
Description="ONNX execution provider: cpu | cuda | dml | coreml. Leave blank for auto. CUDA requires the NVIDIA Driver plugin and GPU passthrough; the image must be rebuilt with the [gpu] extra installed."
Type="Variable"
Display="advanced"
Required="false"
Mask="false"></Config>
<Config Name="Entity-detection languages"
Target="MEMPALACE_ENTITY_LANGUAGES"
Default="en"
Mode=""
Description="Comma-separated language codes for entity detection (e.g. en,es,de)."
Type="Variable"
Display="advanced"
Required="false"
Mask="false">en</Config>
</Container>
-144
View File
@@ -1,144 +0,0 @@
# MemPalace — History, Corrections, and Public Notices
This file is the canonical record of post-launch corrections, public notices,
and retractions that affect MemPalace's public claims. Newest first.
---
## 2026-04-14 — Benchmark table rewrite (issue [#875](https://github.com/MemPalace/mempalace/issues/875))
A community audit identified a category error in the public benchmark tables
on `README.md` and `mempalaceofficial.com`: MemPalace's retrieval recall
numbers (R@5, R@10) were listed in the same columns as competitors'
end-to-end QA accuracy numbers. They are different metrics and are not
comparable — a system can have 100% retrieval recall and 40% QA accuracy.
The audit also found that the retracted "+34% palace boost" claim (see the
April 7 note below) was still present in multiple surfaces despite that
retraction, and that two competitor numbers (`Mem0 ~85%`, `Zep ~85%`) had no
published source and did not match the metrics those projects actually
publish.
What changed in this PR:
- The headline number on all surfaces is now **96.6% R@5 on LongMemEval in
raw mode**, independently reproduced on Linux x86_64 against the tagged
v3.3.0 release on 2026-04-14. Result JSONLs are committed under
`benchmarks/results_*.jsonl` (see PR description for the scorecard).
- The **"100% with Haiku rerank"** claim has been removed from all public
comparison tables. It reproduces on our machines and with a different LLM
family (minimax-m2.7 via Ollama Cloud: 99.2% R@5 / 100.0% R@10 on the full
500-question LongMemEval set) — but the 99.4% → 100% step was developed
by inspecting three specific wrong answers (`benchmarks/BENCHMARKS.md` has
called this "teaching to the test" since February). It belongs in the
methodology document, not in a headline.
- The **honest held-out number** for the hybrid pipeline — 98.4% R@5 on 450
questions that `hybrid_v4` was never tuned on, deterministic seed — is now
the comparable figure when an LLM rerank is involved.
- The **retracted "+34% palace boost"** has been removed from
`README.md`, `website/concepts/the-palace.md`,
`website/guide/searching.md`, and `website/reference/contributing.md`.
Wing and room filters remain useful — they're standard metadata filters —
but they are not presented as a novel retrieval improvement.
- **Competitor comparison tables** mixing retrieval recall with QA accuracy
have been removed from `README.md` and `website/reference/benchmarks.md`.
Where MemPalace can be fairly compared on the same metric, we link to the
cited source. Otherwise we report our own numbers and let readers draw
their own conclusions.
- **Reproduction instructions** in `benchmarks/BENCHMARKS.md` and
`benchmarks/README.md` were pointing at a defunct branch
(`aya-thekeeper/mempal`); they now point at `MemPalace/mempalace`.
- The **LoCoMo 100% R@10 with top-50 rerank** row has been removed from
public comparison surfaces. With per-conversation session counts of 19–32
and `top_k=50`, the retrieval stage returns every session in the
conversation by construction, so the number measures an LLM's
reading comprehension over the whole conversation, not retrieval.
Thanks to [@dial481](https://github.com/MemPalace/mempalace/issues/875) for
the detailed audit and to [@rohitg00](https://github.com/rohitg00) for the
parallel write-up in Discussion #747.
---
## 2026-04-11 — Impostor domains and malware
Several community members (issues #267, #326, #506) reported fake MemPalace
websites distributing malware. The only official surfaces for this project
are:
- This GitHub repository: [github.com/MemPalace/mempalace](https://github.com/MemPalace/mempalace)
- The PyPI package: [pypi.org/project/mempalace](https://pypi.org/project/mempalace/)
- The docs site: [mempalaceofficial.com](https://mempalaceofficial.com)
Any other domain — `mempalace.tech` being the one most commonly reported —
is not ours. Never run install scripts from unofficial sites.
Thanks to our community members for flagging the problem.
---
## 2026-04-07 — A Note from Milla & Ben
> The community caught real problems in this README within hours of launch
> and we want to address them directly.
>
> **What we got wrong:**
>
> - **The AAAK token example was incorrect.** We used a rough heuristic
> (`len(text)//3`) for token counts instead of an actual tokenizer. Real
> counts via OpenAI's tokenizer: the English example is 66 tokens, the
> AAAK example is 73. AAAK does not save tokens at small scales — it's
> designed for *repeated entities at scale*, and the README example was a
> bad demonstration of that. We're rewriting it.
>
> - **"30x lossless compression" was overstated.** AAAK is a lossy
> abbreviation system (entity codes, sentence truncation). Independent
> benchmarks show AAAK mode scores **84.2% R@5 vs raw mode's 96.6%** on
> LongMemEval — a 12.4 point regression. The honest framing is: AAAK is
> an experimental compression layer that trades fidelity for token
> density, and **the 96.6% headline number is from RAW mode, not AAAK**.
>
> - **"+34% palace boost" was misleading.** That number compares unfiltered
> search to wing+room metadata filtering. Metadata filtering is a
> standard feature of the underlying vector store, not a novel retrieval
> mechanism. Real and useful, but not a moat.
>
> - **"Contradiction detection"** exists as a separate utility
> (`fact_checker.py`) but is not currently wired into the knowledge graph
> operations as the README implied.
>
> - **"100% with Haiku rerank"** is real (we have the result files) but
> the rerank pipeline is not in the public benchmark scripts. We're
> adding it.
>
> **What's still true and reproducible:**
>
> - **96.6% R@5 on LongMemEval in raw mode**, on 500 questions, zero API
> calls — independently reproduced on M2 Ultra in under 5 minutes by
> [@gizmax](https://github.com/MemPalace/mempalace/issues/39).
> - Local, free, no subscription, no cloud, no data leaving your machine.
> - The architecture (wings, rooms, closets, drawers) is real and useful,
> even if it's not a magical retrieval boost.
>
> **What we're doing:**
>
> 1. Rewriting the AAAK example with real tokenizer counts and a scenario
> where AAAK actually demonstrates compression
> 2. Adding `mode raw / aaak / rooms` clearly to the benchmark
> documentation so the trade-offs are visible
> 3. Wiring `fact_checker.py` into the KG ops so the contradiction
> detection claim becomes true
> 4. Pinning the vector store dependency to a tested range (issue #100),
> fixing the shell injection in hooks (#110), and addressing the macOS
> ARM64 segfault (#74)
>
> **Thank you to everyone who poked holes in this.** Brutal honest
> criticism is exactly what makes open source work, and it's what we asked
> for. Special thanks to
> [@panuhorsmalahti](https://github.com/MemPalace/mempalace/issues/43),
> [@lhl](https://github.com/MemPalace/mempalace/issues/27),
> [@gizmax](https://github.com/MemPalace/mempalace/issues/39), and everyone
> who filed an issue or a PR in the first 48 hours. We're listening, we're
> fixing, and we'd rather be right than impressive.
>
> — *Milla Jovovich & Ben Sigman*
-768
View File
@@ -1,768 +0,0 @@
# RFC 002 — Source Adapter Plugin Specification
- **Status:** Draft
- **Tracking issue:** [#989](https://github.com/MemPalace/mempalace/issues/989)
- **Related:** [#274](https://github.com/MemPalace/mempalace/issues/274), [#23](https://github.com/MemPalace/mempalace/pull/23), [#169](https://github.com/MemPalace/mempalace/pull/169), [#232](https://github.com/MemPalace/mempalace/pull/232), [#567](https://github.com/MemPalace/mempalace/pull/567), [#98](https://github.com/MemPalace/mempalace/pull/98), [#591](https://github.com/MemPalace/mempalace/pull/591), [#592](https://github.com/MemPalace/mempalace/pull/592), [#702](https://github.com/MemPalace/mempalace/pull/702), [#981](https://github.com/MemPalace/mempalace/issues/981), [#244](https://github.com/MemPalace/mempalace/pull/244), [#419](https://github.com/MemPalace/mempalace/pull/419), [#300](https://github.com/MemPalace/mempalace/pull/300), [#952](https://github.com/MemPalace/mempalace/pull/952), [#389](https://github.com/MemPalace/mempalace/pull/389), [#434](https://github.com/MemPalace/mempalace/pull/434)
- **Sibling spec:** [RFC 001 — Storage Backend Plugin Specification](001-storage-backend-plugin-spec.md)
- **Spec version:** `1.0`
## Summary
A formal contract for MemPalace source adapters so third parties can ship `pip install mempalace-source-<name>` packages (Cursor, OpenCode, git, Slack, Notion, email, calendar, Whisper transcripts, …) that drop into `mempalace mine` without patching core. The spec defines the adapter interface, record shape, metadata schema contract, privacy class, entry-point registration, incremental-ingest semantics, closet integration, a declared-transformation model that replaces the informal "verbatim" promise with a verifiable one, conformance tests, and the refactor of the existing file and conversation miners into first-party adapters on the same contract.
RFC 001 formalized the write side (where drawers are stored). This RFC formalizes the read side (where content comes from). Both are required for MemPalace to function as a durable daemon managing heterogeneous palaces across many source types.
## Motivation
Six source ingesters are currently in flight, each solving the same problem a different way:
| PR / Issue | Source | Mechanism |
|---|---|---|
| [#274](https://github.com/MemPalace/mempalace/issues/274) | Cursor | `workspaceStorage/*.vscdb` SQLite extraction |
| [#23](https://github.com/MemPalace/mempalace/pull/23) | OpenCode | SQLite session database |
| [#169](https://github.com/MemPalace/mempalace/pull/169) | Pi agent | JSONL session normalizer |
| [#232](https://github.com/MemPalace/mempalace/pull/232) | Cursor (JSONL variant) | JSONL normalizer |
| [#567](https://github.com/MemPalace/mempalace/pull/567), [#98](https://github.com/MemPalace/mempalace/pull/98) | Git | `git log` + `gh pr view` with structured diff summary |
| [#591](https://github.com/MemPalace/mempalace/pull/591), [#592](https://github.com/MemPalace/mempalace/pull/592) | Delphi Oracle | Real-time intelligence signals |
| [#702](https://github.com/MemPalace/mempalace/pull/702) | Cursor + factory.ai | Combined session miners |
Plus three ingesters already grafted into core:
- `mempalace/miner.py` — filesystem project miner, fixed char-window chunking, keyword hall routing
- `mempalace/convo_miner.py` — chat transcript miner with exchange-pair chunking
- `mempalace/normalize.py` — format detection for five chat-export shapes (Claude Code JSONL, Codex JSONL, and Claude.ai, ChatGPT, and Slack JSON)
Plus one open proposal for a different ingest semantic:
- [#981](https://github.com/MemPalace/mempalace/issues/981) — path-level descriptions: mine metadata-as-content instead of raw bytes for matched paths. This is a legitimate third ingest mode (alongside chunked-content and whole-record) that the current architecture has no home for.
Each contributor has reinvented source discovery, source-item identity, incremental-ingest bookkeeping, metadata shape, and chunking strategy. Format detection for new chat exports lands in `normalize.py` as one more branch in an `if` chain. There is no shared abstraction, no conformance suite, and no contract new adapter authors can build against.
This is the same situation RFC 001 addresses for storage backends: a pattern that emerged organically, now needs a specification so the community can contribute cleanly and enterprises can build against a stable surface.
### Why this matters beyond developer tooling
The adapter pattern is source-agnostic. What has so far shown up as "Cursor transcripts" and "git commits" generalizes to:
- **Knowledge work** — Notion, Obsidian, Logseq, Google Docs, iA Writer, Zettlr
- **Communications** — Slack, Discord, Teams, Signal backups, mbox/eml email, iMessage
- **Research** — arXiv PDFs, Zotero libraries, bookmarked articles, Kindle highlights, web archives
- **Creator workflows** — YouTube captions, podcast transcripts (Whisper/Deepgram), Descript projects
- **Regulated domains** — medical records, legal filings, financial statements (all gated on §6 privacy class)
Enterprises key on their own domain metadata — `repo/PR/SHA` for engineering, `patient/encounter/CPT` for healthcare, `case/docket/jurisdiction` for legal. The schema lives in the adapter; the content lives in the drawer. This is how structured-data use cases are served without violating the byte-preservation commitments adapters make.
## Goals
1. A source adapter ships as a standalone Python package; `pip install mempalace-source-<name>` is sufficient to use it.
2. `mempalace mine` and the MCP mine tool are source-agnostic — all extraction goes through registered adapters. No `if source_type == 'foo'` branches in core.
3. Content transformations are **declared** (§1.4): each adapter advertises the set of transformations it applies to source bytes. Byte-preserving adapters declare the empty set. Consumers can programmatically determine what happened to their data.
4. Incremental ingest is cheap and correct: re-running mine only touches items whose source-side version changed, using the palace itself as the cursor (no sidecar).
5. Each adapter declares a structured metadata schema. Enterprises index and filter on that schema. Core is schema-agnostic beyond the universal fields in §5.1.
6. The existing `miner.py` and `convo_miner.py` become the first two first-party adapters on the new contract. Drawer metadata fields and field names are preserved — the spec adds fields, does not rename them.
7. A privacy class is declarable at the adapter boundary so sensitive sources (medical, financial, personal comms) are handled with explicit policy rather than implicit trust.
## Non-goals
- Defining chunking. Each adapter owns its chunking strategy — tree-sitter for code, exchange-pair for chat, whole-record for a PR. Core does not impose a chunk size.
- Defining live-stream / webhook shapes (the Delphi Oracle pattern of continuous signal ingestion). That is a separate future RFC; v1 is pull-mode.
- Defining LLM-based structured extraction. Adapters MAY use an LLM; the spec does not mandate or standardize this.
- Defining cross-adapter dedup. When the same content appears via two adapters (e.g., a PR body mined via `git` and as a conversation quote mined via `claude-code`), both drawers land. Deduplication policy is a separate concern handled at query time by `searcher.py`.
- Defining closet construction. Core continues to build closets from adapter-yielded drawers (§1.7); the closet-building algorithm itself is not part of this spec.
---
## 1. Source adapter contract
### 1.1 Required method
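All adapters implement `BaseSourceAdapter`, which requires a single kwargs-only `ingest()` method for extraction plus a `describe_schema()` method that declares the adapter's metadata schema: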
```python
class BaseSourceAdapter(ABC):
@abstractmethod
def ingest(
self,
*,
source: SourceRef,
palace: PalaceContext,
) -> Iterator[IngestResult]:
"""Enumerate and extract content from a source.
Yields a stream of IngestResult values. Lazy adapters yield
`SourceItemMetadata` ahead of the drawers for that item, so core
can report progress and check `is_current` before the adapter
commits to the fetch. Adapters with no lazy-fetch benefit may
interleave `SourceItemMetadata` and `DrawerRecord` items freely.
"""
@abstractmethod
def describe_schema(self) -> AdapterSchema:
"""Declare the structured metadata this adapter attaches.
Returned value is stable for a given adapter version. Enterprises
index on this schema; core uses it to validate adapter output.
"""
```
The single-method `ingest()` contract was chosen over a `discover` / `extract` split. Most current ingesters have no meaningful laziness benefit (filesystem walking is cheap, transcript normalizing is cheap). Adapters that do (git-mine's `gh pr list` vs `gh pr view`; hypothetical Slack/Notion API) express laziness by yielding `SourceItemMetadata` first and deferring fetch until core confirms staleness via `is_current()`.
### 1.2 Optional methods (default implementations on the ABC)
```python
def is_current(
self,
*,
item: SourceItemMetadata,
existing_metadata: dict | None,
) -> bool:
"""Return True if the palace already has an up-to-date copy.
Called by core after querying the palace for existing drawers with
matching source_file. The adapter compares its version token against
the stored metadata and returns True to skip extraction.
Default implementation: returns False (always re-extract). Adapters
advertising `supports_incremental` override this.
"""
return False
def source_summary(self, *, source: SourceRef) -> SourceSummary:
"""Describe a source without extracting (e.g., 'git repo mempalace,
847 commits, 132 PRs'). Default: returns empty summary."""
return SourceSummary(description=self.name)
def close(self) -> None:
return None
```
Core's incremental loop (pseudocode):
```python
for result in adapter.ingest(source=source, palace=ctx):
if isinstance(result, SourceItemMetadata):
existing = ctx.collection.get(where={"source_file": result.source_file}, limit=1)
if adapter.is_current(item=result, existing_metadata=existing):
ctx.skip_current_item() # adapter stops yielding drawers for this item
elif isinstance(result, DrawerRecord):
ctx.upsert_drawer(result)
```
### 1.3 Typed records
```python
@dataclass(frozen=True)
class SourceRef:
"""A handle to the source a user wants to ingest.
local_path is for filesystem-rooted sources (project dir, mbox file).
uri is for URL-like references (github.com/org/repo, slack://workspace/channel).
options carries adapter-specific config (non-secret values only; see §2.2).
"""
local_path: str | None = None
uri: str | None = None
options: dict = field(default_factory=dict)
@dataclass(frozen=True)
class SourceItemMetadata:
"""Lightweight pointer yielded before drawers for lazy-fetch adapters."""
source_file: str # Logical identity — filesystem path, PR URI, etc.
version: str # Source-side version token (mtime, commit SHA, ETag, rev id).
size_hint: int | None = None # Bytes, if known. Used for progress reporting.
route_hint: RouteHint | None = None
@dataclass(frozen=True)
class DrawerRecord:
"""One drawer's worth of content plus metadata."""
content: str # Subject to §1.4 declared transformations.
source_file: str # Foreign key to SourceItemMetadata.source_file.
chunk_index: int = 0 # 0 for single-drawer items; 0..N-1 for chunked items.
metadata: dict = field(default_factory=dict) # Flat: str/int/float/bool only. Must conform to adapter schema.
route_hint: RouteHint | None = None
@dataclass(frozen=True)
class RouteHint:
wing: str | None = None
room: str | None = None
hall: str | None = None
@dataclass(frozen=True)
class SourceSummary:
description: str
item_count: int | None = None
# IngestResult is the union type adapters yield.
IngestResult = SourceItemMetadata | DrawerRecord
# PalaceContext carries collection handles, palace config, and progress hooks
# into the adapter. Full definition in §9 (cleanup prerequisite).
```
### 1.4 Declared transformations
Adapters cannot silently alter content. Every adapter declares the set of transformations it applies:
```python
class BaseSourceAdapter(ABC):
declared_transformations: ClassVar[frozenset[str]] = frozenset()
```
The invariant: **no transformation is applied that is not declared in this set**. Adapters declaring `frozenset()` are byte-preserving end-to-end (modulo the read, which may itself involve `utf8_replace_invalid` — see below).
Reserved transformation names (v1):
| Name | Meaning |
|---|---|
| `utf8_replace_invalid` | Undecodable bytes replaced with U+FFFD on read (equivalent to `open(..., errors="replace")`). |
| `newline_normalize` | CRLF / CR converted to LF. |
| `whitespace_trim` | Leading / trailing whitespace stripped at a record boundary. |
| `whitespace_collapse_internal` | Runs of three or more blank lines collapsed to two. |
| `line_trim` | Each line individually stripped of leading / trailing whitespace. |
| `line_join_spaces` | Adjacent lines joined with single spaces, newlines discarded. |
| `blank_line_drop` | Empty lines between non-empty lines dropped. |
| `strip_tool_chrome` | System tags, hook output, tool UI chrome removed (see `normalize.strip_noise`). |
| `tool_result_truncate` | Tool output heads/tails kept; middle replaced with a marker string. |
| `spellcheck_user` | User turns rewritten by spellcheck. |
| `synthesized_marker` | Adapter inserts its own strings (e.g., `[N lines omitted]`, `[registry] …`, Slack provenance footer). |
| `speaker_role_assignment` | Multi-party speakers alternately assigned `user` / `assistant` roles (Slack). |
| `tool_result_omitted` | Some tool outputs fully omitted from transcript (e.g., Read/Edit/Write results in `normalize._format_tool_result`). |
Adapters MAY define their own transformation names for behaviors the reserved list does not cover. Third-party names SHOULD be prefixed with the adapter name to avoid collisions (e.g., `cursor.composer_ordering`).
**Capability derivation:**
- `byte_preserving` — declared_transformations is empty AND output bytes equal input bytes for any source the adapter can read. Advertised via the `byte_preserving` capability (§2.1). MUST be verified by §7.2 round-trip test.
- `declared_lossy` — declared_transformations is non-empty. The adapter's output is reproducible from source by applying *only* the declared transformations. MUST be verified by §7.3 declared-transformation test.
**Existing code mapping (for the cleanup PR):**
| Module | Declared transformations |
|---|---|
| `filesystem` (current `miner.py`) | `utf8_replace_invalid`, `whitespace_trim` |
| `conversations` (current `convo_miner.py` + `normalize.py`) | `utf8_replace_invalid`, `newline_normalize`, `line_trim`, `line_join_spaces`, `blank_line_drop`, `whitespace_collapse_internal`, `strip_tool_chrome`, `tool_result_truncate`, `tool_result_omitted`, `spellcheck_user`, `synthesized_marker`, `speaker_role_assignment` |
The filesystem adapter is nearly byte-preserving today; the conversations adapter is extensively transformed. Both are honest after this spec lands because both are fully declared.
This replaces the MISSION.md promise of "verbatim always" with a stronger one: every adapter publishes what it does to your data, and the conformance suite verifies it hasn't lied. "Verbatim" becomes a capability some adapters hold (byte_preserving), not a global claim about a lossy pipeline.
### 1.5 Three ingest modes
A single adapter declares one or more of three modes via a class attribute:
```python
class BaseSourceAdapter(ABC):
supported_modes: ClassVar[frozenset[Literal["chunked_content", "whole_record", "metadata_only"]]]
```
| Mode | Content origin |
|---|---|
| `chunked_content` | Source bytes, split into chunks the adapter chooses (current filesystem behavior). |
| `whole_record` | Source bytes, one drawer per source item (e.g., PR → 1 drawer). |
| `metadata_only` | Synthesized description of a source item (absorbs #981). The description bytes are authored by the user or adapter, not the source. Declared transformations (§1.4) do not apply — content is not derived from source bytes. |
`metadata_only` resolves #981: description-mode matches a path pattern and produces one drawer whose content is the user-authored description rather than the file contents. Conformance tests (§7.2, §7.3) skip `metadata_only` records.
An adapter MAY support multiple modes and select per-item; the per-item mode is recorded in `metadata["ingest_mode"]` (§5.1). This field already exists on conversation drawers (`convo_miner.py:346`) and is the only existing field whose semantics this spec extends rather than preserves.
### 1.6 Chunking delegation
Core does not impose chunking. `miner.py`'s 800-character sliding window is the filesystem adapter's default for unknown file types — not a contract. Adapter authors choose what makes sense:
- Code files → tree-sitter function/class boundaries (future enhancement to the filesystem adapter).
- Chat transcripts → exchange pairs (current `convo_miner.py` behavior).
- PRs → whole-record (current `git-mine` behavior in #567).
- PDFs → page or section.
- Voice transcripts → speaker turn.
The sole cross-adapter requirement for `chunked_content` mode: chunks for a given `source_file`, re-assembled in `chunk_index` order and accounting for declared transformations in §1.4, reproduce the adapter's internal representation of the source. The conformance suite verifies this.
### 1.7 Closet integration
Closets are the AAAK-compressed index layer (`palace.build_closet_lines`, `upsert_closet_lines`) that points to drawer content and enables LLM-scale scanning without reading every drawer. Closet-building is not an adapter concern:
- **Core builds closets** from adapter-yielded drawers as a post-step, via the existing `palace.py` helpers. Adapters do not call these APIs.
- **Adapters MAY emit closet hints** in drawer metadata via a flat `;`-joined string:
```python
metadata["closet_hints"] = "decided GraphQL; migrated to Postgres; fixed PR-567"
```
Core splits on `;` and feeds these as candidate topics alongside the content-scanned ones in `build_closet_lines`. The git adapter can hint decision-signal quotes that raw content-scanning would miss; the conversations adapter can hint section headers; the filesystem adapter has no need and omits the field.
- **metadata_only drawers get closets too.** Core builds them from the synthesized description content the same way it builds closets for any other drawer. This is how #981's path-level descriptions become searchable.
- **Closet purging** remains keyed on `source_file` (`purge_file_closets` in `palace.py:221`). Adapters' source_file values must be stable so purge is correct on re-ingest.
Current `convo_miner.py` does not build closets for conversation drawers — an existing gap. The cleanup PR (§9) routes the conversations adapter through the same post-step closet builder as filesystem, closing the gap as a side effect.
---
## 2. Adapter contract
### 2.1 Identity and capabilities
```python
class BaseSourceAdapter(ABC):
name: ClassVar[str] # "filesystem", "cursor", "git", "slack", ...
spec_version: ClassVar[str] = "1.0"
adapter_version: ClassVar[str] # Independent of spec_version; recorded on every drawer.
capabilities: ClassVar[frozenset[str]]
supported_modes: ClassVar[frozenset[str]] # Per §1.5.
declared_transformations: ClassVar[frozenset[str]] # Per §1.4.
default_privacy_class: ClassVar[str] # Per §6.
```
Defined capability tokens (v1):
| Token | Meaning |
|---|---|
| `byte_preserving` | `declared_transformations` is empty AND extracted content equals source bytes. |
| `supports_incremental` | Implements `is_current()` meaningfully; `ingest()` respects `ctx.skip_current_item()`. |
| `supports_structured_metadata` | Attaches fields beyond §5.1 universals. |
| `supports_entity_hints` | Emits entity hints via `metadata["entity_hints_json"]` (§5.4). |
| `supports_kg_triples` | Writes knowledge-graph triples directly to the SQLite KG (§5.5). |
| `supports_closet_hints` | Emits `metadata["closet_hints"]` (§1.7). |
| `requires_auth` | Needs credentials at runtime (env vars — §4.2). |
| `requires_external_service` | Needs a running service (Slack API, email server). |
| `requires_local_tool` | Needs a local binary (`gh`, `rg`, `whisper`). |
| `adapter_owns_routing` | Returns authoritative `RouteHint` values from `ingest()` that core uses as-is (§2.5). |
| `respects_privacy_class` | Honors §6 privacy-class filtering. |
Capability tokens are free-form strings; third-party adapters MAY declare novel tokens for their ecosystem. Core only inspects the above.
### 2.2 Source references
See `SourceRef` in §1.3. The shape is deliberately open — adapters parse `uri` and `options` as they see fit. Core does not canonicalize URIs.
**Secrets in `SourceRef.options`:** credentials MUST NOT be placed in `options`. The spec reserves `options` for non-secret values (paths, filters, date ranges). Secrets come from env vars per §4.2. An adapter that reads a credential from `options` violates the spec and MUST be rejected by the conformance suite.
### 2.3 Lifecycle
1. `__init__`: lightweight. No I/O, no network, no credential fetch.
2. First call to `ingest`: may open resources. All I/O is lazy.
3. `close()`: releases all resources. After `close()`, further calls MUST raise `AdapterClosedError`.
### 2.4 Concurrency
An adapter instance is long-lived and serves many mine operations. Adapters MUST be thread-safe for concurrent `ingest` calls across different `SourceRef` values. MemPalace core serializes calls within a single `SourceRef` unless an adapter advertises `supports_parallel_ingest` (not in v1 — reserved for v1.1).
### 2.5 Routing
Routing is the adapter's responsibility. The filesystem adapter reads `mempalace.yaml` (hall keywords, rooms list) via `MempalaceConfig()` and returns `RouteHint(wing=..., room=..., hall=...)` on each drawer. This relocates `detect_room()` and `detect_hall()` (currently in `miner.py` and `convo_miner.py`) into their respective adapters.
Order of precedence for routing:
1. Explicit `--wing` / `--room` CLI flags → passed through `SourceRef.options` → adapter honors verbatim.
2. Palace config match (`mempalace.yaml` hall keywords, room keywords) → adapter computes.
3. Adapter-internal fallback (e.g., filesystem adapter falls back to `"general"` room).
Adapters advertising `adapter_owns_routing` return the final answer; core uses it verbatim. Adapters not advertising it return None and core applies a generic fallback router (writing to wing `default`, room `general`, hall `general`). Absent any adapter, this is how `mempalace mine` behaves today.
### 2.6 Incremental ingest
`is_current()` is the incremental-ingest primitive. The palace itself is the cursor — no separate persisted state. Correctness requirements:
- The adapter's `SourceItemMetadata.source_file` MUST be stable across re-ingests of the same logical item. Filesystem adapter uses the absolute path (as today). Git adapter uses a URI shape like `github.com/org/repo#pr=567` or `github.com/org/repo#commit=abc123`.
- `is_current()` returns True when the stored metadata matches the adapter's current version token. The default implementation returns False (always re-extract) — adapters advertising `supports_incremental` override.
- Deletion tombstones: an adapter MAY yield a `SourceItemMetadata(source_file=..., version="__deleted__")` entry — core purges drawers with matching `source_file` and builds no new drawers for that item. Advertised via `supports_deletion_tombstones`.
- Adapters without `supports_incremental` ignore `is_current()` and fully re-extract. Core logs a warning.
### 2.7 Errors
- `SourceNotFoundError` — the `SourceRef` does not resolve.
- `AuthRequiredError` — adapter needs credentials; raises with a message describing which env vars to set.
- `AdapterClosedError` — method called after `close()`.
- `TransformationViolationError` — conformance suite raises this when the content round-trip requires an undeclared transformation.
- `SchemaConformanceError` — a `DrawerRecord.metadata` is missing required fields declared in `describe_schema()` or violates declared types.
---
## 3. Registration and discovery
### 3.1 Entry points (primary mechanism)
Third-party adapters ship as installable packages:
```toml
# pyproject.toml of mempalace-source-cursor
[project.entry-points."mempalace.sources"]
cursor = "mempalace_source_cursor:CursorAdapter"
```
MemPalace discovers adapters at process start via `importlib.metadata.entry_points(group="mempalace.sources")`.
### 3.2 In-tree registry (secondary)
```python
from mempalace.sources.registry import register
register("my-experimental-adapter", MyAdapter)
```
Entry-point discovery and explicit `register()` populate the same registry. Explicit registration wins on name conflict.
### 3.3 Selection (explicit only — no auto-detect)
Unlike storage backends (RFC 001 §3.3), source adapters are never auto-detected. The user selects the adapter explicitly:
```bash
mempalace mine --source cursor ~/ # explicit adapter
mempalace mine --source git /path/to/repo # explicit adapter
mempalace mine --source filesystem /path/to/project # explicit adapter
mempalace mine /path/to/project # implicit: filesystem (default)
```
The default when no `--source` is given is `filesystem`, preserving current `mempalace mine <path>` behavior.
**Backwards compatibility with `--mode`.** Current `cli.py:517-519` exposes `--mode {projects,convos}`. This spec maps:
- `--mode projects` → `--source filesystem` (the new default)
- `--mode convos` → `--source conversations`
`--mode` stays as a deprecated alias through v4.x with a deprecation warning on use; removed in v5.0.
Auto-detection would be hostile — a directory containing a `.git` folder, a `workspaceStorage/` subdir, and an `mbox` file is not a signal of user intent.
---
## 4. Configuration
### 4.1 Shape
```json
{
"sources": {
"my-cursor": {
"type": "cursor",
"workspace_storage": "~/Library/Application Support/Cursor/User/workspaceStorage"
},
"my-git": {
"type": "git",
"repos": ["/projects/mempalace", "/projects/site"]
}
},
"palaces": {
"work": {
"sources": ["my-git"],
"privacy_floor": "internal"
},
"personal": {
"sources": ["my-cursor"]
}
}
}
```
Single-user local mode: config is optional. `mempalace mine <path>` with no config uses the `filesystem` adapter and defaults.
### 4.2 Environment variables
- `MEMPALACE_SOURCE_<NAME>_*` — per-adapter secrets and connection info. Examples: `MEMPALACE_SOURCE_SLACK_TOKEN`, `MEMPALACE_SOURCE_NOTION_API_KEY`, `MEMPALACE_SOURCE_GIT_GITHUB_TOKEN`.
- Secrets MUST be readable from env vars; config files carry structure, env vars carry credentials. Same rule as RFC 001 §4.2.
### 4.3 Adapter-specific options
`SourceRef.options` is a free-form dict of non-secret values (§2.2). Each adapter documents its accepted keys. Unknown keys MUST be ignored (forward compatibility); the adapter MAY log a warning.
---
## 5. Metadata schema contract
### 5.1 Universal fields
Existing drawer metadata fields are preserved — the spec adds the following:
| New field | Type | Added by | Purpose |
|---|---|---|---|
| `adapter_name` | `str` | core, from `BaseSourceAdapter.name` | Which registered source produced this drawer. |
| `adapter_version` | `str` | adapter | Adapter's own version (distinct from palace `normalize_version`). Enables re-extract workflows targeted at drawers from a known-buggy adapter version. |
| `privacy_class` | `str` | adapter default, config override | Per §6. |
Existing fields retain their current semantics (verified against `miner.py:542-561` and `convo_miner.py:338-350`):
| Existing field | Role in the spec |
|---|---|
| `source_file` | Functions as the adapter's source-item identifier. Adapter defines the shape — a filesystem path for filesystem, a URI like `github.com/org/repo#pr=123` for git. MUST be stable across re-ingests of the same logical item. |
| `source_mtime` | Functions as the source-item version for filesystem. Adapters without mtime semantics MAY omit this field and use a different version discriminator (e.g., commit SHA in a separate `metadata["commit_sha"]` field); the spec only requires that `is_current()` can decide staleness from the stored metadata. |
| `filed_at` | When the record was written. ISO-8601 string. |
| `added_by` | Agent name (e.g., `lumi`, `claude-code`). Orthogonal to `adapter_name` — the agent is *who* triggered mining; the adapter is *how* data was extracted. |
| `wing`, `room`, `hall` | Palace routing. Populated by adapter per §2.5. |
| `chunk_index` | Per §1.6. Always 0 for `whole_record` / `metadata_only`. |
| `normalize_version` | Palace-wide schema version (currently `palace.py:50`). Unchanged. Separate from `adapter_version`. |
| `entities` | Semicolon-joined candidate entity names. Already flat; kept flat (§5.4 replacement). |
| `ingest_mode` | Per §1.5. Already on conversation drawers; added to filesystem drawers by the cleanup PR. |
| `extract_mode` | Conversation-adapter-specific (`exchange` vs `general`). Moves into the conversations adapter's declared schema per §5.2. |
**Nothing is renamed. Nothing is removed.** The spec formalizes the shape ingesters already converge on. Existing `where={"source_file": ...}` queries in `searcher.py`, `palace.py`, and callers keep working.
**Chroma metadata constraint:** all metadata values MUST be `str | int | float | bool`. No lists, no nested dicts. This matches RFC 001 §1.4 and the underlying ChromaDB contract. Structured side-data goes to the SQLite knowledge graph (§5.5) or to a declared flat JSON-encoded string field (§5.4).
### 5.2 Adapter schemas
Each adapter returns an `AdapterSchema` from `describe_schema()`:
```python
@dataclass(frozen=True)
class AdapterSchema:
fields: dict[str, FieldSpec] # Keyed by metadata key.
version: str
@dataclass(frozen=True)
class FieldSpec:
type: Literal["string", "int", "float", "bool", "delimiter_joined_string", "json_string"]
required: bool
description: str
indexed: bool = False # Hint to backends that can build indexes (RFC 001 §2.1).
# delimiter_joined_string: the delimiter character (default ";").
delimiter: str = ";"
# json_string: the JSON schema of the encoded object (informational only).
json_schema: dict | None = None
```
`delimiter_joined_string` covers the `entities` shape (current `;`-joined list of names). `json_string` is the escape hatch for adapters needing to pack nested data — the value stored is still a single flat `str` from Chroma's perspective, but the adapter is allowed to document its parsed shape.
Example for a hypothetical `slack` adapter:
```python
AdapterSchema(
version="1.0",
fields={
"channel_name": FieldSpec(type="string", required=True, description="Slack channel name", indexed=True),
"channel_id": FieldSpec(type="string", required=True, description="Slack channel ID"),
"thread_ts": FieldSpec(type="string", required=False, description="Thread root timestamp"),
"author_id": FieldSpec(type="string", required=True, description="Slack user ID", indexed=True),
"author_name": FieldSpec(type="string", required=True, description="Display name at extraction time"),
"reactions": FieldSpec(type="delimiter_joined_string", required=False, description="Emoji shortcodes"),
},
)
```
### 5.3 Enterprise keying
The adapter schema is the stable surface enterprises filter on. A support team querying the palace for `channel_id = "C01234"` does not care about ChromaDB's internal representation. The schema field is declared by the adapter, indexed by the backend (RFC 001 §2.1 `supports_metadata_filters`), and exposed through the existing `where=` clause.
This is how "structured data" serves company use cases without breaking transformation guarantees: declared-transformation content in the drawer, structured fields in the metadata, schema declared by the adapter, filtering done by the backend.
### 5.4 Entity hints (optional)
Adapters with `supports_entity_hints` MAY include:
```python
metadata["entity_hints_json"] = '[{"type":"person","name":"Milla Jovovich","confidence":0.95,"offset":120},{"type":"project","name":"MemPalace","confidence":1.0,"offset":0}]'
```
The value is a JSON-encoded string (type `json_string` in the adapter schema). Core parses on read and feeds into `mempalace/entity_detector.py` as a prior: hints with `confidence >= 0.9` bypass the heuristic detector; lower-confidence hints feed into it as candidates.
This is additive to the existing flat `entities` field — entity_hints carries structure (type, confidence, offset); `entities` remains the Chroma-indexable flat string. An adapter that produces entity_hints MUST also populate `entities` as the flat name-only projection, so existing filter queries keep working.
### 5.5 Knowledge-graph triples (optional)
Adapters with `supports_kg_triples` write directly to the SQLite knowledge graph via `mempalace/knowledge_graph.py` — **not** to drawer metadata. Chroma cannot store structured triples; the KG already exists for this purpose.
The adapter calls the existing `KnowledgeGraph.add_triple()` (signature verified against `mempalace/knowledge_graph.py:130`):
```python
palace.kg.add_triple(
subject="Ben",
predicate="committed",
obj="PR-567", # `object` is a Python builtin — the API uses `obj`.
valid_from="2026-03-12",
confidence=1.0,
source_file=drawer.source_file, # Existing provenance parameter.
)
```
Drawer metadata includes a flat counter — `metadata["kg_triples_count"]: int` — so search consumers can see at a glance that KG side-data exists for a drawer without hitting SQLite.
The existing API has `source_closet` and `source_file` provenance parameters but no `source_drawer_id` or `adapter_name`. The cleanup PR (§9) should add these two optional parameters to `add_triple()` so adapter-written triples can be traced back to (a) the specific drawer that produced them and (b) the adapter that authored them — necessary for re-extraction workflows. Until that lands, adapters use `source_file` as the provenance key and record adapter authorship via a separate table or a predicate naming convention (e.g., `adapter:git:committed`).
This aligns with the existing architecture in `CLAUDE.md` ("Knowledge Graph: ENTITY → PREDICATE → ENTITY with valid_from / valid_to dates") — the RFC formalizes the adapter-side write path.
### 5.6 Source encoding and newline
Current ingesters handle encoding lossily (`errors="replace"` in `miner.py:595` and `normalize.py:124`) and do not record original encoding. The spec does **not** require per-drawer `source_encoding` / `source_newline` — most runs are uniform UTF-8 / LF, and storing the same value on every drawer wastes bytes.
Instead: adapters that handle non-UTF-8 or non-LF sources record the values once on the adapter's `SourceSummary` and per-drawer only when a specific drawer diverges from the adapter default. The `utf8_replace_invalid` declared transformation (§1.4) already communicates that lossy decoding happened; specific drawer-level provenance is opt-in.
---
## 6. Privacy class
### 6.1 Defined levels
| Level | Meaning | Example sources |
|---|---|---|
| `public` | Content intended for public consumption. | arXiv papers, public GitHub repos, published blogs. |
| `internal` | Organizational content, not for public disclosure. | Corporate Slack, internal Notion, private git repos. |
| `pii_potential` | May contain personally identifiable information. | Email, iMessage, Claude/ChatGPT transcripts. |
| `sensitive` | Known to contain PII, financial, or health data. | Medical records, financial statements, legal filings. |
| `secrets_possible` | May contain credentials or secrets. | Git history, environment dumps, CI logs. |
An adapter declares a default on `BaseSourceAdapter.default_privacy_class`. Users MAY override per-source in config.
### 6.2 Enforcement
- Each palace declares a `privacy_floor`. Drawers whose `privacy_class` is equal to the floor or laxer (less sensitive) are admitted; drawers with a stricter (more sensitive) class are rejected at write time and surfaced in a `rejected` list on the CLI and MCP tool.
- **Default floor: none** — v1 accepts all levels unless the palace explicitly configures a floor. This keeps the single-user local default low-friction (users who run `mempalace mine` on a git repo expect `secrets_possible` drawers to land). Enterprise deployments MUST set a floor; docs for regulated-domain setup will recommend starting strict and relaxing as needed.
- Search results surface `privacy_class` in result metadata. MCP tool wrappers MAY redact results whose `privacy_class` is more sensitive than a caller-declared ceiling.
- `secrets_possible` drawers SHOULD pass through a secrets-scan pre-index hook when one is available. PR #389 (sensitive content scanner) is the expected enforcement mechanism for v1; until it lands, `secrets_possible` is a label without automated scanning. The label is still useful — it enables floor-based rejection and alerts downstream consumers.
- The privacy class is recorded in drawer metadata and cannot be downgraded without a migration log entry, matching RFC 001's embedder-identity pattern.
Privacy class is how a regulated-domain deployment (medical, legal, financial) can use MemPalace safely. Without it, flexible ingest becomes a liability; with it, ingest is scoped by policy.
---
## 7. Testing contract
### 7.1 The abstract suite
MemPalace ships `mempalace.sources.testing.AbstractSourceAdapterContractSuite` — a pytest mixin. Every adapter package ships a concrete subclass:
```python
from mempalace.sources.testing import AbstractSourceAdapterContractSuite
class TestCursorAdapter(AbstractSourceAdapterContractSuite):
@pytest.fixture
def adapter(self):
return CursorAdapter()
@pytest.fixture
def fixture_source(self, tmp_path):
"""Build a minimal Cursor workspaceStorage fixture."""
...
return SourceRef(local_path=str(tmp_path))
@pytest.fixture
def canonical_source_bytes(self, fixture_source):
"""Return a mapping of source_file -> authoritative bytes.
For filesystem sources: the file's raw bytes.
For SQLite sources: the extracted value column bytes for each row.
For API sources: the canonical HTTP response body bytes.
Adapter-defined — the adapter knows what its 'source bytes' are.
"""
...
```
The suite covers:
- `ingest` yields items with stable `source_file` and well-formed `version`.
- `is_current()` returns True when metadata matches, False when it differs.
- `close()` releases resources; subsequent calls raise `AdapterClosedError`.
- Unicode content and unicode identifiers are preserved end-to-end.
- Large-source handling: 10k+ items ingest without loading all into memory.
- Error paths: `SourceNotFoundError`, `AuthRequiredError` raise with correct types.
- `SourceRef.options` MUST NOT contain secrets — the adapter raises if it detects a value matching a common-secret pattern (GitHub token prefix, Slack token prefix, etc.). Advisory test, not blocking.
### 7.2 Byte-preserving round-trip (for `byte_preserving` adapters only)
Required for adapters advertising `byte_preserving`:
```python
def test_byte_preserving_round_trip(self, adapter, fixture_source, canonical_source_bytes):
"""Concatenated chunks must equal the canonical source bytes.
For each source_file in the fixture:
1. Read canonical_source_bytes[source_file].
2. Collect all DrawerRecords for that source_file from adapter.ingest(...).
Skip metadata_only drawers (§1.5).
3. Sort by chunk_index.
4. Concatenate record.content values.
5. Assert equality with the canonical bytes (UTF-8 decoded).
"""
```
Failure raises `TransformationViolationError`.
### 7.3 Declared-transformation round-trip (for `declared_lossy` adapters)
Required for adapters with non-empty `declared_transformations`:
```python
def test_declared_transformation_round_trip(self, adapter, fixture_source, canonical_source_bytes):
"""Adapter output must be reproducible by applying ONLY declared transformations.
1. For each source_file, read canonical_source_bytes.
2. Apply each declared transformation in declared_transformations to the bytes,
in the order declared by the adapter, using the reference implementations
in mempalace.sources.transforms.
3. Compare the result to the concatenated record.content values.
4. If they differ, the adapter has applied a transformation it did not declare.
Raise TransformationViolationError.
"""
```
For transformations not in the reserved list (§1.4) — adapter-custom names — the adapter MUST provide a reference implementation callable under `mempalace.sources.transforms.<adapter_name>_<transform_name>`. The conformance suite imports and applies it. Undiscoverable custom transforms fail the test.
### 7.4 Schema conformance
A generator-based property test validates that every record yielded by `ingest` across the fixture source has metadata matching `describe_schema()`. Missing required fields, wrong types, or (in strict mode) undeclared fields fail the test.
### 7.5 Note on current corpus
No existing test in `tests/` asserts byte-preservation or declared-transformation correctness (verified via grep of `tests/` for `verbatim|byte.?preserv|round.?trip`). This RFC's conformance suite introduces the first such coverage. The existing MISSION.md claim of "verbatim always" is a social contract until this lands; afterward it becomes a machine-verified property of adapters that declare `byte_preserving`.
---
## 8. Versioning and compatibility
- `BaseSourceAdapter.spec_version` declares which spec version an adapter implements.
- MemPalace refuses to load an adapter declaring a different major spec version.
- Minor spec versions are additive: new optional methods, new capability tokens, new reserved transformation names, new universal metadata fields with sensible defaults.
- Adapters MAY declare their own `adapter_version` independent of the spec version; this is recorded on every drawer (§5.1) and enables "this drawer was extracted by cursor-adapter 0.3; 0.4 fixed a parsing bug; re-extract affected drawers" workflows.
- This is spec v1.0.
---
## 9. Cleanup prerequisite (not in this spec, but gating)
The existing in-tree ingesters are not adapter-shaped. Before RFC 002 can be enforced, the following refactor lands in a separate PR:
- Introduce `mempalace/sources/base.py` defining `BaseSourceAdapter`, the typed records, and the registry.
- Introduce `mempalace/sources/transforms.py` with reference implementations of every reserved transformation in §1.4. Adapters and the conformance suite both consume these.
- `mempalace/miner.py` → `mempalace/sources/filesystem.py` implementing `BaseSourceAdapter`. Current behavior preserved: 800-char chunking becomes the adapter's default; `READABLE_EXTENSIONS` moves to the adapter; `detect_room()` and `detect_hall()` move to the adapter per §2.5. `declared_transformations = frozenset({"utf8_replace_invalid", "whitespace_trim"})`.
- `mempalace/convo_miner.py` → `mempalace/sources/conversations.py`. Exchange-pair chunking stays. The format-detection logic in `normalize.py` becomes per-format plugins the conversations adapter composes (one for Claude Code JSONL, one for Codex JSONL, one for ChatGPT mapping trees, one for Claude.ai JSON, one for Slack JSON) — each small and independently testable, eliminating the `if source_type` chain. `declared_transformations` enumerates every transformation `normalize.py` and `convo_miner._chunk_by_exchange` actually perform (see §1.4 "Existing code mapping").
- Closet-building wired into the conversations adapter's post-step (currently missing, per §1.7) — side effect of routing through the unified core post-step.
- `mempalace/cli.py` subcommand `mine` routes through the `mempalace.sources` registry. `--mode {projects,convos}` becomes a deprecated alias for `--source {filesystem,conversations}`.
- `mempalace/mcp_server.py` `mempalace_mine` tool accepts a `source` parameter.
- `mempalace/palace.py` exposes `PalaceContext` — a per-mine-invocation facade that bundles the drawer collection, closet collection, knowledge graph, palace config, and progress hooks. Adapters receive this; they do not import `palace.py` directly.
- `NORMALIZE_VERSION` (currently a module-level constant in `palace.py:50`) stays. It is the palace-wide schema version, orthogonal to per-adapter `adapter_version`.
- `KnowledgeGraph.add_triple()` (`knowledge_graph.py:130`) gains two optional parameters: `source_drawer_id: str = None` and `adapter_name: str = None`. Existing callers are unaffected; adapters advertising `supports_kg_triples` (§5.5) populate both. Backwards-compatible change.
This cleanup is substantial — comparable to RFC 001 §10's chroma-import removal — and should land before any new third-party adapter PR merges. Each new adapter is easier after the cleanup, not harder.
---
## 10. Impact on in-flight PRs
| PR / Issue | Effort to align |
|---|---|
| [#274](https://github.com/MemPalace/mempalace/issues/274) Cursor SQLite | Becomes `mempalace-source-cursor` third-party package. Author has a working prototype on Windows; needs `describe_schema()`, `declared_transformations`, and the conformance suite. Prior #287 (closed unmerged) is predecessor work. |
| [#23](https://github.com/MemPalace/mempalace/pull/23) OpenCode SQLite | Becomes `mempalace-source-opencode`. Same shape as Cursor. |
| [#169](https://github.com/MemPalace/mempalace/pull/169) Pi agent | Becomes `mempalace-source-pi` or a format plugin under the conversations adapter (depending on format similarity). |
| [#232](https://github.com/MemPalace/mempalace/pull/232) Cursor JSONL | Deprecated in favor of #274's SQLite path; or a second mode of `mempalace-source-cursor`. |
| [#567](https://github.com/MemPalace/mempalace/pull/567), [#98](https://github.com/MemPalace/mempalace/pull/98) git-mine | Closest existing work to what the spec envisions. Becomes first-party `mempalace/sources/git.py`. Exercises `whole_record` mode, `supports_structured_metadata`, `supports_closet_hints` (decision-signal quotes), `supports_kg_triples` (commit authorship, PR review relationships). |
| [#591](https://github.com/MemPalace/mempalace/pull/591), [#592](https://github.com/MemPalace/mempalace/pull/592) Delphi Oracle | Deferred. The live-stream pattern is out of scope for v1 (§Non-goals). A v1.1 addition will specify webhook/stream adapters. |
| [#702](https://github.com/MemPalace/mempalace/pull/702) Cursor + factory.ai | Splits into two adapter packages. |
| [#981](https://github.com/MemPalace/mempalace/issues/981) path-level descriptions | Absorbed by §1.5 `metadata_only` mode + §5.1 `ingest_mode`. A new first-party `descriptions` adapter or a second mode on `filesystem`. |
| [#244](https://github.com/MemPalace/mempalace/pull/244) Cursor memory-first MCP workflow docs | Points at `mempalace-source-cursor` once the adapter lands. |
| [#419](https://github.com/MemPalace/mempalace/pull/419), [#300](https://github.com/MemPalace/mempalace/pull/300), [#952](https://github.com/MemPalace/mempalace/pull/952) language-extension additions to `READABLE_EXTENSIONS` | Become per-language config on the filesystem adapter. Contributors can publish domain-specific adapters without touching core. |
| [#389](https://github.com/MemPalace/mempalace/pull/389) sensitive content scanner | Expected enforcement mechanism for the `secrets_possible` privacy class (§6.2). Not a blocker for this spec, but a natural consumer. |
| [#434](https://github.com/MemPalace/mempalace/pull/434) auto-populate KG from drawers | Complementary: post-hoc derivation of KG triples from drawer content. Adapters with `supports_kg_triples` provide the up-front path; #434 handles everything else. |
---
## 11. Open questions
1. **Cross-adapter dedup.** When a PR body is mined via `git` AND shows up as a conversation quote mined via `claude-code`, both drawers land. Is query-time dedup in `searcher.py` sufficient, or should core maintain a content-hash index across adapters? Declared non-goal in v1 but worth revisiting if user feedback demands it.
2. **Live-stream pattern.** Delphi Oracle (#591/592) and potentially Slack/Discord real-time ingestion need a push-mode contract. This is a v1.1 addition (streaming adapter trait + webhook surface), not blocking.
3. **LLM-assisted structured extraction.** Some adapters will want to call an LLM to extract structured fields. The spec does not standardize this — should it? Argument for: conformance test for LLM-driven fields, consistent caching. Argument against: local-first / zero-API is a core promise; LLM dependencies are opt-in per adapter.
4. **Adapter-vs-format split for conversations.** §9 proposes format plugins composed under a single conversations adapter. Alternative: one adapter per format (claude-code, chatgpt, codex, cursor-jsonl, slack). The trade-off is discoverability (one adapter is easier to find) vs. encapsulation (format plugins are simpler to test). Preference leans toward the single-adapter + plugin model; open to counter-argument.
5. **Default `privacy_floor`.** v1 defaults to none (§6.2) so single-user local mining is frictionless. An argument exists for defaulting to `pii_potential` — forces regulated-domain users to opt in to sensitive levels rather than opt out. Open to changing the default before v1 ships.
6. **`canonical_source_bytes` for API-backed adapters.** §7.1 defines this as adapter-declared. For API-backed adapters (Slack, Notion), what constitutes "canonical bytes" in a conformance test — the fixture's captured HTTP response? A serialized representation of the parsed object? For now this is left to the adapter; a follow-up spec for common conventions may be needed.
7. **`adapter_version` bump semantics.** When does an adapter bump `adapter_version`? On any behavior change? On declared-transformation changes only? This suggests a follow-up doc on adapter SemVer conventions for the community to agree on.
---
## 12. Rollout
1. Land the cleanup PR (§9): introduce `mempalace/sources/`, refactor `miner.py` → filesystem adapter, `convo_miner.py` → conversations adapter, route CLI and MCP through the sources registry. Behavior preserved end-to-end. Closets get built for conversation drawers as a side effect.
2. Land this spec as-is. Add `AbstractSourceAdapterContractSuite`, entry-point discovery, `AdapterSchema` validation, privacy-class enforcement (floor-gated writes), declared-transformation reference implementations in `mempalace/sources/transforms.py`.
3. Land `mempalace/sources/git.py` as the first-party adapter absorbing #567. Exercises `whole_record`, `supports_structured_metadata`, `supports_closet_hints`, `supports_kg_triples` together.
4. Encourage the Cursor (#274), OpenCode (#23), and Pi (#169) authors to publish as third-party packages under `mempalace-source-*`. Offer review help against the spec.
5. Publish adapter-authoring docs at [mempalaceofficial.com/guide/authoring-sources](https://mempalaceofficial.com/guide/authoring-sources.html).
6. Update [ROADMAP.md](../../ROADMAP.md) with spec v1.0 adoption under v4.0.0-alpha.
-66
View File
@@ -1,66 +0,0 @@
# How to Use MemPalace Hooks (Auto-Save)
MemPalace hooks act as an "Auto-Save" feature. They help your AI keep a permanent memory without you needing to run manual commands.
### 1. What are these hooks?
* **Save Hook** (`mempal_save_hook.sh`): Saves new facts and decisions every 15 messages.
* **PreCompact Hook** (`mempal_precompact_hook.sh`): Saves your context right before the AI's memory window fills up.
### 2. Setup for Claude Code
Add this to `~/.claude/settings.local.json` (global) or `.claude/settings.local.json` (project-scoped) to enable automatic background saving:
```json
{
"hooks": {
"Stop": [
{
"matcher": "*",
"hooks": [{
"type": "command",
"command": "/absolute/path/to/hooks/mempal_save_hook.sh",
"timeout": 30
}]
}
],
"PreCompact": [
{
"hooks": [{
"type": "command",
"command": "/absolute/path/to/hooks/mempal_precompact_hook.sh",
"timeout": 30
}]
}
]
}
}
```
Make the hooks executable:
```bash
chmod +x /absolute/path/to/hooks/mempal_save_hook.sh
chmod +x /absolute/path/to/hooks/mempal_precompact_hook.sh
```
**Note:** Replace `/absolute/path/to/hooks/` with the actual path where you cloned the MemPalace repository (e.g., `~/projects/mempalace/hooks/`).
### 3. What changed (v3.1.0+)
Both hooks now have **two-layer capture**:
1. **Auto-mine**: Before blocking the AI, the hook runs the normalizer on the JSONL transcript and upserts chunks directly into the palace. This captures raw tool output (Bash results, search findings, build errors) that the AI would otherwise summarize away.
2. **Updated reason messages**: The block reason now explicitly tells the AI to save tool output verbatim — not just topics and decisions.
### 4. Backfill past conversations (one-time)
The hooks capture conversations going forward, but you probably have months of past sessions. Run this once to mine them all:
```bash
mempalace mine ~/.claude/projects/ --mode convos
```
### 5. Configuration
- **`SAVE_INTERVAL=15`** — How many human messages between saves
- **`MEMPALACE_PYTHON`** — Python interpreter with mempalace + chromadb. Auto-detects: env var → repo venv → system python3
- **`MEMPAL_DIR`** — Optional directory for auto-ingest via `mempalace mine`
-12
View File
@@ -1,12 +0,0 @@
#!/usr/bin/env python3
"""Example walkthrough: print the commands that mine a project folder.

Pass the project directory as the first command-line argument. When it is
omitted, the placeholder path "~/projects/my_app" is shown instead. The
script only prints the commands; it does not run them.
"""
import sys

# The first CLI argument wins; otherwise fall back to the placeholder path.
project_dir = sys.argv[1] if sys.argv[1:] else "~/projects/my_app"

# (heading, command) pairs, printed in order. Later headings carry their own
# leading newline so the steps are separated by a blank line.
steps = (
    ("Step 1: Initialize rooms from folder structure", f" mempalace init {project_dir}"),
    ("\nStep 2: Mine everything", f" mempalace mine {project_dir}"),
    ("\nStep 3: Search", " mempalace search 'why did we choose this approach'"),
)

for heading, command in steps:
    print(heading)
    print(command)
-11
View File
@@ -1,11 +0,0 @@
#!/usr/bin/env python3
"""Example cheat sheet: print the commands that import chat transcripts.

Covers Claude Code sessions, ChatGPT exports, and the "general" extractor.
The script only prints the commands; it does not run them.
"""

# (title, command) pairs, printed in order with a blank line between entries.
examples = (
    (
        "Import Claude Code sessions:",
        " mempalace mine ~/claude-sessions/ --mode convos --wing my_project",
    ),
    (
        "Import ChatGPT exports:",
        " mempalace mine ~/chatgpt-exports/ --mode convos",
    ),
    (
        "Use general extractor for richer extraction:",
        " mempalace mine ~/chats/ --mode convos --extract general",
    ),
)

for position, (title, command) in enumerate(examples):
    if position:
        # Blank separator line before every entry except the first.
        print()
    print(title)
    print(command)
-102
View File
@@ -1,102 +0,0 @@
# Gemini CLI Integration Guide
This guide explains how to set up MemPalace as a permanent memory for the [Gemini CLI](https://github.com/google/gemini-cli).
## Prerequisites
- Python 3.9+
- Gemini CLI installed and configured
## 1. Installation
On many Linux systems, installing Python packages globally is restricted. We
recommend [`uv`](https://docs.astral.sh/uv/), which creates and manages a
local virtual environment for you.
```bash
# Clone the repository (if you haven't already)
git clone https://github.com/MemPalace/mempalace.git
cd mempalace
# Create the venv and install MemPalace + dependencies in editable mode
uv sync
```
This produces a `.venv/` directory inside the repo with everything installed.
If you prefer plain pip:
```bash
python3 -m venv .venv
.venv/bin/pip install -e .
```
## 2. Initialization
Set up your "Palace" (the database) and configure your identity.
```bash
# Initialize the palace in the current directory
uv run python -m mempalace init .
```
### Identity and Wings (Optional but Recommended)
You can manually define who you are and what projects you work on by creating/editing these files in `~/.mempalace/`:
- **`~/.mempalace/identity.txt`**: A plain text file describing your role and focus.
- **`~/.mempalace/wing_config.json`**: A JSON file mapping projects and name variants to "Wings".
## 3. Connect to Gemini CLI (MCP)
Register MemPalace as an MCP server so Gemini CLI can use its tools.
```bash
gemini mcp add mempalace /absolute/path/to/mempalace/.venv/bin/python3 -m mempalace.mcp_server --scope user
```
*Note: Use the absolute path to ensure it works from any directory.*
## 4. Enable Auto-Saving (Hooks)
To ensure the AI saves memories automatically when conversation history becomes too long, add a `PreCompress` hook to your Gemini CLI settings.
Edit your `~/.gemini/settings.json` and add the following:
```json
{
"hooks": {
"PreCompress": [
{
"matcher": "*",
"hooks": [
{
"type": "command",
"command": "/absolute/path/to/mempalace/hooks/mempal_precompact_hook.sh"
}
]
}
]
}
}
```
Make sure the hook scripts are executable:
```bash
chmod +x hooks/*.sh
```
## 5. Usage
Once connected, Gemini CLI will automatically:
- Start the MemPalace server on launch.
- Use `mempalace_search` to find relevant past discussions.
- Use the `PreCompress` hook to save new memories before they are lost.
### Manual Mining
If you want the AI to learn from your existing code or docs immediately, run the "mine" command:
```bash
uv run python -m mempalace mine /path/to/your/project
```
### Verification
In a Gemini CLI session, you can run:
- `/mcp list`: Verify `mempalace` is `CONNECTED`.
- `/hooks panel`: Verify the `PreCompress` hook is active.
-27
View File
@@ -1,27 +0,0 @@
# MCP Integration — Claude Code
## Setup
Run the MCP server:
```bash
mempalace-mcp
```
Or add it to Claude Code:
```bash
claude mcp add mempalace -- mempalace-mcp
```
## Available Tools
The server exposes the full MemPalace MCP toolset. Common entry points include:
- **mempalace_status** — palace stats (wings, rooms, drawer counts)
- **mempalace_search** — semantic search across all memories
- **mempalace_list_wings** — list all projects in the palace
## Usage in Claude Code
Once configured, Claude Code can search your memories directly during conversations.
+91 -83
View File
@@ -2,17 +2,51 @@
These hook scripts make MemPalace save automatically. No manual "save" commands needed. These hook scripts make MemPalace save automatically. No manual "save" commands needed.
This deployment ships only the **remote** hook variants — the palace runs as a Docker container on a server (e.g. Unraid), and hooks `curl` the active session transcript to the server's `/ingest/transcript` endpoint over HTTPS with bearer auth. Server-side, the existing `mine_convos` pipeline handles entity detection, room assignment, dedup, and idempotency. See [`deploy/unraid/README.md`](../deploy/unraid/README.md) for the server side.
## What They Do ## What They Do
| Hook | When It Fires | What Happens | | Hook | When It Fires | What Happens |
|------|--------------|-------------| |---|---|---|
| **Save Hook** | Every 15 human messages | Auto-mines transcript (tool output included), then blocks the AI to save topics/decisions/quotes | | **Save Hook** (`mempal_save_hook_remote.sh`) | Every 15 user messages (configurable via `SAVE_INTERVAL`) | Backgrounded `curl` POSTs the active transcript. Returns immediately so the AI doesn't stall. Idempotent — failed retries are safe. |
| **PreCompact Hook** | Right before context compaction | Auto-mines transcript, then emergency save — forces the AI to save EVERYTHING before losing context | | **PreCompact Hook** (`mempal_precompact_hook_remote.sh`) | Right before context compaction | Synchronous `curl` POST. Blocks until the upload completes (or the hook timeout fires) so memory is durable before context shrinks. |
**Two-layer capture:** Hooks auto-mine the JSONL transcript directly into the palace (capturing raw tool output — Bash results, search findings, build errors). They also block the AI with a reason message telling it to save verbatim tool output and key context. Belt and suspenders — tool output gets stored even if the AI summarizes instead of quoting. **Two-layer capture.** The save hook ships the JSONL transcript directly to the server (capturing raw tool output — Bash results, search findings, build errors), where the miner files it verbatim into the palace. Tool output gets stored even if the AI summarizes instead of quoting.
## Env-var contract
The scripts read all configuration from environment variables. There is no script-level config to edit; the same script works against any number of machines.
| Variable | Required | Purpose |
|---|---|---|
| `MEMPAL_REMOTE_URL` | yes | Base URL of the MemPalace server, e.g. `https://unraid.local:8443`. |
| `MEMPAL_REMOTE_TOKEN` | yes | Bearer token shared with the server's `MEMPAL_TOKEN`. |
| `MEMPAL_REMOTE_INSECURE` | no | Set to `1` to skip TLS verification. Use only when the server uses Caddy's `tls internal` self-signed cert and the client hasn't trusted the root CA. |
| `MEMPAL_REMOTE_WING` | no | Force a specific wing for this client's transcripts. Default: server derives wing from the session id. |
| `SAVE_INTERVAL` | no | Override the default of 15 user messages. |
| `MEMPAL_PYTHON` | no | Path to a Python 3 interpreter. Only needs `json` + `sys` from stdlib — mempalace does not need to be installed in it. Used to parse the hook's stdin JSON. |
Set these persistently:
**PowerShell (Windows):**
```powershell
[Environment]::SetEnvironmentVariable("MEMPAL_REMOTE_URL", "https://unraid.local:8443", "User")
[Environment]::SetEnvironmentVariable("MEMPAL_REMOTE_TOKEN", "<the-token>", "User")
[Environment]::SetEnvironmentVariable("MEMPAL_REMOTE_INSECURE", "1", "User") # if self-signed
```
**Bash/Zsh:** add the same exports to `~/.zshrc` / `~/.bashrc`.
If `MEMPAL_REMOTE_URL` or `MEMPAL_REMOTE_TOKEN` is unset, the scripts no-op and log a one-liner — they never block the AI from stopping. Safe to install on a machine that doesn't have a remote configured yet.
## Install — Claude Code ## Install — Claude Code
Make the scripts executable:
```bash
chmod +x hooks/mempal_save_hook_remote.sh hooks/mempal_precompact_hook_remote.sh
```
Add to `.claude/settings.local.json`: Add to `.claude/settings.local.json`:
```json ```json
@@ -22,26 +56,21 @@ Add to `.claude/settings.local.json`:
"matcher": "*", "matcher": "*",
"hooks": [{ "hooks": [{
"type": "command", "type": "command",
"command": "/absolute/path/to/hooks/mempal_save_hook.sh", "command": "/absolute/path/to/hooks/mempal_save_hook_remote.sh",
"timeout": 30 "timeout": 30
}] }]
}], }],
"PreCompact": [{ "PreCompact": [{
"hooks": [{ "hooks": [{
"type": "command", "type": "command",
"command": "/absolute/path/to/hooks/mempal_precompact_hook.sh", "command": "/absolute/path/to/hooks/mempal_precompact_hook_remote.sh",
"timeout": 30 "timeout": 60
}] }]
}] }]
} }
} }
``` ```
Make them executable:
```bash
chmod +x hooks/mempal_save_hook.sh hooks/mempal_precompact_hook.sh
```
## Install — Codex CLI (OpenAI) ## Install — Codex CLI (OpenAI)
Add to `.codex/hooks.json`: Add to `.codex/hooks.json`:
@@ -50,132 +79,111 @@ Add to `.codex/hooks.json`:
{ {
"Stop": [{ "Stop": [{
"type": "command", "type": "command",
"command": "/absolute/path/to/hooks/mempal_save_hook.sh", "command": "/absolute/path/to/hooks/mempal_save_hook_remote.sh",
"timeout": 30 "timeout": 30
}], }],
"PreCompact": [{ "PreCompact": [{
"type": "command", "type": "command",
"command": "/absolute/path/to/hooks/mempal_precompact_hook.sh", "command": "/absolute/path/to/hooks/mempal_precompact_hook_remote.sh",
"timeout": 30 "timeout": 60
}] }]
} }
``` ```
## Configuration ## How it works
Edit `mempal_save_hook.sh` to change:
- **`SAVE_INTERVAL=15`** — How many human messages between saves. Lower = more frequent saves, higher = less interruption.
- **`STATE_DIR`** — Where hook state is stored (defaults to `~/.mempalace/hook_state/`)
- **`MEMPAL_DIR`** — Optional **project directory** (code, notes, docs) to also mine on each save trigger, with `--mode projects`. The hook ALWAYS mines the active conversation transcript automatically with `--mode convos` — `MEMPAL_DIR` is purely additive, never an override. Leave blank if you don't want to ingest project files.
- **`MEMPALACE_PYTHON`** — Optional env var. Python interpreter with mempalace + chromadb installed. Auto-detects: `MEMPALACE_PYTHON` env var → repo `venv/bin/python3` → system `python3`. Set this if your venv is in a non-standard location.
### mempalace CLI
The relevant commands are:
```bash
mempalace mine <dir> # Mine all files in a directory
mempalace mine <dir> --mode convos # Mine conversation transcripts only
```
The hooks resolve the repo root automatically from their own path, so they work regardless of where you install the repo.
## How It Works (Technical)
### Save Hook (Stop event) ### Save Hook (Stop event)
``` ```
User sends message → AI responds → Claude Code fires Stop hook User sends message → AI responds → Claude Code fires Stop hook
Hook counts human messages in JSONL transcript Hook counts user messages in JSONL transcript
┌─── < 15 since last save ──→ echo "{}" (let AI stop) ┌─── < SAVE_INTERVAL since last save ──→ echo "{}" (let AI stop)
└─── ≥ 15 since last save └─── ≥ SAVE_INTERVAL since last save
Auto-mine transcript → palace (tool output captured) Background curl POST → server /ingest/transcript
{"decision": "block", "reason": "save tool output verbatim..."} Hook returns {} immediately (AI stops normally)
AI saves to palace (topics, decisions, quotes) Server-side miner runs in background, files drawers
AI tries to stop again
stop_hook_active = true
Hook sees flag → echo "{}" (let it through)
``` ```
The `stop_hook_active` flag prevents infinite loops: block once → AI saves → tries to stop → flag is true → we let it through.
### PreCompact Hook ### PreCompact Hook
``` ```
Context window getting full → Claude Code fires PreCompact Context window getting full → Claude Code fires PreCompact
Find transcript (from input or session_id lookup) Synchronous curl POST → server /ingest/transcript
Auto-mine transcript → palace (tool output captured) Wait for 200 OK (or hook timeout)
{"decision": "block", "reason": "save tool output verbatim..."} echo "{}" → Compaction proceeds
AI saves everything
Compaction proceeds
``` ```
No counting needed — compaction always warrants a save. The auto-mine captures raw tool output before the AI gets a chance to summarize it away. Synchronous on PreCompact is intentional — this is the safety net before context shrinks. The Claude Code hook timeout (set in `settings.local.json`) bounds how long we'll wait.
## Debugging ## Debugging
Check the hook log:
```bash ```bash
cat ~/.mempalace/hook_state/hook.log tail -f ~/.mempalace/hook_state/hook.log
``` ```
Example output: Example:
``` ```
[14:30:15] Session abc123: 12 exchanges, 12 since last save [14:30:15] Session abc123: 12 exchanges, 12 since last save
[14:35:22] Session abc123: 15 exchanges, 15 since last save [14:35:22] Session abc123: 15 exchanges, 15 since last save
[14:35:22] TRIGGERING SAVE at exchange 15 [14:35:22] ingest ok
[14:40:01] Session abc123: 18 exchanges, 3 since last save [14:50:18] PRE-COMPACT triggered for session abc123
[14:50:19] PRE-COMPACT ingest ok
``` ```
## Known Limitations A 401 response means the bearer token is wrong. A connection error means the URL/cert is wrong (or the server is down). All curl output goes to the same log.
**Hooks require session restart after install.** Claude Code loads hooks from `settings.json` at session start only. If you run `mempalace init` or manually edit hook config mid-session, the hooks won't fire until you restart Claude Code. This is a Claude Code limitation. ## Known limitations
**`MEMPAL_PYTHON` override for the hook's internal Python calls.** The save hook parses its JSON input and counts transcript messages with `python3`. When the harness is launched from a GUI on macOS — `open -a`, Spotlight, the dock — its `PATH` is the minimal `/usr/bin:/bin:/usr/sbin:/sbin` inherited from `launchd`, not your shell PATH. If `python3` isn't on that PATH, those internal calls fail and the hook can't count exchanges. **Hooks require session restart after install.** Claude Code loads hooks from `settings.json` at session start only. If you edit hook config mid-session, restart Claude Code to pick up changes.
Point the hook at any Python 3 interpreter to fix it: **Python interpreter resolution.** The scripts parse hook stdin JSON with `python3`. When Claude Code is launched from a GUI on macOS (Spotlight, dock, `open -a`), its `PATH` is the minimal `/usr/bin:/bin:/usr/sbin:/sbin` inherited from `launchd` rather than your shell PATH. If `python3` isn't there, set `MEMPAL_PYTHON` to a known-good interpreter:
```bash ```bash
export MEMPAL_PYTHON="/usr/bin/python3" # system Python is fine export MEMPAL_PYTHON="/usr/bin/python3"
export MEMPAL_PYTHON="$HOME/.venvs/mempalace/bin/python" # or your venv # or:
export MEMPAL_PYTHON="$HOME/.venvs/x/bin/python"
``` ```
Resolution priority: `$MEMPAL_PYTHON` (if set and executable) → `$(command -v python3)` → bare `python3`. The interpreter only needs `json` and `sys` from the standard library — `mempalace` itself does not need to be installed in it. Resolution priority: `$MEMPAL_PYTHON` → `$(command -v python3)` → bare `python3`. The interpreter only needs `json` and `sys` — mempalace itself does not need to be installed.
Note: the `mempalace mine` auto-ingest runs via the `mempalace` CLI, so that command also needs to be on the hook's `PATH`. Installing with `pipx install mempalace` or `uv tool install mempalace` puts it on a stable global location; otherwise extend the hook environment's `PATH` to include your venv's `bin/`. **`MineAlreadyRunning` collisions.** If two clients ingest simultaneously, the second one's request returns 500 because the server-side `mine_lock` is held. The save hook is idempotent — the next save catches up. If you see this constantly in the log, raise `SAVE_INTERVAL` on the chattier client.
## Backfill Past Conversations ## Backfilling past conversations
The hooks only capture conversations going forward. To mine **past** Claude Code sessions into your palace, run a one-time backfill: The hooks only capture sessions going forward. To mine **past** sessions into the remote palace, loop `curl` over them:
```bash ```bash
mempalace mine ~/.claude/projects/ --mode convos # Claude Code sessions
for f in ~/.claude/projects/**/*.jsonl; do
curl -k -X POST \
-H "Authorization: Bearer $MEMPAL_REMOTE_TOKEN" \
-H "X-Session-Id: $(basename "$f" .jsonl)" \
--data-binary @"$f" \
"$MEMPAL_REMOTE_URL/ingest/transcript"
done
# Codex CLI sessions
for f in ~/.codex/sessions/**/*.jsonl; do
curl -k -X POST \
-H "Authorization: Bearer $MEMPAL_REMOTE_TOKEN" \
-H "X-Session-Id: $(basename "$f" .jsonl)" \
--data-binary @"$f" \
"$MEMPAL_REMOTE_URL/ingest/transcript"
done
``` ```
This scans all JSONL transcripts from previous sessions and files them into the `conversations` wing. On a typical developer machine with months of history, this can yield 50K–200K drawers. The server-side miner is idempotent — re-uploading the same transcript won't double-file. Drop `-k` once Caddy's root CA is trusted on the client.
For Codex CLI sessions:
```bash
mempalace mine ~/.codex/sessions/ --mode convos
```
This only needs to be done once — after that, the hooks auto-mine each session as you go.
## Cost ## Cost
**Zero extra tokens.** The hooks notify the AI that saves happened in the background — the AI doesn't need to write anything in the chat. All filing is handled automatically. Previous versions asked the AI to write diary entries and drawer content in the chat window, which cost ~$1/session in retransmitted tokens. **Zero extra tokens.** The hooks save in the background — the AI doesn't need to write anything in the chat window. All filing happens server-side after the upload returns.
-123
View File
@@ -1,123 +0,0 @@
#!/bin/bash
# MEMPALACE PRE-COMPACT HOOK — Emergency save before compaction
#
# Claude Code "PreCompact" hook. Fires RIGHT BEFORE the conversation
# gets compressed to free up context window space.
#
# This is the safety net. When compaction happens, the AI loses detailed
# context about what was discussed. This hook forces one final save of
# EVERYTHING before that happens.
#
# Unlike the save hook (which triggers every N exchanges), this ALWAYS
# blocks — because compaction is always worth saving before.
#
# === INSTALL ===
# Add to .claude/settings.local.json:
#
# "hooks": {
# "PreCompact": [{
# "hooks": [{
# "type": "command",
# "command": "/absolute/path/to/mempal_precompact_hook.sh",
# "timeout": 30
# }]
# }]
# }
#
# For Codex CLI, add to .codex/hooks.json:
#
# "PreCompact": [{
# "type": "command",
# "command": "/absolute/path/to/mempal_precompact_hook.sh",
# "timeout": 30
# }]
#
# === HOW IT WORKS ===
#
# Claude Code sends JSON on stdin with:
# session_id — unique session identifier
#
# We always return decision: "block" with a reason telling the AI
# to save everything. After the AI saves, compaction proceeds normally.
#
# === MEMPALACE CLI ===
# The hook ALWAYS mines the active conversation transcript synchronously
# before compaction (via `mempalace mine <transcript-dir> --mode convos`).
# MEMPAL_DIR is an *additional*, optional target for project files — it
# does not replace the conversation mine.
STATE_DIR="$HOME/.mempalace/hook_state"
mkdir -p "$STATE_DIR"

# Optional: project directory (code / notes / docs) to also mine before
# compaction. Mined with `--mode projects`. The conversation transcript
# is always mined regardless — this is purely additive.
# Example: MEMPAL_DIR="$HOME/projects/my_app"
MEMPAL_DIR=""

# Resolve the Python interpreter. Same contract as mempal_save_hook.sh:
# MEMPAL_PYTHON (explicit override) → $(command -v python3) → bare python3.
MEMPAL_PYTHON_BIN="${MEMPAL_PYTHON:-}"
if [ -z "$MEMPAL_PYTHON_BIN" ] || [ ! -x "$MEMPAL_PYTHON_BIN" ]; then
    MEMPAL_PYTHON_BIN="$(command -v python3 2>/dev/null || echo python3)"
fi

# Read JSON input from stdin
INPUT=$(cat)

# Parse session_id and transcript_path in one call. Sanitize both, then
# read sanitized values from one-per-line stdout into shell variables —
# avoids ``eval`` on generated code (#1231 review). Same contract as
# mempal_save_hook.sh.
#
# A plain `read` loop is used instead of `mapfile`: `mapfile` only exists
# in bash >= 4.0, so under the bash 3.2 that macOS ships as /bin/bash the
# parse would fail and every field would fall back to its default.
# A trailing CR is stripped so CRLF output from a native Windows Python
# does not end up inside the values (the sanitizer never emits CR itself).
_mempal_cr=$'\r'
_mempal_line=""
_mempal_parsed=()
while IFS= read -r _mempal_line || [ -n "$_mempal_line" ]; do
    _mempal_parsed+=("${_mempal_line%"$_mempal_cr"}")
done < <(echo "$INPUT" | "$MEMPAL_PYTHON_BIN" -c "
import sys, json, re
data = json.load(sys.stdin)
sid = data.get('session_id', 'unknown')
tp = data.get('transcript_path', '')
safe = lambda s: re.sub(r'[^a-zA-Z0-9_/.\-~]', '', str(s))
print(safe(sid))
print(safe(tp))
" 2>/dev/null)
SESSION_ID="${_mempal_parsed[0]:-unknown}"
TRANSCRIPT_PATH="${_mempal_parsed[1]:-}"

# Expand ~ in path
TRANSCRIPT_PATH="${TRANSCRIPT_PATH/#\~/$HOME}"
# Shape check for a transcript path; succeeds (status 0) only when the
# argument is non-empty, ends in .json or .jsonl, and has no ".." path
# component. Mirrors mempalace.hooks_cli._validate_transcript_path so the
# shell hook rejects the same shapes the Python hook rejects (#1231 review).
is_valid_transcript_path() {
    local candidate="$1"

    # Empty input is never a transcript.
    if [ -z "$candidate" ]; then
        return 1
    fi

    # Only the two transcript suffixes are accepted.
    if [[ "$candidate" != *.json && "$candidate" != *.jsonl ]]; then
        return 1
    fi

    # Wrapping in slashes lets one pattern catch "..", "../x" and "x/..".
    if [[ "/$candidate/" == */../* ]]; then
        return 1
    fi

    return 0
}
HOOK_LOG="$STATE_DIR/hook.log"

echo "[$(date '+%H:%M:%S')] PRE-COMPACT triggered for session $SESSION_ID" >> "$HOOK_LOG"

# Ingest runs in the foreground so it finishes before compaction starts.
# Two independent targets, each handled on its own:
#   1. the directory holding TRANSCRIPT_PATH  → --mode convos
#   2. MEMPAL_DIR (when configured)           → --mode projects
if ! is_valid_transcript_path "$TRANSCRIPT_PATH" || [ ! -f "$TRANSCRIPT_PATH" ]; then
    # Stay quiet when no path was supplied at all; log anything else.
    if [ -n "$TRANSCRIPT_PATH" ]; then
        echo "[$(date '+%H:%M:%S')] Skipping invalid transcript path: $TRANSCRIPT_PATH" \
            >> "$HOOK_LOG"
    fi
else
    TRANSCRIPT_DIR="$(dirname "$TRANSCRIPT_PATH")"
    mempalace mine "$TRANSCRIPT_DIR" --mode convos >> "$HOOK_LOG" 2>&1
fi

if [ -n "$MEMPAL_DIR" ]; then
    if [ -d "$MEMPAL_DIR" ]; then
        mempalace mine "$MEMPAL_DIR" --mode projects >> "$HOOK_LOG" 2>&1
    fi
fi

# Empty JSON object = do not block. "decision": "allow" is invalid —
# only "block" or {} are recognized.
echo '{}'
+102
View File
@@ -0,0 +1,102 @@
#!/bin/bash
# MEMPALACE PRE-COMPACT HOOK (REMOTE) — emergency save before compaction.
#
# Drop-in replacement for mempal_precompact_hook.sh when MemPalace runs
# on a server. Always synchronous: we wait for the upload to complete
# before returning so the transcript is on the server before the
# conversation gets compressed.
#
# Required env vars (same as the save hook):
# MEMPAL_REMOTE_URL e.g. https://unraid.local:8443
# MEMPAL_REMOTE_TOKEN bearer token
# Optional:
# MEMPAL_REMOTE_WING explicit wing override
# MEMPAL_REMOTE_INSECURE "1" for self-signed cert
#
# === INSTALL ===
# Add to .claude/settings.local.json:
#
# "hooks": {
# "PreCompact": [{
# "hooks": [{
# "type": "command",
# "command": "/abs/path/to/mempal_precompact_hook_remote.sh",
# "timeout": 60
# }]
# }]
# }
set -u

STATE_DIR="$HOME/.mempalace/hook_state"
mkdir -p "$STATE_DIR"

# Resolve the Python interpreter used to parse the hook's stdin JSON:
# MEMPAL_PYTHON (when set and executable) → $(command -v python3) → bare python3.
MEMPAL_PYTHON_BIN="${MEMPAL_PYTHON:-}"
if [ -z "$MEMPAL_PYTHON_BIN" ] || [ ! -x "$MEMPAL_PYTHON_BIN" ]; then
    MEMPAL_PYTHON_BIN="$(command -v python3 2>/dev/null || echo python3)"
fi

# Pre-flight: without a URL and token there is nothing to upload to.
# Log the reason and reply {} so compaction is never blocked.
if [ -z "${MEMPAL_REMOTE_URL:-}" ] || [ -z "${MEMPAL_REMOTE_TOKEN:-}" ]; then
    echo "[$(date '+%H:%M:%S')] PRE-COMPACT: MEMPAL_REMOTE_URL/TOKEN not set — skipping" \
        >> "$STATE_DIR/hook.log"
    echo "{}"
    exit 0
fi

INPUT=$(cat)

# Parse session_id and transcript_path in one Python call; each value is
# sanitized to [a-zA-Z0-9_/.\-~] and printed on its own line.
#
# A plain `read` loop is used instead of `mapfile`: `mapfile` only exists
# in bash >= 4.0, so under the bash 3.2 that macOS ships as /bin/bash the
# parse would fail and no transcript would ever be uploaded.
# A trailing CR is stripped so CRLF output from a native Windows Python
# does not end up inside the values (the sanitizer never emits CR itself).
_mempal_cr=$'\r'
_mempal_line=""
_mempal_parsed=()
while IFS= read -r _mempal_line || [ -n "$_mempal_line" ]; do
    _mempal_parsed+=("${_mempal_line%"$_mempal_cr"}")
done < <(echo "$INPUT" | "$MEMPAL_PYTHON_BIN" -c "
import sys, json, re
data = json.load(sys.stdin)
sid = data.get('session_id', 'unknown')
tp = data.get('transcript_path', '')
safe = lambda s: re.sub(r'[^a-zA-Z0-9_/.\-~]', '', str(s))
print(safe(sid))
print(safe(tp))
" 2>/dev/null)
SESSION_ID="${_mempal_parsed[0]:-unknown}"
TRANSCRIPT_PATH="${_mempal_parsed[1]:-}"

# Expand a leading ~ to $HOME.
TRANSCRIPT_PATH="${TRANSCRIPT_PATH/#\~/$HOME}"
# Shape check for a transcript path; succeeds (status 0) only when the
# argument is non-empty, ends in .json or .jsonl, and has no ".." path
# component.
is_valid_transcript_path() {
    local candidate="$1"

    # Empty input is never a transcript.
    if [ -z "$candidate" ]; then
        return 1
    fi

    # Only the two transcript suffixes are accepted.
    if [[ "$candidate" != *.json && "$candidate" != *.jsonl ]]; then
        return 1
    fi

    # Wrapping in slashes lets one pattern catch "..", "../x" and "x/..".
    if [[ "/$candidate/" == */../* ]]; then
        return 1
    fi

    return 0
}
echo "[$(date '+%H:%M:%S')] PRE-COMPACT triggered for session $SESSION_ID" \
    >> "$STATE_DIR/hook.log"

# Synchronous upload — pre-compact is the safety net, blocking is correct
# here. The Claude Code hook timeout (set in settings.local.json) bounds
# how long we'll wait.
if is_valid_transcript_path "$TRANSCRIPT_PATH" && [ -f "$TRANSCRIPT_PATH" ]; then
    # --fail makes curl exit non-zero on HTTP >= 400. Without it a 401 (bad
    # token) or 500 still exits 0 and would be logged as "ingest ok".
    # With -sS the error line curl prints still lands in the log.
    CURL_OPTS=("-sS" "--fail" "--max-time" "55" "-X" "POST")
    if [ "${MEMPAL_REMOTE_INSECURE:-0}" = "1" ]; then
        CURL_OPTS+=("-k")
    fi
    # The optional wing header is folded into CURL_OPTS rather than kept in
    # a separate, possibly empty array: expanding an empty array under
    # `set -u` is an "unbound variable" error on bash < 4.4.
    if [ -n "${MEMPAL_REMOTE_WING:-}" ]; then
        CURL_OPTS+=("-H" "X-Wing: $MEMPAL_REMOTE_WING")
    fi

    # if/else instead of `cmd && ok || failed`, so a failing "ok" log write
    # cannot also emit the FAILED line. ${MEMPAL_REMOTE_URL%/} drops one
    # trailing slash so the request path never becomes "//ingest/...".
    if curl "${CURL_OPTS[@]}" \
        -H "Authorization: Bearer $MEMPAL_REMOTE_TOKEN" \
        -H "X-Session-Id: $SESSION_ID" \
        -H "Content-Type: application/octet-stream" \
        --data-binary "@$TRANSCRIPT_PATH" \
        "${MEMPAL_REMOTE_URL%/}/ingest/transcript" \
        >> "$STATE_DIR/hook.log" 2>&1; then
        echo "[$(date '+%H:%M:%S')] PRE-COMPACT ingest ok" >> "$STATE_DIR/hook.log"
    else
        echo "[$(date '+%H:%M:%S')] PRE-COMPACT ingest FAILED — context will compact unsaved" \
            >> "$STATE_DIR/hook.log"
    fi
elif [ -n "$TRANSCRIPT_PATH" ]; then
    echo "[$(date '+%H:%M:%S')] PRE-COMPACT: invalid transcript path: $TRANSCRIPT_PATH" \
        >> "$STATE_DIR/hook.log"
fi

# Always reply with an empty JSON object so compaction proceeds.
echo "{}"
-223
View File
@@ -1,223 +0,0 @@
#!/bin/bash
# MEMPALACE SAVE HOOK — Auto-save every N exchanges
#
# Claude Code "Stop" hook. After every assistant response:
# 1. Counts human messages in the session transcript
# 2. Every SAVE_INTERVAL messages, BLOCKS the AI from stopping
# 3. Returns a reason telling the AI to save structured diary + palace entries
# 4. AI does the save (topics, decisions, code, quotes → organized into palace)
# 5. Next Stop fires with stop_hook_active=true → lets AI stop normally
#
# The AI does the classification — it knows what wing/hall/closet to use
# because it has context about the conversation. No regex needed.
#
# === INSTALL ===
# Add to .claude/settings.local.json:
#
# "hooks": {
# "Stop": [{
# "matcher": "*",
# "hooks": [{
# "type": "command",
# "command": "/absolute/path/to/mempal_save_hook.sh",
# "timeout": 30
# }]
# }]
# }
#
# For Codex CLI, add to .codex/hooks.json:
#
# "Stop": [{
# "type": "command",
# "command": "/absolute/path/to/mempal_save_hook.sh",
# "timeout": 30
# }]
#
# === HOW IT WORKS ===
#
# Claude Code sends JSON on stdin with these fields:
# session_id — unique session identifier
# stop_hook_active — true if AI is already in a save cycle (prevents infinite loop)
# transcript_path — path to the JSONL transcript file
#
# When we block, Claude Code shows our "reason" to the AI as a system message.
# The AI then saves to memory, and when it tries to stop again,
# stop_hook_active=true so we let it through. No infinite loop.
#
# === MEMPALACE CLI ===
# The hook ALWAYS mines the active conversation transcript automatically
# (via `mempalace mine <transcript-dir> --mode convos`). MEMPAL_DIR is an
# *additional*, optional target for project files — it does not replace
# the conversation mine.
#
# === CONFIGURATION ===
SAVE_INTERVAL=15 # Save every N human messages (adjust to taste)
STATE_DIR="$HOME/.mempalace/hook_state"
mkdir -p "$STATE_DIR"

# Optional: project directory (code / notes / docs) to also mine each
# save trigger. Mined with `--mode projects`. The conversation transcript
# is always mined regardless — this is purely additive.
# Example: MEMPAL_DIR="$HOME/projects/my_app"
MEMPAL_DIR=""

# Resolve the Python interpreter the hook should use.
#
# Why this is nontrivial: GUI-launched Claude Code on macOS (or any harness
# that doesn't inherit the user's shell PATH) may find a `python3` on PATH
# that lacks mempalace — e.g. /usr/bin/python3 while the user installed
# mempalace into a venv or pyenv. Users in that situation can point the
# hook at the right interpreter by exporting MEMPAL_PYTHON.
#
# Resolution order (first hit wins):
# 1. $MEMPAL_PYTHON — explicit user override (absolute path)
# 2. $(command -v python3) — first python3 on the hook's PATH
# 3. bare "python3" — last-resort fallback (hope the PATH has it)
MEMPAL_PYTHON_BIN="${MEMPAL_PYTHON:-}"
if [ -z "$MEMPAL_PYTHON_BIN" ] || [ ! -x "$MEMPAL_PYTHON_BIN" ]; then
    MEMPAL_PYTHON_BIN="$(command -v python3 2>/dev/null || echo python3)"
fi

# Read JSON input from stdin
INPUT=$(cat)

# Parse all fields in a single Python call (3x faster than separate invocations)
# without invoking ``eval`` on generated code: Python prints one sanitized
# value per line and the shell does plain variable assignment — same data,
# smaller blast radius if the sanitizer is ever bypassed (#1231 review).
#
# A plain `read` loop is used instead of `mapfile`: `mapfile` only exists
# in bash >= 4.0, so under the bash 3.2 that macOS ships as /bin/bash the
# parse would fail and every field would fall back to its default.
# A trailing CR is stripped so CRLF output from a native Windows Python
# does not end up inside the values (the sanitizer never emits CR itself).
_mempal_cr=$'\r'
_mempal_line=""
_mempal_parsed=()
while IFS= read -r _mempal_line || [ -n "$_mempal_line" ]; do
    _mempal_parsed+=("${_mempal_line%"$_mempal_cr"}")
done < <(echo "$INPUT" | "$MEMPAL_PYTHON_BIN" -c "
import sys, json, re
data = json.load(sys.stdin)
sid = data.get('session_id', 'unknown')
sha_raw = data.get('stop_hook_active', False)
tp = data.get('transcript_path', '')
# Shell-safe output — only allow alphanumeric, underscore, hyphen, slash, dot, tilde
safe = lambda s: re.sub(r'[^a-zA-Z0-9_/.\-~]', '', str(s))
# Coerce stop_hook_active to strict boolean string
sha = 'True' if sha_raw is True or str(sha_raw).lower() in ('true', '1', 'yes') else 'False'
print(safe(sid))
print(sha)
print(safe(tp))
" 2>/dev/null)
SESSION_ID="${_mempal_parsed[0]:-unknown}"
STOP_HOOK_ACTIVE="${_mempal_parsed[1]:-False}"
TRANSCRIPT_PATH="${_mempal_parsed[2]:-}"

# Expand ~ in path
TRANSCRIPT_PATH="${TRANSCRIPT_PATH/#\~/$HOME}"
# Shape check for a transcript path. Succeeds (status 0) only when the
# argument:
#   - is non-empty
#   - ends in .jsonl or .json
#   - has no traversal segments (.. components)
# Mirrors mempalace.hooks_cli._validate_transcript_path so the shell hook
# rejects the same shapes the Python hook rejects (#1231 review).
is_valid_transcript_path() {
    local candidate="$1"

    # Empty input is never a transcript.
    if [ -z "$candidate" ]; then
        return 1
    fi

    # Only the two transcript suffixes are accepted.
    if [[ "$candidate" != *.json && "$candidate" != *.jsonl ]]; then
        return 1
    fi

    # Wrapping in slashes lets one pattern catch "..", "../x" and "x/..".
    if [[ "/$candidate/" == */../* ]]; then
        return 1
    fi

    return 0
}
# If we're already in a save cycle, let the AI stop normally
# This is the infinite-loop prevention: block once → AI saves → tries to stop again → we let it through
if [ "$STOP_HOOK_ACTIVE" = "True" ] || [ "$STOP_HOOK_ACTIVE" = "true" ]; then
    echo "{}"
    exit 0
fi

# Count human messages in the JSONL transcript
# SECURITY: Pass transcript path as sys.argv to avoid shell injection via crafted paths
#
# The stderr redirect sits on the command line itself. Placed on its own
# line after the heredoc terminator it is a separate, empty command and
# does not silence the interpreter.
EXCHANGE_COUNT=0
if [ -f "$TRANSCRIPT_PATH" ]; then
    EXCHANGE_COUNT=$("$MEMPAL_PYTHON_BIN" - "$TRANSCRIPT_PATH" 2>/dev/null <<'PYEOF'
import json, sys

count = 0
# Explicit UTF-8 with errors="replace": the default encoding is locale
# dependent, and one undecodable byte would otherwise abort the whole count.
with open(sys.argv[1], encoding="utf-8", errors="replace") as f:
    for line in f:
        try:
            entry = json.loads(line)
            msg = entry.get('message', {})
            if isinstance(msg, dict) and msg.get('role') == 'user':
                content = msg.get('content', '')
                # Slash-command invocations are not counted as human messages.
                if isinstance(content, str) and '<command-message>' in content:
                    continue
                count += 1
        except Exception:
            # Best effort: skip lines that are not JSON objects.
            pass
print(count)
PYEOF
    )
fi

# The count feeds $(( )) arithmetic and `[ -gt ]` later on, so anything that
# is not a plain integer (interpreter missing, crash, CRLF output) becomes 0
# instead of producing "integer expression expected" errors.
_mempal_cr=$'\r'
EXCHANGE_COUNT="${EXCHANGE_COUNT%"$_mempal_cr"}"
if ! [[ "$EXCHANGE_COUNT" =~ ^[0-9]+$ ]]; then
    EXCHANGE_COUNT=0
fi
# Per-session save point: the exchange count recorded at the previous save.
LAST_SAVE_FILE="$STATE_DIR/${SESSION_ID}_last_save"
LAST_SAVE=0
if [ -f "$LAST_SAVE_FILE" ]; then
    LAST_SAVE_RAW=$(cat "$LAST_SAVE_FILE")
    # SECURITY: only an all-digit value may reach the arithmetic below;
    # anything else keeps the default of 0 (prevents command injection).
    case "$LAST_SAVE_RAW" in
        '' | *[!0-9]*) ;;
        *) LAST_SAVE="$LAST_SAVE_RAW" ;;
    esac
fi
SINCE_LAST=$((EXCHANGE_COUNT - LAST_SAVE))

# Debug trail (check ~/.mempalace/hook_state/hook.log)
echo "[$(date '+%H:%M:%S')] Session $SESSION_ID: $EXCHANGE_COUNT exchanges, $SINCE_LAST since last save" \
    >> "$STATE_DIR/hook.log"
# Time to save?
# Fires when at least SAVE_INTERVAL messages were counted since the recorded
# save point and the transcript contains at least one counted message.
if [ "$SINCE_LAST" -ge "$SAVE_INTERVAL" ] && [ "$EXCHANGE_COUNT" -gt 0 ]; then
# Update last save point
echo "$EXCHANGE_COUNT" > "$LAST_SAVE_FILE"
echo "[$(date '+%H:%M:%S')] TRIGGERING SAVE at exchange $EXCHANGE_COUNT" >> "$STATE_DIR/hook.log"
# Auto-mine. Two independent targets — both run if both are set:
# 1. TRANSCRIPT_PATH (from Claude Code) → parent dir, --mode convos
# (Claude Code session JSONL — must use the convo miner)
# 2. MEMPAL_DIR (user-configured project) → --mode projects
# (code, notes, docs)
# MEMPAL_DIR is *additive*, not an override: a user with MEMPAL_DIR
# pointed at their project still gets the active conversation mined.
# The trailing `&` on each mine command runs it in the background, so the
# hook does not wait for mining before replying.
if is_valid_transcript_path "$TRANSCRIPT_PATH" && [ -f "$TRANSCRIPT_PATH" ]; then
mempalace mine "$(dirname "$TRANSCRIPT_PATH")" --mode convos \
>> "$STATE_DIR/hook.log" 2>&1 &
elif [ -n "$TRANSCRIPT_PATH" ]; then
echo "[$(date '+%H:%M:%S')] Skipping invalid transcript path: $TRANSCRIPT_PATH" \
>> "$STATE_DIR/hook.log"
fi
if [ -n "$MEMPAL_DIR" ] && [ -d "$MEMPAL_DIR" ]; then
mempalace mine "$MEMPAL_DIR" --mode projects \
>> "$STATE_DIR/hook.log" 2>&1 &
fi
# MEMPAL_VERBOSE toggle:
# true = developer mode — block and show diaries/code in chat
# false = silent mode (default) — save in background, no chat clutter
# Set via: export MEMPAL_VERBOSE=true
# Any value other than "true" or "1" (including unset) takes the silent branch.
if [ "$MEMPAL_VERBOSE" = "true" ] || [ "$MEMPAL_VERBOSE" = "1" ]; then
cat << 'HOOKJSON'
{
"decision": "block",
"reason": "MemPalace save checkpoint. Write a brief session diary entry covering key topics, decisions, and code changes since the last save. Use verbatim quotes where possible. Continue after saving."
}
HOOKJSON
else
# Silent mode: return empty JSON to not block. "decision": "allow" is
# not a valid value — only "block" or {} are recognized.
echo '{}'
fi
else
# Not time yet — let the AI stop normally
echo "{}"
fi
+170
View File
@@ -0,0 +1,170 @@
#!/bin/bash
# MEMPALACE SAVE HOOK (REMOTE) — Auto-save every N exchanges to a remote palace.
#
# Drop-in replacement for mempal_save_hook.sh when MemPalace runs on a
# server (e.g. Unraid) instead of the dev machine. Same trigger logic
# (count human messages, fire every SAVE_INTERVAL), but instead of running
# `mempalace mine` locally it POSTs the active transcript to the server's
# /ingest/transcript endpoint.
#
# Required env vars:
# MEMPAL_REMOTE_URL Base URL of the MemPalace server, e.g.
# https://unraid.local:8443
# MEMPAL_REMOTE_TOKEN Bearer token (same one configured in the server's
# .env / MEMPAL_TOKEN).
#
# Optional env vars:
# MEMPAL_REMOTE_WING Wing name to file under (defaults to the
# session-id-derived inbox name on the server).
# MEMPAL_REMOTE_INSECURE "1" to skip TLS verification — needed when
# the server uses Caddy's self-signed `tls
# internal` cert and the client hasn't trusted
# the Caddy root CA.
# SAVE_INTERVAL Override the default of 15 messages.
#
# === INSTALL ===
# Add to .claude/settings.local.json (Claude Code):
#
# "hooks": {
# "Stop": [{
# "matcher": "*",
# "hooks": [{
# "type": "command",
# "command": "/abs/path/to/mempal_save_hook_remote.sh",
# "timeout": 30
# }]
# }]
# }
#
# For Codex CLI, add the same shape to .codex/hooks.json.
set -u

# Messages between uploads; an exported SAVE_INTERVAL overrides the default.
SAVE_INTERVAL="${SAVE_INTERVAL:-15}"
STATE_DIR="$HOME/.mempalace/hook_state"
mkdir -p "$STATE_DIR"

# Resolve Python — used only for parsing the hook's stdin JSON.
# MEMPAL_PYTHON (when set and executable) → $(command -v python3) → bare python3.
MEMPAL_PYTHON_BIN="${MEMPAL_PYTHON:-}"
if [ -z "$MEMPAL_PYTHON_BIN" ] || [ ! -x "$MEMPAL_PYTHON_BIN" ]; then
    MEMPAL_PYTHON_BIN="$(command -v python3 2>/dev/null || echo python3)"
fi

# Pre-flight: bail with a clean no-op if config is missing. Returning {}
# lets Claude Code stop normally; we log the reason for the user to find.
if [ -z "${MEMPAL_REMOTE_URL:-}" ] || [ -z "${MEMPAL_REMOTE_TOKEN:-}" ]; then
    echo "[$(date '+%H:%M:%S')] MEMPAL_REMOTE_URL/TOKEN not set — skipping" \
        >> "$STATE_DIR/hook.log"
    echo "{}"
    exit 0
fi

INPUT=$(cat)

# Parse session_id, stop_hook_active, transcript_path in one Python call —
# same sanitization shape as the local hook.
#
# A plain `read` loop is used instead of `mapfile`: `mapfile` only exists
# in bash >= 4.0, so under the bash 3.2 that macOS ships as /bin/bash the
# parse would fail and no transcript would ever be uploaded.
# A trailing CR is stripped so CRLF output from a native Windows Python
# does not end up inside the values (the sanitizer never emits CR itself).
_mempal_cr=$'\r'
_mempal_line=""
_mempal_parsed=()
while IFS= read -r _mempal_line || [ -n "$_mempal_line" ]; do
    _mempal_parsed+=("${_mempal_line%"$_mempal_cr"}")
done < <(echo "$INPUT" | "$MEMPAL_PYTHON_BIN" -c "
import sys, json, re
data = json.load(sys.stdin)
sid = data.get('session_id', 'unknown')
sha_raw = data.get('stop_hook_active', False)
tp = data.get('transcript_path', '')
safe = lambda s: re.sub(r'[^a-zA-Z0-9_/.\-~]', '', str(s))
sha = 'True' if sha_raw is True or str(sha_raw).lower() in ('true', '1', 'yes') else 'False'
print(safe(sid))
print(sha)
print(safe(tp))
" 2>/dev/null)
SESSION_ID="${_mempal_parsed[0]:-unknown}"
STOP_HOOK_ACTIVE="${_mempal_parsed[1]:-False}"
TRANSCRIPT_PATH="${_mempal_parsed[2]:-}"

# Expand a leading ~ to $HOME.
TRANSCRIPT_PATH="${TRANSCRIPT_PATH/#\~/$HOME}"
# Return 0 when $1 looks like a usable transcript path, 1 otherwise.
# A path qualifies when it is non-empty, ends in .json or .jsonl, and has no
# ".." path component. The file's existence is NOT checked here.
is_valid_transcript_path() {
    local candidate="$1"

    if [ -z "$candidate" ]; then
        return 1
    fi

    # Only JSON / JSONL transcripts are accepted.
    if [[ "$candidate" != *.json && "$candidate" != *.jsonl ]]; then
        return 1
    fi

    # Wrapping the path in slashes makes a leading or trailing ".." component
    # look like an interior one, so a single pattern covers all positions.
    if [[ "/$candidate/" == */../* ]]; then
        return 1
    fi

    return 0
}
# When the hook input reports stop_hook_active, answer with a no-op right away.
if [ "$STOP_HOOK_ACTIVE" = "True" ] || [ "$STOP_HOOK_ACTIVE" = "true" ]; then
    echo "{}"
    exit 0
fi

# Count human messages (same logic as local hook): JSONL entries whose
# message.role is "user", skipping string contents that carry a
# <command-message> marker. Unparseable lines are ignored.
#
# The stderr redirect must sit on the command itself; placed after the heredoc
# terminator it is a separate empty command and silences nothing.
EXCHANGE_COUNT=0
if [ -f "$TRANSCRIPT_PATH" ]; then
    EXCHANGE_COUNT=$("$MEMPAL_PYTHON_BIN" - "$TRANSCRIPT_PATH" 2>/dev/null <<'PYEOF'
import json, sys
count = 0
# Decode as UTF-8 regardless of locale; a stray byte must not abort the count.
with open(sys.argv[1], encoding='utf-8', errors='replace') as f:
    for line in f:
        try:
            entry = json.loads(line)
            msg = entry.get('message', {})
            if isinstance(msg, dict) and msg.get('role') == 'user':
                content = msg.get('content', '')
                if isinstance(content, str) and '<command-message>' in content:
                    continue
                count += 1
        except Exception:
            pass
print(count)
PYEOF
)
fi

# If Python was missing or crashed the substitution is empty; keep the count
# numeric so the arithmetic and integer tests below stay well-defined.
case "$EXCHANGE_COUNT" in
    ''|*[!0-9]*) EXCHANGE_COUNT=0 ;;
esac
# Per-session marker holding the exchange count at the time of the last save.
LAST_SAVE_FILE="$STATE_DIR/${SESSION_ID}_last_save"
LAST_SAVE=0
if [ -f "$LAST_SAVE_FILE" ]; then
    LAST_SAVE_RAW=$(<"$LAST_SAVE_FILE")
    # Trust the stored value only when it is a plain run of digits.
    case "$LAST_SAVE_RAW" in
        ''|*[!0-9]*) ;;
        *) LAST_SAVE="$LAST_SAVE_RAW" ;;
    esac
fi

SINCE_LAST=$((EXCHANGE_COUNT - LAST_SAVE))

printf '[%s] Session %s: %s exchanges, %s since last save\n' \
    "$(date '+%H:%M:%S')" "$SESSION_ID" "$EXCHANGE_COUNT" "$SINCE_LAST" \
    >> "$STATE_DIR/hook.log"
# Upload once at least SAVE_INTERVAL new human messages have accumulated.
if [ "$SINCE_LAST" -ge "$SAVE_INTERVAL" ] && [ "$EXCHANGE_COUNT" -gt 0 ]; then
    if is_valid_transcript_path "$TRANSCRIPT_PATH" && [ -f "$TRANSCRIPT_PATH" ]; then
        # Record the save point up front; the upload itself runs in the background.
        echo "$EXCHANGE_COUNT" > "$LAST_SAVE_FILE"

        # --fail makes curl exit non-zero on HTTP >= 400. Without it a 401/413/500
        # from the server would still be logged as "ingest ok".
        CURL_OPTS=(-sS --fail --max-time 30 -X POST)
        if [ "${MEMPAL_REMOTE_INSECURE:-0}" = "1" ]; then
            CURL_OPTS+=(-k)
        fi
        # The optional wing header is appended to CURL_OPTS instead of living in
        # its own array: expanding an empty array under `set -u` is an
        # "unbound variable" error on bash older than 4.4.
        if [ -n "${MEMPAL_REMOTE_WING:-}" ]; then
            CURL_OPTS+=(-H "X-Wing: $MEMPAL_REMOTE_WING")
        fi

        # Background the upload so we don't block the AI's stop. The hook
        # exits immediately with {} — the next save retry will catch any
        # transient failure (the miner is idempotent server-side).
        (
            # Strip one trailing slash from the base URL so the request path is
            # exactly /ingest/transcript and not //ingest/transcript.
            curl "${CURL_OPTS[@]}" \
                -H "Authorization: Bearer $MEMPAL_REMOTE_TOKEN" \
                -H "X-Session-Id: $SESSION_ID" \
                -H "Content-Type: application/octet-stream" \
                --data-binary "@$TRANSCRIPT_PATH" \
                "${MEMPAL_REMOTE_URL%/}/ingest/transcript" \
                >> "$STATE_DIR/hook.log" 2>&1 \
                && echo "[$(date '+%H:%M:%S')] ingest ok" >> "$STATE_DIR/hook.log" \
                || echo "[$(date '+%H:%M:%S')] ingest failed (will retry next save)" \
                    >> "$STATE_DIR/hook.log"
        ) &
        disown
    elif [ -n "$TRANSCRIPT_PATH" ]; then
        echo "[$(date '+%H:%M:%S')] Skipping invalid transcript path: $TRANSCRIPT_PATH" \
            >> "$STATE_DIR/hook.log"
    fi
fi

# Always answer with an empty JSON object so the stop proceeds normally.
echo "{}"
-154
View File
@@ -1,154 +0,0 @@
---
name: mempalace
description: "MemPalace — Local AI memory with 96.6% recall. Semantic search, temporal knowledge graph, palace architecture (wings/rooms/drawers). Free, no cloud, no API keys."
version: 3.3.0
homepage: https://github.com/MemPalace/mempalace
user-invocable: true
metadata:
openclaw:
emoji: "\U0001F3DB"
os:
- darwin
- linux
- win32
requires:
anyBins:
- mempalace
- python3
install:
- id: mempalace-pip
kind: uv
label: "Install MemPalace (Python, local ChromaDB)"
package: mempalace
bins:
- mempalace
---
# MemPalace — Local AI Memory System
You have access to a local memory palace via MCP tools. The palace stores verbatim conversation history and a temporal knowledge graph — all on the user's machine, zero cloud, zero API calls.
## Architecture
- **Wings** = people or projects (e.g. `wing_alice`, `wing_myproject`)
- **Halls** = categories (facts, events, preferences, advice)
- **Rooms** = specific topics (e.g. `chromadb-setup`, `riley-school`)
- **Drawers** = individual memory chunks (verbatim text)
- **Knowledge Graph** = entity-relationship facts with time validity
## Protocol — FOLLOW THIS EVERY SESSION
1. **ON WAKE-UP**: Call `mempalace_status` to load palace overview and AAAK dialect spec.
2. **BEFORE RESPONDING** about any person, project, or past event: call `mempalace_search` or `mempalace_kg_query` FIRST. Never guess from memory — verify from the palace.
3. **IF UNSURE** about a fact (name, age, relationship, preference): say "let me check" and query. Wrong is worse than slow.
4. **AFTER EACH SESSION**: Call `mempalace_diary_write` to record what happened, what you learned, what matters.
5. **WHEN FACTS CHANGE**: Call `mempalace_kg_invalidate` on the old fact, then `mempalace_kg_add` for the new one.
## Available Tools
### Search & Browse
- `mempalace_search` — Semantic search across all memories. Always start here.
- `query` (required): natural language search — keep it short, keywords or a question. Do NOT include system prompts or conversation context.
- `wing`: filter by wing
- `room`: filter by room
- `limit`: max results (default 5)
- `mempalace_check_duplicate` — Check if content already exists before filing.
- `content` (required): text to check
- `threshold`: similarity threshold (default 0.9 — lowering to 0.85–0.87 often catches more near-duplicates without significant false positives)
- `mempalace_status` — Palace overview: total drawers, wings, rooms, AAAK spec
- `mempalace_list_wings` — All wings with drawer counts
- `mempalace_list_rooms` — Rooms within a wing (optional wing filter)
- `mempalace_get_taxonomy` — Full wing/room/count tree
- `mempalace_get_aaak_spec` — Get AAAK compression dialect specification
### Knowledge Graph (Temporal Facts)
- `mempalace_kg_query` — Query entity relationships. Supports time filtering.
- `entity` (required): e.g. "Max", "MyProject"
- `as_of`: date filter (YYYY-MM-DD) — what was true at that time
- `direction`: "outgoing", "incoming", or "both" (default "both")
- `mempalace_kg_add` — Add a fact: subject -> predicate -> object
- `subject`, `predicate`, `object` (required)
- `valid_from`: when this became true
- `source_closet`: source reference
- `mempalace_kg_invalidate` — Mark a fact as no longer true
- `subject`, `predicate`, `object` (required)
- `ended`: when it stopped being true (default: today)
- `mempalace_kg_timeline` — Chronological story of an entity
- `entity`: filter by entity name (optional — all events if omitted)
- `mempalace_kg_stats` — Graph overview: entities, triples, relationship types
### Palace Graph (Cross-Domain Connections)
- `mempalace_traverse` — Walk from a room, find connected ideas across wings
- `start_room` (required): room to start from
- `max_hops`: connection depth (default 2)
- `mempalace_find_tunnels` — Find rooms that bridge two wings
- `wing_a`, `wing_b` (required)
- `mempalace_graph_stats` — Graph connectivity overview
### Write
- `mempalace_add_drawer` — Store verbatim content into a wing/room
- `wing`, `room`, `content` (required)
- `source_file`: optional source reference
- Checks for duplicates automatically
- `mempalace_delete_drawer` — Remove a drawer by ID
- `drawer_id` (required)
- `mempalace_diary_write` — Write a session diary entry
- `agent_name` (required): your name/identifier
- `entry` (required): what happened, what you learned, what matters
- `topic`: category tag (default "general")
- `mempalace_diary_read` — Read recent diary entries
- `agent_name` (required)
- `last_n`: number of entries (default 10)
## Setup
Install MemPalace and populate the palace (uv recommended):
```bash
uv tool install mempalace # or: pip install mempalace
mempalace init ~/my-convos
mempalace mine ~/my-convos
```
### OpenClaw MCP config
Add to your OpenClaw MCP configuration:
```json
{
"mcpServers": {
"mempalace": {
"command": "python3",
"args": ["-m", "mempalace.mcp_server"]
}
}
}
```
Or via CLI:
```bash
openclaw mcp set mempalace '{"command":"python3","args":["-m","mempalace.mcp_server"]}'
```
### Other MCP hosts
```bash
# Claude Code
claude mcp add mempalace -- python -m mempalace.mcp_server
# Cursor — add to .cursor/mcp.json
# Codex — add to .codex/mcp.json
```
## Tips
- Search is semantic (meaning-based), not keyword. "What did we discuss about database performance?" works better than "database".
- The knowledge graph stores typed relationships with time windows. Use it for facts about people and projects — it knows WHEN things were true.
- Diary entries accumulate across sessions. Write one at the end of each conversation to build continuity.
- Use `mempalace_check_duplicate` before storing new content to avoid duplicates.
- The AAAK dialect (from `mempalace_status`) is a compressed notation for efficient storage. Read it naturally — expand codes mentally, treat *markers* as emotional context.
## License
[MemPalace](https://github.com/MemPalace/mempalace) is MIT licensed. Created by Milla Jovovich, Ben Sigman, Igor Lins e Silva, and contributors.
-2116
View File
File diff suppressed because it is too large Load Diff
Binary file not shown.

Before

Width:  |  Height:  |  Size: 680 KiB

+216
View File
@@ -0,0 +1,216 @@
"""
HTTP ingest server — remote-aware hook target.
Runs as a daemon thread inside the same process as ``mempalace-mcp`` so
ChromaDB has exactly one writer per palace (HNSW indexes are not safe
for multi-process writes). Started from ``mcp_server.main()`` when the
``MEMPALACE_INGEST_PORT`` env var is set.
Endpoints
---------
POST /ingest/transcript
Body: raw bytes of a Claude Code / Codex JSONL transcript.
Headers:
X-Session-Id required, becomes inbox subdirectory name
X-Wing optional, palace wing (defaults to session_id)
Content-Length required, max 50 MB
Authorization optional ``Bearer <token>``; checked when
``MEMPALACE_INGEST_TOKEN`` is set
Effect: writes body to
``<palace>/inbox/<session>/<session>.jsonl`` and runs
``mine_convos`` against that directory.
Response: 202 with ``{"status": "ingested", "session_id": ..., ...}``.
GET /healthz
Response: 200 with a small JSON status payload. Cheap — does not open
the palace. Used by Docker HEALTHCHECK and by clients to verify
auth/connectivity before posting a multi-MB transcript.
Auth is always defense-in-depth here — the primary gate is the
reverse proxy (Caddy) in front. Setting ``MEMPALACE_INGEST_TOKEN`` adds
a second check inside the container in case the proxy is bypassed.
"""
from __future__ import annotations
import hmac
import json
import logging
import os
import re
import sys
import threading
from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
from pathlib import Path
from .config import MempalaceConfig, sanitize_name
from .version import __version__
logger = logging.getLogger("mempalace_ingest")
MAX_TRANSCRIPT_BYTES = 50 * 1024 * 1024 # 50 MB hard cap per upload
_SESSION_ID_RE = re.compile(r"^[A-Za-z0-9_.\-]{1,128}$")
def _check_auth(header_value: str | None) -> bool:
"""Validate the bearer token if MEMPALACE_INGEST_TOKEN is set.
Returns True when no token is configured (auth deferred to the proxy)
or when the supplied token matches. Uses ``hmac.compare_digest`` to
avoid timing-based token recovery.
"""
expected = os.environ.get("MEMPALACE_INGEST_TOKEN", "").strip()
if not expected:
return True
if not header_value or not header_value.lower().startswith("bearer "):
return False
supplied = header_value[7:].strip()
return hmac.compare_digest(supplied.encode("utf-8"), expected.encode("utf-8"))
def _validate_session_id(value: str | None) -> str:
    """Return ``value`` unchanged when it is an acceptable session id.

    Raises:
        ValueError: if the header is missing, empty, or does not match
            ``_SESSION_ID_RE``.
    """
    if value and _SESSION_ID_RE.match(value):
        return value
    raise ValueError("invalid or missing X-Session-Id header")
class _IngestHandler(BaseHTTPRequestHandler):
    """Request handler for the ingest server.

    Serves ``GET /healthz`` and ``POST /ingest/transcript``; any other path
    gets a JSON 404. Every response body is JSON.
    """

    server_version = f"mempalace-ingest/{__version__}"

    # The server is a ThreadingHTTPServer, so each request runs on its own
    # thread. Two overlapping uploads for the same session would otherwise
    # rewrite ``<session>.jsonl`` while the miner is still reading it, so the
    # write-then-mine step is serialized across all requests.
    _ingest_lock = threading.Lock()

    # Silence the default per-request stderr line.
    def log_message(self, fmt, *args):  # noqa: A003 (overrides stdlib)
        logger.info("%s - %s", self.address_string(), fmt % args)

    def _send_json(self, status: int, payload: dict) -> None:
        """Send ``payload`` as a JSON response with the given HTTP status."""
        body = json.dumps(payload).encode("utf-8")
        self.send_response(status)
        self.send_header("Content-Type", "application/json")
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        self.wfile.write(body)

    def do_GET(self):  # noqa: N802 (stdlib API)
        """Answer the ``/healthz`` probe; 404 for anything else."""
        if self.path == "/healthz":
            self._send_json(200, {"status": "ok", "version": __version__})
            return
        self._send_json(404, {"error": "not found"})

    def do_POST(self):  # noqa: N802 (stdlib API)
        """Handle ``POST /ingest/transcript``.

        Responds 404 for other paths, 401 on failed auth, 400 for a bad
        session id / wing / Content-Length / empty or truncated body, 413 for
        an oversized body, 500 when storing or mining fails, and 202 once the
        transcript has been written and mined.
        """
        if self.path != "/ingest/transcript":
            self._send_json(404, {"error": "not found"})
            return
        if not _check_auth(self.headers.get("Authorization")):
            self._send_json(401, {"error": "unauthorized"})
            return
        try:
            session_id = _validate_session_id(self.headers.get("X-Session-Id"))
        except ValueError as exc:
            self._send_json(400, {"error": str(exc)})
            return
        wing_header = self.headers.get("X-Wing", "").strip()
        try:
            wing = sanitize_name(wing_header, "wing") if wing_header else None
        except ValueError as exc:
            self._send_json(400, {"error": f"invalid wing: {exc}"})
            return

        body = self._read_body()
        if body is None:
            return  # _read_body already sent the error response

        status, payload = self._store_and_mine(session_id, wing, body)
        self._send_json(status, payload)

    def _read_body(self) -> bytes | None:
        """Read the request body, or send an error response and return None."""
        try:
            length = int(self.headers.get("Content-Length", "0"))
        except ValueError:
            self._send_json(400, {"error": "invalid Content-Length"})
            return None
        if length <= 0:
            self._send_json(400, {"error": "empty body"})
            return None
        if length > MAX_TRANSCRIPT_BYTES:
            self._send_json(413, {"error": f"body exceeds {MAX_TRANSCRIPT_BYTES} bytes"})
            return None
        body = self.rfile.read(length)
        if len(body) != length:
            self._send_json(400, {"error": "truncated body"})
            return None
        return body

    def _store_and_mine(self, session_id: str, wing, body: bytes) -> tuple[int, dict]:
        """Write the transcript to the inbox, mine it, and return (status, payload)."""
        # Drop the transcript into the palace inbox and let the existing
        # convo miner handle parsing, dedup, room assignment. The miner is
        # idempotent — if the hook retries we won't double-file.
        palace_path = Path(MempalaceConfig().palace_path)
        inbox_dir = palace_path / "inbox" / session_id
        transcript_path = inbox_dir / f"{session_id}.jsonl"

        with self._ingest_lock:
            try:
                inbox_dir.mkdir(parents=True, exist_ok=True)
            except OSError as exc:
                logger.exception("inbox mkdir failed")
                return 500, {"error": f"inbox unavailable: {exc}"}

            try:
                transcript_path.write_bytes(body)
            except OSError as exc:
                logger.exception("transcript write failed")
                return 500, {"error": f"transcript write failed: {exc}"}

            try:
                # Imported lazily, at first use, exactly as before.
                from .convo_miner import mine_convos

                mine_convos(
                    convo_dir=str(inbox_dir),
                    palace_path=str(palace_path),
                    wing=wing,
                )
            except Exception as exc:  # noqa: BLE001 (surface any miner error to client)
                logger.exception("mine_convos failed")
                # Only the exception class name goes back to the client.
                return 500, {"error": f"mining failed: {exc.__class__.__name__}"}

        return 202, {
            "status": "ingested",
            "session_id": session_id,
            "wing": wing,
            "bytes": len(body),
        }
def start_ingest_server(host: str, port: int) -> ThreadingHTTPServer:
    """Bind the ingest server on ``host:port`` and start serving in the background.

    The serving loop runs on a daemon thread named ``mempalace-ingest``, so
    it does not keep the process alive once the main thread exits. The bound
    server object is returned so a caller may ``shutdown()`` it explicitly.
    """
    httpd = ThreadingHTTPServer((host, port), _IngestHandler)
    worker = threading.Thread(
        target=httpd.serve_forever,
        name="mempalace-ingest",
        daemon=True,
    )
    worker.start()
    logger.info("Ingest server listening on http://%s:%d", host, port)
    return httpd
def main():
    """Run the ingest server on its own: ``python -m mempalace.ingest_server``.

    Intended for local development and testing. Reads
    ``MEMPALACE_INGEST_HOST`` (default ``127.0.0.1``) and
    ``MEMPALACE_INGEST_PORT`` (default ``8766``), starts the server, then
    parks the main thread until interrupted with Ctrl-C.
    """
    logging.basicConfig(stream=sys.stderr, format="%(message)s", level=logging.INFO)

    env = os.environ
    bind_host = env.get("MEMPALACE_INGEST_HOST", "127.0.0.1")
    bind_port = int(env.get("MEMPALACE_INGEST_PORT", "8766"))
    httpd = start_ingest_server(bind_host, bind_port)

    # The server runs on a daemon thread; block here on an event that is
    # never set so the process stays alive.
    try:
        threading.Event().wait()
    except KeyboardInterrupt:
        httpd.shutdown()


if __name__ == "__main__":
    main()
+27
View File
@@ -18,6 +18,18 @@ Tools (write):
Tools (maintenance): Tools (maintenance):
mempalace_reconnect — force cache invalidation and reconnect after external writes mempalace_reconnect — force cache invalidation and reconnect after external writes
Server-mode (optional)
----------------------
Setting ``MEMPALACE_INGEST_PORT=<port>`` starts an HTTP transcript-ingest
endpoint as a daemon thread inside this same process. Same Python
runtime, same ChromaDB client — there is exactly one writer per palace
(ChromaDB's HNSW index is not safe for multi-process writes). Used by
the Unraid Docker deployment in ``deploy/unraid/`` so remote-aware
hooks can POST transcripts without each client needing a local
mempalace install. See ``mempalace/ingest_server.py`` and
``deploy/unraid/README.md``. The default stdio-only path is unaffected
when the env var is unset.
""" """
import os import os
@@ -2245,6 +2257,21 @@ def main():
# is visible at startup rather than on first use (#1222). Pure # is visible at startup rather than on first use (#1222). Pure
# filesystem read; never opens a chromadb client. # filesystem read; never opens a chromadb client.
_refresh_vector_disabled_flag() _refresh_vector_disabled_flag()
# Optional in-process HTTP ingest server. Same process so ChromaDB has
# exactly one writer. Started only when MEMPALACE_INGEST_PORT is set
# (i.e. the Unraid/Docker deployment) — the default stdio-only path is
# unaffected.
_ingest_port = os.environ.get("MEMPALACE_INGEST_PORT", "").strip()
if _ingest_port:
try:
from .ingest_server import start_ingest_server
start_ingest_server(
host=os.environ.get("MEMPALACE_INGEST_HOST", "0.0.0.0"),
port=int(_ingest_port),
)
except Exception as exc: # noqa: BLE001
logger.error("Failed to start ingest server: %s", exc)
while True: while True:
try: try:
line = sys.stdin.readline() line = sys.stdin.readline()
-1
View File
@@ -1 +0,0 @@
OpenArena owner claim verification for MemPalace/mempalace: 09AE2C2E66CC4B5CBD7D
-37
View File
@@ -1,37 +0,0 @@
# dependencies (bun install)
node_modules
# output
out
dist
.vitepress/dist
.vitepress/cache
.vitepress/.temp
*.tgz
# code coverage
coverage
*.lcov
# logs
logs
*.log
report.[0-9]_.[0-9]_.[0-9]_.[0-9]_.json
# dotenv environment variable files
.env
.env.development.local
.env.test.local
.env.production.local
.env.local
# caches
.eslintcache
.cache
*.tsbuildinfo
# IntelliJ based IDEs
.idea
# Finder (MacOS) folder config
.DS_Store
-119
View File
@@ -1,119 +0,0 @@
import { defineConfig } from 'vitepress'
import { withMermaid } from 'vitepress-plugin-mermaid'
/**
 * Normalize a docs base into the form VitePress expects: a path that both
 * starts and ends with a slash.
 *
 * - Missing, blank, or `/` input yields `/`.
 * - Surrounding whitespace is ignored.
 * - A missing leading slash is added (`docs` becomes `/docs/`), unless the
 *   value is a full or protocol-relative URL, which only gets the trailing
 *   slash.
 */
function normalizeBase(base?: string): string {
  const trimmed = base?.trim()
  if (!trimmed || trimmed === '/') {
    return '/'
  }

  const isUrl = /^([a-z][a-z0-9+.-]*:)?\/\//i.test(trimmed)
  const rooted = isUrl || trimmed.startsWith('/') ? trimmed : `/${trimmed}`

  return rooted.endsWith('/') ? rooted : `${rooted}/`
}
// Build-time settings, all read from the environment:
//   DOCS_BASE              base path the site is served from (default '/')
//   DOCS_EDIT_BRANCH       branch used in "Edit this page" links (default 'main')
//   MEMPALACE_DOCS_GA_ID   Google Analytics id; analytics tags are emitted only when set
const docsBase = normalizeBase(process.env.DOCS_BASE || '/')
const editBranch = process.env.DOCS_EDIT_BRANCH || 'main'
const gaId = process.env.MEMPALACE_DOCS_GA_ID

// Site configuration, wrapped by the mermaid plugin.
export default withMermaid(
  defineConfig({
    title: 'MemPalace',
    description: 'Give your AI a memory. Local-first storage and retrieval for AI workflows, with benchmark results and MCP tooling.',
    base: docsBase,
    head: [
      // docsBase always ends with '/', so asset names are appended directly.
      ['link', { rel: 'icon', href: `${docsBase}mempalace_logo.png` }],
      // Web fonts.
      ['link', { rel: 'preconnect', href: 'https://api.fontshare.com' }],
      ['link', { href: 'https://api.fontshare.com/v2/css?f[]=neue-machina@300,400,500,700,800&f[]=satoshi@300,400,500,700&display=swap', rel: 'stylesheet' }],
      ['link', { rel: 'preconnect', href: 'https://fonts.googleapis.com' }],
      ['link', { rel: 'preconnect', href: 'https://fonts.gstatic.com', crossorigin: '' }],
      ['link', { href: 'https://fonts.googleapis.com/css2?family=Onest:wght@300;400;500;600&family=JetBrains+Mono:wght@400;500&display=swap', rel: 'stylesheet' }],
      // Open Graph metadata.
      ['meta', { property: 'og:title', content: 'MemPalace — AI Memory System' }],
      ['meta', { property: 'og:description', content: '96.6% LongMemEval recall. Zero API calls. Local, free, open source.' }],
      ['meta', { property: 'og:image', content: `${docsBase}mempalace_logo.png` }],
      // Google Analytics loader and bootstrap, included only when gaId is set.
      ...(gaId ? [
        ['script', { async: '', src: `https://www.googletagmanager.com/gtag/js?id=${gaId}` }],
        ['script', {}, `window.dataLayer = window.dataLayer || [];\nfunction gtag(){dataLayer.push(arguments);}\ngtag('js', new Date());\ngtag('config', '${gaId}');`],
      ] as const : []),
    ],
    themeConfig: {
      logo: '/mempalace_logo.png',
      siteTitle: 'MemPalace',
      // Top navigation bar.
      nav: [
        { text: 'Guide', link: '/guide/getting-started' },
        { text: 'Concepts', link: '/concepts/the-palace' },
        { text: 'Reference', link: '/reference/cli' },
      ],
      // One sidebar per top-level section, keyed by path prefix.
      sidebar: {
        '/guide/': [
          {
            text: 'Guide',
            items: [
              { text: 'Getting Started', link: '/guide/getting-started' },
              { text: 'Mining Your Data', link: '/guide/mining' },
              { text: 'Searching Memories', link: '/guide/searching' },
              { text: 'MCP Integration', link: '/guide/mcp-integration' },
              { text: 'Claude Code Plugin', link: '/guide/claude-code' },
              { text: 'Gemini CLI', link: '/guide/gemini-cli' },
              { text: 'OpenClaw Skill', link: '/guide/openclaw' },
              { text: 'Local Models', link: '/guide/local-models' },
              { text: 'Auto-Save Hooks', link: '/guide/hooks' },
              { text: 'Configuration', link: '/guide/configuration' },
            ],
          },
        ],
        '/concepts/': [
          {
            text: 'Concepts',
            items: [
              { text: 'The Palace', link: '/concepts/the-palace' },
              { text: 'Memory Stack', link: '/concepts/memory-stack' },
              { text: 'AAAK Dialect', link: '/concepts/aaak-dialect' },
              { text: 'Knowledge Graph', link: '/concepts/knowledge-graph' },
              { text: 'Specialist Agents', link: '/concepts/agents' },
              { text: 'Contradiction Detection', link: '/concepts/contradiction-detection' },
            ],
          },
        ],
        '/reference/': [
          {
            text: 'Reference',
            items: [
              { text: 'CLI Commands', link: '/reference/cli' },
              { text: 'MCP Tools', link: '/reference/mcp-tools' },
              { text: 'Python API', link: '/reference/python-api' },
              { text: 'API Reference', link: '/reference/api-reference' },
              { text: 'Module Map', link: '/reference/modules' },
              { text: 'Benchmarks', link: '/reference/benchmarks' },
              { text: 'Contributing', link: '/reference/contributing' },
            ],
          },
        ],
      },
      socialLinks: [
        { icon: 'github', link: 'https://github.com/MemPalace/mempalace' },
        { icon: 'discord', link: 'https://discord.com/invite/ycTQQCu6kn' },
      ],
      search: {
        provider: 'local',
      },
      footer: {
        message: 'Released under the MIT License.',
        copyright: 'Copyright © 2026 MemPalace contributors',
      },
      // "Edit this page" links point at the website/ directory on editBranch.
      editLink: {
        pattern: `https://github.com/MemPalace/mempalace/edit/${editBranch}/website/:path`,
        text: 'Edit this page on GitHub',
      },
    },
    mermaid: {
      theme: 'dark',
    },
  })
)
-29
View File
@@ -1,29 +0,0 @@
<script setup>
import { useLandingEffects } from './landing/useLandingEffects.js'
import FolioHeader from './landing/FolioHeader.vue'
import HeroSection from './landing/HeroSection.vue'
import ForgettingSection from './landing/ForgettingSection.vue'
import AnatomySection from './landing/AnatomySection.vue'
import DialectSection from './landing/DialectSection.vue'
import MechanicsSection from './landing/MechanicsSection.vue'
import InstallSection from './landing/InstallSection.vue'
import CatalogFooter from './landing/CatalogFooter.vue'
import './landing/landing.css'
useLandingEffects()
</script>
<template>
<div class="mempalace-landing">
<div class="page">
<FolioHeader />
<HeroSection />
<ForgettingSection />
<AnatomySection />
<DialectSection />
<MechanicsSection />
<InstallSection />
<CatalogFooter />
</div>
</div>
</template>
-10
View File
@@ -1,10 +0,0 @@
import DefaultTheme from 'vitepress/theme'
import Landing from './Landing.vue'
import './style.css'
// Theme entry: builds on the default VitePress theme and registers the
// Landing component on the app under the name 'Landing'.
export default {
  extends: DefaultTheme,
  enhanceApp({ app }) {
    app.component('Landing', Landing)
  },
}
@@ -1,100 +0,0 @@
<template>
<section v-pre id="anatomy" class="anatomy">
<div class="section-mark"><span class="roman">ii</span> <span>anatomy of a palace</span></div>
<div class="anatomy-head">
<div>
<span class="eyebrow">the method of loci, updated</span>
<h2 class="display">
Wings. Rooms. Closets. <em>Drawers.</em>
</h2>
</div>
<p class="lede">
An ancient memory technique, reworked for a machine. Broad categories
nest time-based groupings; time-based groupings bundle topics; topics
hold verbatim drawers. A symbolic index lets the model scan thousands
of drawers in a single pass and open only the ones it needs.
</p>
</div>
<div class="anatomy-diagram">
<article class="stratum">
<span class="n">W wing</span>
<h3>The <em>Wings</em></h3>
<p class="sub">people · projects · topics</p>
<p>A broad region of the palace, keyed to a real entity — a person by name, a project by codename, a domain of your life. Entity-first, always.</p>
<div class="diagram">
<svg viewBox="0 0 200 80" fill="none" stroke="currentColor" stroke-width="1" style="color:var(--prism);">
<rect x="5" y="20" width="190" height="50" opacity="0.4"/>
<rect x="15" y="28" width="50" height="34" />
<rect x="75" y="28" width="50" height="34" />
<rect x="135" y="28" width="50" height="34" />
<line x1="5" y1="12" x2="195" y2="12" stroke-dasharray="2 3" opacity="0.5"/>
</svg>
</div>
</article>
<article class="stratum">
<span class="n">R room</span>
<h3>The <em>Rooms</em></h3>
<p class="sub">days · sessions · threads</p>
<p>Inside a wing sit rooms — discrete units of time. One room per day, or one per session. Walk the corridor and the palace unfolds chronologically, room by room.</p>
<div class="diagram">
<svg viewBox="0 0 200 80" fill="none" stroke="currentColor" stroke-width="1" style="color:var(--prism);">
<rect x="10" y="20" width="36" height="44" />
<rect x="56" y="20" width="36" height="44" />
<rect x="102" y="20" width="36" height="44" />
<rect x="148" y="20" width="36" height="44" />
<line x1="10" y1="70" x2="184" y2="70" stroke-dasharray="1 3" opacity="0.6"/>
</svg>
</div>
</article>
<article class="stratum">
<span class="n">C closet</span>
<h3>The <em>Closets</em></h3>
<p class="sub">topics · threads · bundles</p>
<p>Inside a room, closets group related drawers by topic or thread. Open one closet and you see every drawer on that subject together — no need to walk the whole room.</p>
<div class="diagram">
<svg viewBox="0 0 200 80" fill="none" stroke="currentColor" stroke-width="1" style="color:var(--prism);">
<g class="closet">
<rect x="10" y="14" width="54" height="52" />
<line x1="37" y1="14" x2="37" y2="66" opacity="0.5"/>
<circle cx="33" cy="40" r="1.2" fill="currentColor"/>
<circle cx="41" cy="40" r="1.2" fill="currentColor"/>
</g>
<g class="closet">
<rect x="73" y="14" width="54" height="52" />
<line x1="100" y1="14" x2="100" y2="66" opacity="0.5"/>
<circle cx="96" cy="40" r="1.2" fill="currentColor"/>
<circle cx="104" cy="40" r="1.2" fill="currentColor"/>
</g>
<g class="closet">
<rect x="136" y="14" width="54" height="52" />
<line x1="163" y1="14" x2="163" y2="66" opacity="0.5"/>
<circle cx="159" cy="40" r="1.2" fill="currentColor"/>
<circle cx="167" cy="40" r="1.2" fill="currentColor"/>
</g>
</svg>
</div>
</article>
<article class="stratum">
<span class="n">D drawer</span>
<h3>The <em>Drawers</em></h3>
<p class="sub">verbatim · permanent · exact</p>
<p>Each room holds drawers. A drawer is a single chunk of verbatim content — the exact words, untouched. The palace's promise is kept here.</p>
<div class="diagram">
<svg viewBox="0 0 200 80" fill="none" stroke="currentColor" stroke-width="1" style="color:var(--prism);">
<rect x="40" y="14" width="120" height="16" />
<rect x="40" y="34" width="120" height="16" />
<rect x="40" y="54" width="120" height="16" />
<circle cx="150" cy="22" r="1.5" fill="currentColor"/>
<circle cx="150" cy="42" r="1.5" fill="currentColor"/>
<circle cx="150" cy="62" r="1.5" fill="currentColor"/>
</svg>
</div>
</article>
</div>
</section>
</template>
@@ -1,45 +0,0 @@
<template>
<footer v-pre class="catalog">
<form class="waitlist waitlist-footer" data-source="footer" novalidate>
<div class="waitlist-head">
<span class="waitlist-pulse" aria-hidden="true"></span>
<span class="waitlist-eyebrow">Last call &middot; subscribe for updates</span>
</div>
<div class="waitlist-row">
<input type="email" class="waitlist-input" name="email" placeholder="you@example.com" autocomplete="email" aria-label="Email address" required />
<button type="submit" class="waitlist-submit">
<span class="waitlist-label-default">Join the list</span>
<span class="waitlist-label-pending" aria-hidden="true">Joining</span>
<svg class="waitlist-arrow" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="1.6" aria-hidden="true"><path d="M5 12h14M13 6l6 6-6 6"/></svg>
<svg class="waitlist-check" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="1.8" aria-hidden="true"><path d="M5 12l5 5 9-11"/></svg>
</button>
</div>
<p class="waitlist-msg" aria-live="polite"></p>
</form>
<div class="catalog-card">
<div>
<p class="catalog-title">MemPalace <em>&mdash;</em> a memory palace for AI.</p>
<p class="catalog-desc">Verbatim storage, local-first, zero telemetry. Built for people who believe their words are theirs.</p>
</div>
<div>
<h4>Documentation</h4>
<ul>
<li><a href="/guide/getting-started">Getting started</a></li>
<li><a href="/concepts/the-palace">The palace</a></li>
<li><a href="/reference/cli">CLI reference</a></li>
<li><a href="/reference/benchmarks">Benchmarks</a></li>
</ul>
</div>
<div>
<h4>The project</h4>
<ul>
<li><a href="https://github.com/MemPalace/mempalace">GitHub</a></li>
<li><a href="https://github.com/MemPalace/mempalace/blob/main/README.md">Readme</a></li>
<li><a href="https://github.com/MemPalace/mempalace/blob/main/ROADMAP.md">Roadmap</a></li>
<li><a href="https://github.com/MemPalace/mempalace/blob/main/CHANGELOG.md">Changelog</a></li>
</ul>
</div>
</div>
</footer>
</template>
@@ -1,64 +0,0 @@
<template>
<section v-pre id="dialect" class="dialect">
<div class="section-mark"><span class="roman">iii</span> <span>the aaak dialect</span></div>
<div class="dialect-head">
<span class="eyebrow">index &larr; verbatim</span>
<h2 class="display">
A compressed symbolic language <em>for finding</em>, not remembering.
</h2>
<p class="lede">
The content stays verbatim — always. The <em>index</em> above it is written
in AAAK: a dense symbolic dialect an LLM can scan at a glance. Thousands
of entries, one pass, exact drawer located.
</p>
</div>
<div class="dialect-grid">
<article class="slab">
<header class="card-head">
<span class="l">drawer · D-007</span>
<span>verbatim · exact · permanent</span>
</header>
<p class="label">The drawer, as stored.</p>
<p>
"My son's name is <strong>Noah</strong>. He turns <strong>six</strong>
on <strong>September 12th</strong>. He loves dinosaurs —
especially the <strong>therizinosaurus</strong> because of the
claws. We want to do a small party at <strong>the park on Glebe
Point Road</strong>, maybe eight kids."
</p>
<p style="color:var(--ice-ghost); font-size: 13.5px; font-family: var(--f-mono); letter-spacing: 0.05em; margin-top:1.5rem;">
&mdash; kept as spoken. never rewritten.
</p>
</article>
<div class="dialect-arrow" aria-hidden="true">
<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="1.3">
<path d="M12 3v18M7 8l5-5 5 5M7 16l5 5 5-5"/>
</svg>
<span>index · AAAK</span>
</div>
<article class="slab mono">
<header class="card-head">
<span class="l">index · AAAK</span>
<span>indexes · compressed · addressable</span>
</header>
<p class="label">The pointer, as indexed.</p>
<pre><span class="c">§ W-042/R-11/D-007</span>
<span class="k">@p</span> <span class="t">noah</span>~<span class="v">son.age=6</span>~<span class="v">dob=09-12</span>
<span class="k">@l</span> <span class="t">glebe-pt-rd.park</span>
<span class="k">@e</span> <span class="t">birthday</span>~<span class="v">party(n8)</span>
<span class="k">@i</span> <span class="t">therizinosaurus</span>~<span class="v">claws</span>
<span class="k">@t</span> <span class="v">2026-04-14T09:41</span>
<span class="c">§ ptr D-007 (verbatim)</span></pre>
</article>
</div>
<p class="dialect-caption">
Dense compression on the pointer layer. Full fidelity on the content
layer. You get speed without ever losing a word.
</p>
</section>
</template>
@@ -1,16 +0,0 @@
<template>
<header v-pre class="folio" role="banner">
<div class="mark" aria-label="MemPalace">
<img src="/mempalace_logo.png" alt="" aria-hidden="true" />
<span>MemPalace</span>
</div>
<nav class="right" aria-label="Primary">
<a href="#anatomy" class="hide-mobile">Anatomy</a>
<a href="#dialect" class="hide-mobile">Dialect</a>
<a href="#mechanics" class="hide-mobile">Mechanics</a>
<a href="#install" class="hide-mobile">Install</a>
<a href="/guide/getting-started">Docs</a>
<a href="https://github.com/MemPalace/mempalace">GitHub </a>
</nav>
</header>
</template>
@@ -1,43 +0,0 @@
<template>
<section v-pre id="forgetting" class="forgetting">
<div class="section-mark"><span class="roman">i</span> <span>the forgetting</span></div>
<header class="forgetting-head">
<div class="copy">
<span class="eyebrow">before &middot; after</span>
<h2 class="display">
The same conversation, <em>twice.</em>
</h2>
<p class="lede" style="margin:0;">
Scroll down and watch. On the left, a model without memory. On the right,
the same model with MemPalace. The words are identical until two weeks
pass.
</p>
</div>
<button class="replay" id="replay-demo" type="button" aria-label="Replay the demo">
<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="1.5" aria-hidden="true"><path d="M4 4v6h6"/><path d="M20 20v-6h-6"/><path d="M4 10a8 8 0 0114-5l2 3"/><path d="M20 14a8 8 0 01-14 5l-2-3"/></svg>
replay
</button>
</header>
<div class="forgetting-compare" id="forgetting-compare" aria-label="Comparison demo">
<article class="demo-pane demo-forget">
<header>
<span class="pane-tag">without mempalace</span>
<span class="pane-meta">session <em>resets</em> &middot; no recall</span>
</header>
<div class="chat" data-pane="forget" aria-live="polite"></div>
</article>
<div class="divider" aria-hidden="true"></div>
<article class="demo-pane demo-remember">
<header>
<span class="pane-tag">with mempalace</span>
<span class="pane-meta">verbatim &middot; retrieved <em>instantly</em></span>
</header>
<div class="chat" data-pane="remember" aria-live="polite"></div>
</article>
</div>
</section>
</template>
@@ -1,80 +0,0 @@
<template>
<section v-pre class="hero" id="hero">
<span class="corner-ticks" aria-hidden="true"><span></span></span>
<div class="hero-inner">
<div class="hero-copy">
<h1 class="display">
<span class="line">Memory <em class="is-accent">is</em></span>
<span class="line line-2"><span class="identity-white">identity.</span></span>
</h1>
<p class="lede">
Every conversation, every idea, every small decision&hellip; held somewhere safe.
<br><br>Welcome to the future of memory: <span class="mp-blue">MemPalace</span>
</p>
<form class="waitlist waitlist-hero" data-source="hero" novalidate>
<div class="waitlist-head">
<span class="waitlist-pulse" aria-hidden="true"></span>
<span class="waitlist-eyebrow">Subscribe for updates</span>
</div>
<div class="waitlist-row">
<input
type="email"
class="waitlist-input"
name="email"
placeholder="you@example.com"
autocomplete="email"
aria-label="Email address"
required
/>
<button type="submit" class="waitlist-submit">
<span class="waitlist-label-default">Join the list</span>
<span class="waitlist-label-pending" aria-hidden="true">Joining</span>
<svg class="waitlist-arrow" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="1.6" aria-hidden="true">
<path d="M5 12h14M13 6l6 6-6 6"/>
</svg>
<svg class="waitlist-check" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="1.8" aria-hidden="true">
<path d="M5 12l5 5 9-11"/>
</svg>
</button>
</div>
<p class="waitlist-msg" aria-live="polite"></p>
</form>
<div class="hero-secondary">
<a href="/guide/getting-started">Read the docs</a>
<span class="sep" aria-hidden="true">·</span>
<a href="https://github.com/MemPalace/mempalace">GitHub </a>
</div>
</div>
<!-- Palace video visual -->
<div class="palace-stage" aria-hidden="true">
<div class="halo"></div>
<div class="stars">
<i style="top:12%; left:22%; --t:5s; --d:0.0s"></i>
<i style="top:18%; left:74%; --t:6s; --d:1.2s"></i>
<i style="top:34%; left:8%; --t:4s; --d:0.6s"></i>
<i style="top:44%; left:88%; --t:7s; --d:0.3s"></i>
<i style="top:62%; left:14%; --t:5.5s; --d:1.8s"></i>
<i style="top:72%; left:82%; --t:4.5s; --d:0.9s"></i>
<i style="top:82%; left:38%; --t:6.2s; --d:2.4s"></i>
<i style="top:28%; left:52%; --t:5.2s; --d:3.0s"></i>
<i style="top:88%; left:60%; --t:4.8s; --d:1.5s"></i>
<i style="top:6%; left:48%; --t:6.8s; --d:0.4s"></i>
</div>
<video
class="palace-video"
src="/hero_video.mp4"
autoplay
muted
loop
playsinline
disablepictureinpicture
></video>
</div>
</div>
</section>
</template>
@@ -1,38 +0,0 @@
<template>
<section v-pre id="install" class="install">
<div class="section-mark" style="left:50%; transform:translateX(-50%);"><span class="roman">v</span> <span>begin</span></div>
<span class="eyebrow" style="justify-content:center;">open a drawer</span>
<h2 class="display">
Build your <em>palace.</em>
</h2>
<p class="lede" style="margin-left:auto;margin-right:auto;text-align:center;">
One command to install. One to initialize. Your words &mdash; yours, permanent,
instantly recallable from that moment on.
</p>
<div class="terminal" role="figure" aria-label="Installation commands">
<div class="terminal-head">
<span class="lights"><i></i><i></i><i></i></span>
<span>~/mempalace &middot; bash</span>
</div>
<pre><span class="prompt">$</span> pip install -e <span class="dim">".[dev]"</span>
<span class="c">Successfully installed mempalace</span>
<span class="prompt">$</span> mempalace init
<span class="ok"> </span> palace created at <span class="dim">~/.mempalace</span>
<span class="ok"> </span> hooks registered <span class="dim">(stop, precompact)</span>
<span class="ok"> </span> knowledge graph initialized
<span class="prompt">$</span> mempalace mine <span class="dim">./notes</span>
<span class="ok"> </span> filed · <span class="c">W-001/R-01/D-001</span></pre>
</div>
<div class="install-cta">
<a href="/guide/getting-started" class="btn btn-primary">
Read the docs
<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="1.5"><path d="M5 12h14M13 6l6 6-6 6"/></svg>
</a>
<a href="https://github.com/MemPalace/mempalace" class="btn">
Visit the repository
</a>
</div>
</section>
</template>
@@ -1,83 +0,0 @@
<template>
<section v-pre id="mechanics">
<div class="section-mark"><span class="roman">iv</span> <span>how it works</span></div>
<div class="mechanics-head">
<span class="eyebrow">mechanism · architecture</span>
<h2 class="display">
Four pieces. <em>No cloud.</em> No keys.
</h2>
</div>
<div class="mechanics">
<article class="mech">
<div class="icon" aria-hidden="true">
<svg viewBox="0 0 48 48" fill="none" stroke="currentColor" stroke-width="1.3">
<rect x="8" y="10" width="32" height="22" rx="1"/>
<path d="M8 16h32"/>
<g class="mech-bars">
<path d="M14 24h20"/>
<path d="M14 28h12"/>
</g>
<path d="M16 38h16M20 32v6M28 32v6"/>
<circle class="mech-led" cx="36" cy="13.5" r="1.1" fill="currentColor"/>
</svg>
</div>
<span class="eyebrow no-rule"><span class="n"> 01</span></span>
<h3>Local-<em>first</em></h3>
<p>ChromaDB on disk. SQLite for the knowledge graph. Nothing is uploaded. Nothing is synced. Your palace lives under a single directory on your machine.</p>
<div class="metric">path · <b>~/.mempalace</b></div>
</article>
<article class="mech">
<div class="icon" aria-hidden="true">
<svg viewBox="0 0 48 48" fill="none" stroke="currentColor" stroke-width="1.3">
<circle class="mech-ring" cx="24" cy="24" r="14"/>
<path class="mech-plus" d="M16 24h16M24 16v16"/>
<path class="mech-slash" d="M10 10l28 28" stroke-width="1.5"/>
</svg>
</div>
<span class="eyebrow no-rule"><span class="n"> 02</span></span>
<h3>Zero <em>API</em></h3>
<p>Extraction, chunking, and embedding all run locally. No OpenAI key, no Anthropic key, no sentence-transformers endpoint. The memory works even offline, on a plane.</p>
<div class="metric">keys required · <b>none</b></div>
</article>
<article class="mech">
<div class="icon" aria-hidden="true">
<svg viewBox="0 0 48 48" fill="none" stroke="currentColor" stroke-width="1.3">
<path d="M8 36V18l8-8h16l8 8v18"/>
<path d="M8 36h32"/>
<g class="mech-gear">
<circle cx="24" cy="26" r="4"/>
<path d="M24 22v-4M24 30v4M20 26h-4M28 26h4"/>
</g>
</svg>
</div>
<span class="eyebrow no-rule"><span class="n"> 03</span></span>
<h3>Background <em>hooks</em></h3>
<p>Filing and indexing happen silently through Claude Code hooks. On session end, on pre-compaction. You write. The palace fills itself behind the curtain.</p>
<div class="metric">hook budget · <b>&lt;500 ms</b></div>
</article>
<article class="mech">
<div class="icon" aria-hidden="true">
<svg viewBox="0 0 48 48" fill="none" stroke="currentColor" stroke-width="1.3">
<path class="mech-edges" d="M12 14l10 10M36 12L26 24M14 36l8-8M36 34l-10-6" opacity="0.6"/>
<g class="mech-nodes">
<circle cx="10" cy="12" r="3"/>
<circle cx="38" cy="10" r="3"/>
<circle cx="24" cy="26" r="3"/>
<circle cx="12" cy="38" r="3"/>
<circle cx="38" cy="36" r="3"/>
</g>
</svg>
</div>
<span class="eyebrow no-rule"><span class="n"> 04</span></span>
<h3>Temporal <em>graph</em></h3>
<p>Relationships across entities with valid-from and valid-to dates. Who worked on what. When did this change. Facts that were true then, and may not be now.</p>
<div class="metric">store · <b>sqlite</b></div>
</article>
</div>
</section>
</template>
File diff suppressed because it is too large Load Diff
@@ -1,406 +0,0 @@
import { onMounted, onBeforeUnmount } from 'vue'
export function useLandingEffects() {
// Shared cleanup registry — IIFEs push disconnect/removeEventListener thunks
// here so onBeforeUnmount can tear everything down on SPA nav.
const cleanups = []
onMounted(() => {
if (typeof document === 'undefined') return
// Hide VitePress chrome while the landing component is live, restore on leave.
document.body.classList.add('mempalace-active')
/* ---------- Waitlist submission ---------- */
;(function initWaitlist(){
const ENDPOINT = 'https://br.staging.mempalaceofficial.com/waitlist'
const forms = document.querySelectorAll('.mempalace-landing .waitlist')
const emailRe = /^[^\s@]+@[^\s@]+\.[^\s@]+$/
forms.forEach(form => {
const input = form.querySelector('.waitlist-input')
const button = form.querySelector('.waitlist-submit')
const msg = form.querySelector('.waitlist-msg')
const source = form.dataset.source || 'landing'
// Switch this form's status class and, optionally, its status message.
//   state: 'pending' | 'success' | 'error' as used by the handlers here; any falsy
//          value leaves the form with no status class at all.
//   text:  message to display; null/undefined keeps whatever is currently shown.
function setState(state, text) {
  const statusClasses = ['is-pending', 'is-success', 'is-error']
  // Drop every status class first so the form never carries two at once.
  form.classList.remove(...statusClasses)
  if (state) {
    form.classList.add(`is-${state}`)
  }
  const hasText = text !== null && text !== undefined
  if (hasText) {
    msg.textContent = text
  }
}
// Submit handler for one waitlist form: validates the address client-side, POSTs
// `{ email, source }` as JSON to ENDPOINT, and reflects the outcome via setState().
// After a success the controls stay disabled; after any failure they are re-enabled.
const onSubmit = async (e) => {
  e.preventDefault()

  // A form that already succeeded, or has a request in flight, ignores further submits.
  const busyOrDone = ['is-success', 'is-pending'].some(cls => form.classList.contains(cls))
  if (busyOrDone) return

  const email = (input.value || '').trim()
  if (!emailRe.test(email)) {
    setState('error', 'Please provide a valid email address.')
    input.focus()
    return
  }

  // Enable/disable both controls together.
  const lock = (locked) => {
    button.disabled = locked
    input.disabled = locked
  }

  setState('pending', 'Sending…')
  lock(true)

  try {
    const res = await fetch(ENDPOINT, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ email, source })
    })

    // The body is optional: an empty or non-JSON response just leaves `payload` null.
    let payload = null
    try { payload = await res.json() } catch (_) { /* no body */ }
    const serverText = payload && payload.message

    if (res.ok) {
      setState('success', serverText || "Success! You're on the list for updates.")
      // keep inputs disabled so they can't resubmit accidentally
      input.value = email
      return
    }

    let failure
    switch (res.status) {
      case 429:
        // Rate limited: always use the local wording, never the server's.
        failure = 'Whoa — slow down a moment, then try again.'
        break
      case 400:
        failure = serverText || 'Please provide a valid email address.'
        break
      default:
        failure = serverText || 'Something went wrong. Please try again later.'
    }
    setState('error', failure)
    lock(false)
  } catch (_err) {
    // Anything thrown above (typically a rejected fetch) is reported as a network problem.
    setState('error', 'Network error — please try again.')
    lock(false)
  }
}
// Typing into the field dismisses a visible error: status class and message are both cleared.
const onInput = () => {
  const showingError = form.classList.contains('is-error')
  if (!showingError) return
  setState(null, '')
}
form.addEventListener('submit', onSubmit)
input.addEventListener('input', onInput)
cleanups.push(() => {
form.removeEventListener('submit', onSubmit)
input.removeEventListener('input', onInput)
})
})
})()
/* ---------- Reveal-on-scroll for cards ---------- */
;(function initReveal(){
  // Cards (.stratum / .mech / .slab) start transparent and 20px low, then fade and
  // slide into place the first time they intersect the viewport.
  // Without IntersectionObserver nothing is hidden, so content stays readable.
  if (!('IntersectionObserver' in window)) return

  // Respect prefers-reduced-motion the same way the forgetting demo does: skip the
  // effect entirely so cards are simply visible, with no opacity/transform animation.
  const reduced = typeof window.matchMedia === 'function' &&
    window.matchMedia('(prefers-reduced-motion: reduce)').matches
  if (reduced) return

  const items = document.querySelectorAll('.mempalace-landing .stratum, .mempalace-landing .mech, .mempalace-landing .slab')
  items.forEach(el => {
    el.style.opacity = '0'
    el.style.transform = 'translateY(20px)'
    el.style.transition = 'opacity 0.9s ease, transform 0.9s ease'
  })

  const io = new IntersectionObserver((entries) => {
    entries.forEach((entry) => {
      if (!entry.isIntersecting) return
      const card = entry.target
      // Stagger by the card's index among its parent's children (80 ms per position).
      const idx = [...card.parentElement.children].indexOf(card)
      card.style.transitionDelay = (idx * 80) + 'ms'
      card.style.opacity = '1'
      card.style.transform = 'translateY(0)'
      // One-shot: a revealed card is never hidden again.
      io.unobserve(card)
    })
  }, { rootMargin: '0px 0px -80px 0px' })

  items.forEach(el => io.observe(el))
  cleanups.push(() => io.disconnect())
})()
/* ---------- Forgetting demo ---------- */
;(function initForgettingDemo(){
const compare = document.getElementById('forgetting-compare')
if (!compare) return
const leftChat = compare.querySelector('[data-pane="forget"]')
const rightChat = compare.querySelector('[data-pane="remember"]')
const replayBtn = document.getElementById('replay-demo')
const reduced = window.matchMedia('(prefers-reduced-motion: reduce)').matches
const delay = ms => new Promise(r => setTimeout(r, reduced ? Math.min(ms, 60) : ms))
// Empty both chat panes and hide the replay button (when the page has one).
function clear() {
  for (const pane of [leftChat, rightChat]) {
    pane.innerHTML = ''
  }
  if (!replayBtn) return
  replayBtn.classList.remove('visible')
}
// Append an empty message row to `chat` and return it.
//   who:     speaker label; 'You' gets the `you` class, anything else gets `ai`.
//   opts.id: optional value stored as data-id so the row can be queried later.
// The row holds a `.who` label and an empty `.body`; the pane is scrolled to the bottom.
function addMsg(chat, who, opts = {}) {
  const speakerClass = who === 'You' ? 'you' : 'ai'
  const row = document.createElement('div')
  row.className = `msg ${speakerClass}`
  if (opts.id) {
    row.dataset.id = opts.id
  }
  row.innerHTML = `<span class="who">${who}</span><span class="body"></span>`
  chat.appendChild(row)
  chat.scrollTop = chat.scrollHeight
  return row
}
// Type `text` into the row's `.body` one character at a time.
//   row:   message row containing a `.body` element.
//   text:  plain text that may contain simple inline tags such as <strong>…</strong>.
//   speed: base delay per character in ms (ignored when reduced motion is on).
// The row carries the `typing` class while characters are being added.
//
// Tags cannot be inserted piecemeal with insertAdjacentHTML: each call parses its
// fragment on its own, so a lone "<strong>" becomes an empty element and a lone
// "</strong>" is discarded, leaving the typed text outside the element. Instead we
// track the element currently being typed into: descend on an opening tag, climb
// back out on a closing tag.
async function typeInto(row, text, speed = 14) {
  const body = row.querySelector('.body')
  const parts = text.split(/(<[^>]+>)/)
  // Elements that never take children; typing must continue in their parent.
  const voidTag = /^<(?:area|base|br|col|embed|hr|img|input|link|meta|source|track|wbr)\b/i
  let target = body
  row.classList.add('typing')
  for (const part of parts) {
    if (!part) continue
    if (part.startsWith('</')) {
      // Closing tag: step back out, but never above the message body itself.
      if (target !== body && target.parentElement) target = target.parentElement
      continue
    }
    if (part.startsWith('<')) {
      const before = target.lastElementChild
      target.insertAdjacentHTML('beforeend', part)
      const opened = target.lastElementChild
      // Descend only when the fragment really created a new container element.
      const isContainer = opened && opened !== before && !voidTag.test(part) && !part.endsWith('/>')
      if (isContainer) target = opened
      continue
    }
    for (const ch of part) {
      target.insertAdjacentText('beforeend', ch)
      // Occasional 40 ms hitch makes the typing look less mechanical.
      if (!reduced) await delay(speed + (Math.random() < 0.08 ? 40 : 0))
    }
  }
  row.classList.remove('typing')
}
// Append a `.divider-time` marker reading "— <text> —" to `chat` and return it.
function addDivider(chat, text) {
  const marker = document.createElement('div')
  marker.className = 'divider-time'
  marker.textContent = `— ${text} —`
  chat.appendChild(marker)
  return marker
}
// Append a `.retrieval` row to `chat` showing which call number was retrieved
// and how long it took (`ms`, rendered as "<ms>&nbsp;ms"). Returns the row.
function addRetrieval(chat, callNumber, ms) {
  const label = '<span class="who">mem</span>'
  const pointer = `<span class="l">retrieved &middot; <span class="r">${callNumber}</span></span>`
  const timing = `<span>${ms}&nbsp;ms</span>`

  const row = document.createElement('div')
  row.className = 'retrieval'
  row.innerHTML = label + pointer + timing
  chat.appendChild(row)
  return row
}
// Append a closing `.stamp` to `chat`: "— <text>", followed by a `.call` span
// only when a call number is supplied. Returns the stamp element.
function addStamp(chat, text, callNumber) {
  let markup = `<span>— ${text}</span>`
  if (callNumber) {
    markup += `<span class="call">${callNumber}</span>`
  }
  const stamp = document.createElement('div')
  stamp.className = 'stamp'
  stamp.innerHTML = markup
  chat.appendChild(stamp)
  return stamp
}
// Visually "dissolve" the text inside `target`: every non-space character is
// copied into its own `.dust` span placed over the original glyph, the original
// text is made transparent, and the spans then drift, rotate, blur and fade out.
// Returns a Promise that resolves once the spans have been removed again
// (after 2600 ms, or 200 ms when `reduced` is set). Resolves immediately when
// `target` is not inside a `.chat` container.
function disintegrate(target) {
return new Promise(resolve => {
const parent = target.closest('.chat')
if (!parent) { resolve(); return }
// All span coordinates below are expressed relative to this rectangle.
// NOTE(review): the viewport-rect difference does not include parent.scrollTop —
// confirm the overlay's CSS positioning still lines up when the chat has scrolled.
const parentRect = parent.getBoundingClientRect()
const style = getComputedStyle(target)
// Fall back to assembling the shorthand by hand when the computed `font` is empty.
const font = style.font ||
(style.fontStyle + ' ' + style.fontWeight + ' ' + style.fontSize + '/' + style.lineHeight + ' ' + style.fontFamily)
const color = style.color
// One overlay per chat pane, created on first use and reused afterwards.
let overlay = parent.querySelector('.dust-overlay')
if (!overlay) {
overlay = document.createElement('div')
overlay.className = 'dust-overlay'
parent.appendChild(overlay)
}
// Walk text nodes only, measuring each character with a one-character Range.
const walker = document.createTreeWalker(target, NodeFilter.SHOW_TEXT)
const range = document.createRange()
const spans = []
let node
while ((node = walker.nextNode())) {
const chars = node.textContent
for (let i = 0; i < chars.length; i++) {
if (chars[i] === ' ') continue
range.setStart(node, i)
range.setEnd(node, i + 1)
const r = range.getBoundingClientRect()
// Characters with an empty box get no dust span.
if (r.width === 0 || r.height === 0) continue
const span = document.createElement('span')
span.className = 'dust'
span.textContent = chars[i]
span.style.left = (r.left - parentRect.left) + 'px'
span.style.top = (r.top - parentRect.top) + 'px'
span.style.width = r.width + 'px'
span.style.height = r.height + 'px'
span.style.font = font
span.style.color = color
// Starting state: fully visible and exactly where the original glyph is.
span.style.opacity = '1'
span.style.transform = 'translate(0,0)'
// Per-character duration between 1500 and 2400 ms.
span.style.transitionDuration = (1500 + Math.random() * 900) + 'ms'
overlay.appendChild(span)
spans.push(span)
}
}
// Hide the real text; the dust spans now stand in for it.
target.style.transition = 'color 0.35s ease, opacity 0.35s ease'
target.style.color = 'transparent'
// Layout read between setting the start and end states — presumably to force a
// reflow so the transitions animate from the starting values; keep this ordering.
void overlay.offsetHeight
const cx = parentRect.width / 2
spans.forEach((s) => {
s.style.transitionDelay = (Math.random() * 500) + 'ms'
const x0 = parseFloat(s.style.left)
// Horizontal drift: slightly away from the pane's centre line plus random jitter.
const dx = (x0 - cx) * 0.06 + (Math.random() - 0.5) * 36
// Vertical drift is always downward, 30 to 110 px.
const dy = 30 + Math.random() * 80
const rot = (Math.random() - 0.5) * 44
s.style.transform = 'translate(' + dx + 'px,' + dy + 'px) rotate(' + rot + 'deg)'
s.style.opacity = '0'
s.style.filter = 'blur(2px)'
})
// Remove the spans and settle the promise after a fixed wait
// (the timer is not tied to transition events).
setTimeout(() => {
spans.forEach(s => s.remove())
resolve()
}, reduced ? 200 : 2600)
})
}
const NOAH_TEXT = "My son's name is Noah. He turns six on September 12th."
// Play the left-hand ("without mempalace") script: the user states the fact, the
// model claims it will remember, then two weeks later the original message
// dissolves and the model has to ask again. Ends with the "forgotten." stamp.
async function runForget() {
  // Add a bubble for `who` and type `text` into it; settles when typing is done.
  const speak = async (who, text, speed) => {
    const bubble = addMsg(leftChat, who)
    await typeInto(bubble, text, speed)
  }

  // The first message is tagged so it can be found again and dissolved later.
  const factRow = addMsg(leftChat, 'You', { id: 'noah' })
  await delay(200)
  await typeInto(factRow, NOAH_TEXT, 16)
  await delay(500)
  await speak('Model', "Noted. I'll remember that for next time we talk.", 14)
  await delay(900)

  addDivider(leftChat, 'two weeks later')
  await delay(700)
  await speak('You', "Help me plan Noah's birthday.", 18)
  await delay(700)

  const fading = leftChat.querySelector('.msg[data-id="noah"] .body')
  if (fading) {
    await disintegrate(fading)
  }
  await delay(250)
  await speak('Model', "Of course. Who is Noah? How old is he turning?", 16)
  await delay(500)
  addStamp(leftChat, 'forgotten.')
}
// Play the right-hand ("with mempalace") script: the same opening exchange, but the
// fact is filed under a call number, retrieved two weeks later, and used in the
// model's answer. Ends with the "remembered." stamp carrying that call number.
async function runRemember() {
  const callNumber = 'W-042/R-01/D-003'

  // Add a bubble for `who` and type `text` into it; settles when typing is done.
  const speak = async (who, text, speed) => {
    const bubble = addMsg(rightChat, who)
    await typeInto(bubble, text, speed)
  }

  const factRow = addMsg(rightChat, 'You', { id: 'noah' })
  await delay(200)
  await typeInto(factRow, NOAH_TEXT, 16)
  await delay(500)
  await speak('Model', `Noted. Filed — <strong>${callNumber}</strong>.`, 14)
  await delay(900)

  addDivider(rightChat, 'two weeks later')
  await delay(700)
  await speak('You', "Help me plan Noah's birthday.", 18)
  await delay(600)
  addRetrieval(rightChat, callNumber, 42)
  await delay(700)

  const recall = [
    "Of course — <strong>Noah</strong> turns <strong>six</strong> on <strong>September 12th</strong>. ",
    "You mentioned he loves the <strong>therizinosaurus</strong>, and a park on ",
    "<strong>Glebe Point Road</strong>. Shall we build from there?"
  ].join('')
  await speak('Model', recall, 11)
  await delay(500)
  addStamp(rightChat, 'remembered.', callNumber)
}
let running = { forget: false, remember: false }
let started = { forget: false, remember: false }
// Side-by-side layout: play both scripts together, then reveal the replay button.
// Does nothing while either side is already running.
// The `running` flags are released in `finally`: if either script rejects, the
// rejection still propagates to the caller, but the demo is not left permanently
// "running" (which would make every later replay return immediately), and the
// replay button is still offered so the visitor can try again.
async function runBoth() {
  if (running.forget || running.remember) return
  running.forget = running.remember = true
  started.forget = started.remember = true
  try {
    clear()
    await delay(200)
    await Promise.all([runForget(), runRemember()])
  } finally {
    running.forget = running.remember = false
    if (replayBtn) replayBtn.classList.add('visible')
  }
}
// Stacked layout: play a single pane's script.
//   side: 'forget' (left pane) or 'remember' (right pane).
// A side runs at most once until resetAll() clears `started`. The replay button
// appears only when both sides have been started and neither is still running.
// The `running` flag is released in `finally`, so a script that rejects (the
// rejection still propagates) cannot keep the replay button hidden forever.
async function runSide(side) {
  if (running[side] || started[side]) return
  running[side] = true
  started[side] = true
  const chat = side === 'forget' ? leftChat : rightChat
  try {
    chat.innerHTML = ''
    await delay(200)
    await (side === 'forget' ? runForget() : runRemember())
  } finally {
    running[side] = false
    const bothStarted = started.forget && started.remember
    const noneRunning = !running.forget && !running.remember
    if (bothStarted && noneRunning && replayBtn) {
      replayBtn.classList.add('visible')
    }
  }
}
// Forget that either side has played, then wipe both panes (via clear()).
function resetAll() {
  for (const side of ['remember', 'forget']) {
    started[side] = false
  }
  clear()
}
const stackedMQ = window.matchMedia('(max-width: 900px)')
const isStacked = () => stackedMQ.matches
// Call `onReach` exactly once, the first time `el` is sufficiently on screen, then
// stop observing. Returns the IntersectionObserver so the caller can disconnect it
// early, or null when IntersectionObserver is unavailable (in which case `onReach`
// is invoked straight away).
// "Sufficiently on screen" means either:
//   - at least 65% of the element is intersecting, or
//   - the visible part fills at least 60% of the viewport height and the element's
//     top edge is within the top 15% of the viewport (or above it).
function observeOnce(el, onReach) {
  const supported = 'IntersectionObserver' in window
  if (!supported) {
    onReach()
    return null
  }

  let fired = false
  const observer = new IntersectionObserver((entries) => {
    for (const entry of entries) {
      if (fired || !entry.isIntersecting) continue
      const { boundingClientRect, intersectionRatio, intersectionRect } = entry
      const viewportHeight = window.innerHeight
      const coversElement = intersectionRatio >= 0.65
      const fillsViewport = intersectionRect.height / viewportHeight >= 0.60 &&
        boundingClientRect.top <= viewportHeight * 0.15
      if (!(coversElement || fillsViewport)) continue
      fired = true
      onReach()
      observer.disconnect()
    }
  }, {
    threshold: [0.1, 0.25, 0.4, 0.55, 0.7, 0.85, 1.0],
    rootMargin: '-8% 0px -8% 0px'
  })

  observer.observe(el)
  return observer
}
let observers = []
// Disconnect every live observer and start over with an empty list.
// Entries may be null (observeOnce returns null without IntersectionObserver support).
function disconnectObservers() {
  for (const io of observers) {
    if (io) io.disconnect()
  }
  observers = []
}
// (Re)arm the scroll triggers for the demo, replacing any previous observers.
// Side-by-side layout: one observer on the whole comparison starts both panes.
// Stacked layout: each pane gets its own observer and starts independently.
function armObservers() {
  disconnectObservers()
  if (!isStacked()) {
    observers.push(observeOnce(compare, runBoth))
    return
  }
  for (const side of ['forget', 'remember']) {
    const pane = compare.querySelector('.demo-' + side)
    observers.push(observeOnce(pane, () => runSide(side)))
  }
}
const onReplayClick = () => {
resetAll()
armObservers()
}
if (replayBtn) replayBtn.addEventListener('click', onReplayClick)
armObservers()
cleanups.push(() => {
disconnectObservers()
if (replayBtn) replayBtn.removeEventListener('click', onReplayClick)
})
})()
})
onBeforeUnmount(() => {
if (typeof document === 'undefined') return
document.body.classList.remove('mempalace-active')
while (cleanups.length) {
const fn = cleanups.pop()
try { fn() } catch (_) { /* swallow — teardown best-effort */ }
}
})
}

Some files were not shown because too many files have changed in this diff Show More