Merge pull request #673 from jphein/feat/deterministic-hook-save

Clean squash by jphein on 2026-04-21. Backwards-compatible via hook_silent_save config flag. Save marker now only advances after confirmed write — strictly safer than status quo.
This commit is contained in:
Ben Sigman
2026-04-21 17:38:21 -07:00
committed by GitHub
4 changed files with 447 additions and 77 deletions
+262 -17
View File
@@ -17,22 +17,54 @@ from pathlib import Path
SAVE_INTERVAL = 15
STATE_DIR = Path.home() / ".mempalace" / "hook_state"
def _mempalace_python() -> str:
"""Return the python interpreter that has mempalace installed.
When hooks are invoked by Claude Code, sys.executable may be the system
python which lacks chromadb and other deps. Resolution order:
1. MEMPALACE_PYTHON env var (explicit override)
2. Venv python from package install path
3. Editable install: venv/ sibling to mempalace/
4. sys.executable fallback
"""
# Honor explicit override (used by shell hook wrappers)
env_python = os.environ.get("MEMPALACE_PYTHON", "")
if env_python and os.path.isfile(env_python) and os.access(env_python, os.X_OK):
return env_python
# This file lives at <venv>/lib/pythonX.Y/site-packages/mempalace/hooks_cli.py
# or <project>/mempalace/hooks_cli.py (editable install).
venv_bin = Path(__file__).resolve().parents[3] / "bin" / "python"
if venv_bin.is_file():
return str(venv_bin)
# Editable install: assumes project root has a venv/ sibling to mempalace/
project_venv = Path(__file__).resolve().parents[1] / "venv" / "bin" / "python"
if project_venv.is_file():
return str(project_venv)
return sys.executable
_RECENT_MSG_COUNT = 30 # how many recent user messages to summarize
STOP_BLOCK_REASON = (
"AUTO-SAVE checkpoint (MemPalace). Save this session's key content:\n"
"1. mempalace_diary_write — AAAK-compressed session summary\n"
"2. mempalace_add_drawer — verbatim quotes, decisions, code snippets\n"
"1. mempalace_diary_write — session summary (what was discussed, "
"key decisions, current state of work)\n"
"2. mempalace_add_drawer — verbatim quotes, decisions, code snippets "
"(place in appropriate wing and room)\n"
"3. mempalace_kg_add — entity relationships (optional)\n"
"Do NOT write to Claude Code's native auto-memory (.md files). "
"Continue conversation after saving."
"For THIS save, use MemPalace MCP tools only (not auto-memory .md files). "
"Use verbatim quotes where possible. Continue conversation after saving."
)
PRECOMPACT_BLOCK_REASON = (
"COMPACTION IMMINENT (MemPalace). Save ALL session content before context is lost:\n"
"1. mempalace_diary_write — thorough AAAK-compressed session summary\n"
"2. mempalace_add_drawer — ALL verbatim quotes, decisions, code, context\n"
"1. mempalace_diary_write — thorough session summary\n"
"2. mempalace_add_drawer — ALL verbatim quotes, decisions, code, context "
"(place each in appropriate wing and room)\n"
"3. mempalace_kg_add — entity relationships (optional)\n"
"Be thorough \u2014 after compaction, detailed context will be lost. "
"Do NOT write to Claude Code's native auto-memory (.md files). "
"For THIS save, use MemPalace MCP tools only (not auto-memory .md files). "
"Be thorough — after compaction this is all that survives. "
"Save everything to MemPalace, then allow compaction to proceed."
)
@@ -264,6 +296,176 @@ def _mine_sync(transcript_path: str = ""):
pass
def _desktop_toast(body: str, title: str = "MemPalace"):
"""Send a desktop notification via notify-send. Fails silently."""
try:
subprocess.Popen(
["notify-send", "--app-name=MemPalace", "--icon=brain", title, body],
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
)
except OSError:
pass
def _extract_recent_messages(transcript_path: str, count: int = _RECENT_MSG_COUNT) -> list[str]:
"""Extract the last N user messages from a JSONL transcript."""
path = Path(transcript_path).expanduser()
if not path.is_file():
return []
messages = []
try:
with open(path, encoding="utf-8", errors="replace") as f:
for line in f:
try:
entry = json.loads(line)
# Claude Code format
msg = entry.get("message") or entry.get("event_message") or {}
if isinstance(msg, dict) and msg.get("role") == "user":
content = msg.get("content", "")
if isinstance(content, list):
content = " ".join(
b.get("text", "") for b in content if isinstance(b, dict)
)
if not isinstance(content, str) or not content.strip():
continue
if "<command-message>" in content or "<system-reminder>" in content:
continue
messages.append(content.strip()[:200])
# Codex CLI format
elif entry.get("type") == "event_msg":
payload = entry.get("payload", {})
if isinstance(payload, dict) and payload.get("type") == "user_message":
text = payload.get("message", "")
if isinstance(text, str) and text.strip():
if "<command-message>" not in text:
messages.append(text.strip()[:200])
except (json.JSONDecodeError, AttributeError):
pass
except OSError:
return []
return messages[-count:]
_THEME_STOPWORDS = frozenset(
"the a an and or but in on at to for of is it i me my you your we our "
"this that with from by was were be been are not no yes can do did dont "
"will would should could have has had lets let just also like so if then "
"ok okay sure yeah hey hi here there what when where how why which some "
"all any each every about into out up down over after before between "
"get got make made need want use used using check look see run try "
"know think right now still already really very much more most too "
"file files code one two new first last next thing things way well".split()
)
def _extract_themes(messages: list[str], max_themes: int = 3) -> list[str]:
"""Pull 2-3 distinctive topic words from recent messages.
Note: stopword list is English-only; non-English corpora will produce noisy themes.
"""
from collections import Counter
words: Counter[str] = Counter()
for msg in messages:
for word in msg.lower().split():
# Strip punctuation, keep words 4+ chars
clean = word.strip(".,;:!?\"'`()[]{}#<>/\\-_=+@$%^&*~")
if len(clean) >= 4 and clean not in _THEME_STOPWORDS and clean.isalpha():
words[clean] += 1
return [w for w, _ in words.most_common(max_themes)]
def _save_diary_direct(
transcript_path: str,
session_id: str,
toast: bool = False,
) -> dict:
"""Write a diary checkpoint by calling the tool function directly (no MCP roundtrip).
Returns {"count": N, "themes": [...]} on success, {"count": 0} on failure.
"""
messages = _extract_recent_messages(transcript_path)
if not messages:
_log("No recent messages to save")
return {"count": 0}
themes = _extract_themes(messages)
# Build a compressed diary entry from recent conversation
now = datetime.now()
topics = "|".join(m[:80] for m in messages[-10:])
entry = (
f"CHECKPOINT:{now.strftime('%Y-%m-%d')}|session:{session_id}"
f"|msgs:{len(messages)}|recent:{topics}"
)
try:
from .mcp_server import tool_diary_write
result = tool_diary_write(
agent_name="session-hook",
entry=entry,
topic="checkpoint",
)
if result.get("success"):
_log(f"Diary checkpoint saved: {result.get('entry_id', '?')}")
# Write state for ack tool to read
try:
ack_file = STATE_DIR / "last_checkpoint"
ack_file.write_text(
json.dumps({"msgs": len(messages), "ts": now.isoformat()}),
encoding="utf-8",
)
except OSError:
pass
if toast:
_desktop_toast(f"Checkpoint saved \u2014 {len(messages)} messages archived")
return {"count": len(messages), "themes": themes}
else:
_log(f"Diary checkpoint failed: {result.get('error', 'unknown')}")
except Exception as e:
_log(f"Diary checkpoint error: {e}")
return {"count": 0}
def _ingest_transcript(transcript_path: str):
"""Mine a Claude Code session transcript into the palace as a conversation."""
path = Path(transcript_path).expanduser()
if not path.is_file() or path.stat().st_size < 100:
return
from .config import MempalaceConfig
try:
MempalaceConfig() # validate config loads
except Exception:
return
try:
log_path = STATE_DIR / "hook.log"
STATE_DIR.mkdir(parents=True, exist_ok=True)
with open(log_path, "a") as log_f:
subprocess.Popen(
[
_mempalace_python(),
"-m",
"mempalace",
"mine",
str(path.parent),
"--mode",
"convos",
"--wing",
"sessions",
],
stdout=log_f,
stderr=log_f,
)
_log(f"Transcript ingest started: {path.name}")
except OSError:
pass
SUPPORTED_HARNESSES = {"claude-code", "codex"}
@@ -328,18 +530,57 @@ def hook_stop(data: dict, harness: str):
_log(f"Session {session_id}: {exchange_count} exchanges, {since_last} since last save")
if since_last >= SAVE_INTERVAL and exchange_count > 0:
# Update last save point
try:
last_save_file.write_text(str(exchange_count), encoding="utf-8")
except OSError:
pass
_log(f"TRIGGERING SAVE at exchange {exchange_count}")
# Optional: auto-ingest if MEMPAL_DIR is set
_maybe_auto_ingest(transcript_path)
# Read hook settings from config
from .config import MempalaceConfig
_output({"decision": "block", "reason": STOP_BLOCK_REASON})
try:
config = MempalaceConfig()
silent = config.hook_silent_save
toast = config.hook_desktop_toast
except Exception:
silent = True
toast = False
if silent:
# Save directly via Python API — systemMessage renders in terminal
result = {"count": 0}
if transcript_path:
result = _save_diary_direct(transcript_path, session_id, toast=toast)
_ingest_transcript(transcript_path)
_maybe_auto_ingest(transcript_path)
# Only advance save marker after successful save
count = result.get("count", 0)
if count > 0:
try:
last_save_file.write_text(str(exchange_count), encoding="utf-8")
except OSError:
pass
themes = result.get("themes", [])
if themes:
tag = " \u2014 " + ", ".join(themes)
else:
tag = ""
_output(
{
"systemMessage": f"\u2726 {count} memories woven into the palace{tag}",
}
)
else:
_output({})
else:
# Legacy: block and ask Claude to save via MCP tools.
# Marker advances before confirmed save — best-effort; if Claude
# fails to save, the checkpoint is lost but won't retry endlessly.
try:
last_save_file.write_text(str(exchange_count), encoding="utf-8")
except OSError:
pass
if transcript_path:
_ingest_transcript(transcript_path)
_maybe_auto_ingest(transcript_path)
_output({"decision": "block", "reason": STOP_BLOCK_REASON})
else:
_output({})
@@ -366,6 +607,10 @@ def hook_precompact(data: dict, harness: str):
_log(f"PRE-COMPACT triggered for session {session_id}")
# Capture tool output via our normalize path before compaction loses it
if transcript_path:
_ingest_transcript(transcript_path)
# Mine synchronously so data lands before compaction proceeds
_mine_sync(transcript_path)