Merge pull request #1231 from MemPalace/fix/hooks-convos-additive-mining
fix(hooks): always mine the active transcript as convos, additive to MEMPAL_DIR
This commit is contained in:
@@ -149,6 +149,10 @@ system prompt:
|
||||
Two Claude Code hooks save periodically and before context compression:
|
||||
[mempalaceofficial.com/guide/hooks](https://mempalaceofficial.com/guide/hooks.html).
|
||||
|
||||
For per-message recall on top of the file-level chunks the hooks produce,
|
||||
run `mempalace sweep <transcript-dir>` periodically — it stores one
|
||||
verbatim drawer per user/assistant message, idempotent and resume-safe.
|
||||
|
||||
---
|
||||
|
||||
## Requirements
|
||||
|
||||
+1
-1
@@ -67,7 +67,7 @@ Edit `mempal_save_hook.sh` to change:
|
||||
|
||||
- **`SAVE_INTERVAL=15`** — How many human messages between saves. Lower = more frequent saves, higher = less interruption.
|
||||
- **`STATE_DIR`** — Where hook state is stored (defaults to `~/.mempalace/hook_state/`)
|
||||
- **`MEMPAL_DIR`** — Optional. Set to a conversations directory to auto-run `mempalace mine <dir>` on each save trigger. Leave blank (default) to let the AI handle saving via the block reason message.
|
||||
- **`MEMPAL_DIR`** — Optional **project directory** (code, notes, docs) to also mine on each save trigger, with `--mode projects`. The hook ALWAYS mines the active conversation transcript automatically with `--mode convos` — `MEMPAL_DIR` is purely additive, never an override. Leave blank if you don't want to ingest project files.
|
||||
- **`MEMPALACE_PYTHON`** — Optional env var. Python interpreter with mempalace + chromadb installed. Auto-detects: `MEMPALACE_PYTHON` env var → repo `venv/bin/python3` → system `python3`. Set this if your venv is in a non-standard location.
|
||||
|
||||
### mempalace CLI
|
||||
|
||||
@@ -41,17 +41,18 @@
|
||||
# to save everything. After the AI saves, compaction proceeds normally.
|
||||
#
|
||||
# === MEMPALACE CLI ===
|
||||
# This repo uses: mempalace mine <dir>
|
||||
# or: mempalace mine <dir> --mode convos
|
||||
# Set MEMPAL_DIR below if you want the hook to auto-ingest before compaction.
|
||||
# Leave blank to rely on the AI's own save instructions.
|
||||
# The hook ALWAYS mines the active conversation transcript synchronously
|
||||
# before compaction (via `mempalace mine <transcript-dir> --mode convos`).
|
||||
# MEMPAL_DIR is an *additional*, optional target for project files — it
|
||||
# does not replace the conversation mine.
|
||||
|
||||
STATE_DIR="$HOME/.mempalace/hook_state"
|
||||
mkdir -p "$STATE_DIR"
|
||||
|
||||
# Optional: set to the directory you want auto-ingested before compaction.
|
||||
# Example: MEMPAL_DIR="$HOME/conversations"
|
||||
# Leave empty to skip auto-ingest (AI handles saving via the block reason).
|
||||
# Optional: project directory (code / notes / docs) to also mine before
|
||||
# compaction. Mined with `--mode projects`. The conversation transcript
|
||||
# is always mined regardless — this is purely additive.
|
||||
# Example: MEMPAL_DIR="$HOME/projects/my_app"
|
||||
MEMPAL_DIR=""
|
||||
|
||||
# Resolve the Python interpreter. Same contract as mempal_save_hook.sh:
|
||||
@@ -64,15 +65,57 @@ fi
|
||||
# Read JSON input from stdin
|
||||
INPUT=$(cat)
|
||||
|
||||
SESSION_ID=$(echo "$INPUT" | "$MEMPAL_PYTHON_BIN" -c "import sys,json; print(json.load(sys.stdin).get('session_id','unknown'))" 2>/dev/null)
|
||||
# Parse session_id and transcript_path in one call. Sanitize both, then
|
||||
# read sanitized values from one-per-line stdout into shell variables —
|
||||
# avoids ``eval`` on generated code (#1231 review). Same contract as
|
||||
# mempal_save_hook.sh.
|
||||
mapfile -t _mempal_parsed < <(echo "$INPUT" | "$MEMPAL_PYTHON_BIN" -c "
|
||||
import sys, json, re
|
||||
data = json.load(sys.stdin)
|
||||
sid = data.get('session_id', 'unknown')
|
||||
tp = data.get('transcript_path', '')
|
||||
safe = lambda s: re.sub(r'[^a-zA-Z0-9_/.\-~]', '', str(s))
|
||||
print(safe(sid))
|
||||
print(safe(tp))
|
||||
" 2>/dev/null)
|
||||
SESSION_ID="${_mempal_parsed[0]:-unknown}"
|
||||
TRANSCRIPT_PATH="${_mempal_parsed[1]:-}"
|
||||
|
||||
# Expand ~ in path
|
||||
TRANSCRIPT_PATH="${TRANSCRIPT_PATH/#\~/$HOME}"
|
||||
|
||||
# Validate that TRANSCRIPT_PATH looks like a transcript file. Mirrors
|
||||
# mempalace.hooks_cli._validate_transcript_path so the shell hook
|
||||
# rejects the same shapes the Python hook rejects (#1231 review).
|
||||
is_valid_transcript_path() {
|
||||
local path="$1"
|
||||
[ -n "$path" ] || return 1
|
||||
case "$path" in
|
||||
*.json|*.jsonl) ;;
|
||||
*) return 1 ;;
|
||||
esac
|
||||
case "/$path/" in
|
||||
*/../*) return 1 ;;
|
||||
esac
|
||||
return 0
|
||||
}
|
||||
|
||||
echo "[$(date '+%H:%M:%S')] PRE-COMPACT triggered for session $SESSION_ID" >> "$STATE_DIR/hook.log"
|
||||
|
||||
# Optional: run mempalace ingest synchronously so memories land before compaction
|
||||
# Run ingest synchronously so memories land before compaction. Two
|
||||
# independent targets — both run if both are set:
|
||||
# 1. TRANSCRIPT_PATH (from Claude Code) → parent dir, --mode convos
|
||||
# 2. MEMPAL_DIR → --mode projects
|
||||
if is_valid_transcript_path "$TRANSCRIPT_PATH" && [ -f "$TRANSCRIPT_PATH" ]; then
|
||||
mempalace mine "$(dirname "$TRANSCRIPT_PATH")" --mode convos \
|
||||
>> "$STATE_DIR/hook.log" 2>&1
|
||||
elif [ -n "$TRANSCRIPT_PATH" ]; then
|
||||
echo "[$(date '+%H:%M:%S')] Skipping invalid transcript path: $TRANSCRIPT_PATH" \
|
||||
>> "$STATE_DIR/hook.log"
|
||||
fi
|
||||
if [ -n "$MEMPAL_DIR" ] && [ -d "$MEMPAL_DIR" ]; then
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
REPO_DIR="$(dirname "$SCRIPT_DIR")"
|
||||
mempalace mine "$MEMPAL_DIR" >> "$STATE_DIR/hook.log" 2>&1
|
||||
mempalace mine "$MEMPAL_DIR" --mode projects \
|
||||
>> "$STATE_DIR/hook.log" 2>&1
|
||||
fi
|
||||
|
||||
# Silent: return empty JSON to not block. "decision": "allow" is invalid —
|
||||
|
||||
+53
-24
@@ -45,10 +45,10 @@
|
||||
# stop_hook_active=true so we let it through. No infinite loop.
|
||||
#
|
||||
# === MEMPALACE CLI ===
|
||||
# This repo uses: mempalace mine <dir>
|
||||
# or: mempalace mine <dir> --mode convos
|
||||
# Set MEMPAL_DIR below if you want the hook to auto-ingest after blocking.
|
||||
# Leave blank to rely on the AI's own save instructions.
|
||||
# The hook ALWAYS mines the active conversation transcript automatically
|
||||
# (via `mempalace mine <transcript-dir> --mode convos`). MEMPAL_DIR is an
|
||||
# *additional*, optional target for project files — it does not replace
|
||||
# the conversation mine.
|
||||
#
|
||||
# === CONFIGURATION ===
|
||||
|
||||
@@ -56,9 +56,10 @@ SAVE_INTERVAL=15 # Save every N human messages (adjust to taste)
|
||||
STATE_DIR="$HOME/.mempalace/hook_state"
|
||||
mkdir -p "$STATE_DIR"
|
||||
|
||||
# Optional: set to the directory you want auto-ingested on each save trigger.
|
||||
# Example: MEMPAL_DIR="$HOME/conversations"
|
||||
# Leave empty to skip auto-ingest (AI handles saving via the block reason).
|
||||
# Optional: project directory (code / notes / docs) to also mine each
|
||||
# save trigger. Mined with `--mode projects`. The conversation transcript
|
||||
# is always mined regardless — this is purely additive.
|
||||
# Example: MEMPAL_DIR="$HOME/projects/my_app"
|
||||
MEMPAL_DIR=""
|
||||
|
||||
# Resolve the Python interpreter the hook should use.
|
||||
@@ -82,9 +83,11 @@ fi
|
||||
INPUT=$(cat)
|
||||
|
||||
# Parse all fields in a single Python call (3x faster than separate invocations)
|
||||
# SECURITY: All values are sanitized before being interpolated into shell assignments.
|
||||
# stop_hook_active is coerced to a strict True/False to prevent command injection via eval.
|
||||
eval $(echo "$INPUT" | "$MEMPAL_PYTHON_BIN" -c "
|
||||
# without invoking ``eval`` on generated code: Python prints one sanitized
|
||||
# value per line, the shell reads them via ``mapfile`` and does plain
|
||||
# variable assignment — same data, smaller blast radius if the sanitizer
|
||||
# is ever bypassed (#1231 review).
|
||||
mapfile -t _mempal_parsed < <(echo "$INPUT" | "$MEMPAL_PYTHON_BIN" -c "
|
||||
import sys, json, re
|
||||
data = json.load(sys.stdin)
|
||||
sid = data.get('session_id', 'unknown')
|
||||
@@ -94,14 +97,36 @@ tp = data.get('transcript_path', '')
|
||||
safe = lambda s: re.sub(r'[^a-zA-Z0-9_/.\-~]', '', str(s))
|
||||
# Coerce stop_hook_active to strict boolean string
|
||||
sha = 'True' if sha_raw is True or str(sha_raw).lower() in ('true', '1', 'yes') else 'False'
|
||||
print(f'SESSION_ID=\"{safe(sid)}\"')
|
||||
print(f'STOP_HOOK_ACTIVE=\"{sha}\"')
|
||||
print(f'TRANSCRIPT_PATH=\"{safe(tp)}\"')
|
||||
print(safe(sid))
|
||||
print(sha)
|
||||
print(safe(tp))
|
||||
" 2>/dev/null)
|
||||
SESSION_ID="${_mempal_parsed[0]:-unknown}"
|
||||
STOP_HOOK_ACTIVE="${_mempal_parsed[1]:-False}"
|
||||
TRANSCRIPT_PATH="${_mempal_parsed[2]:-}"
|
||||
|
||||
# Expand ~ in path
|
||||
TRANSCRIPT_PATH="${TRANSCRIPT_PATH/#\~/$HOME}"
|
||||
|
||||
# Validate that TRANSCRIPT_PATH looks like a transcript file:
|
||||
# - non-empty
|
||||
# - .jsonl or .json suffix
|
||||
# - no traversal segments (.. components)
|
||||
# Mirrors mempalace.hooks_cli._validate_transcript_path so the shell hook
|
||||
# rejects the same shapes the Python hook rejects (#1231 review).
|
||||
is_valid_transcript_path() {
|
||||
local path="$1"
|
||||
[ -n "$path" ] || return 1
|
||||
case "$path" in
|
||||
*.json|*.jsonl) ;;
|
||||
*) return 1 ;;
|
||||
esac
|
||||
case "/$path/" in
|
||||
*/../*) return 1 ;;
|
||||
esac
|
||||
return 0
|
||||
}
|
||||
|
||||
# If we're already in a save cycle, let the AI stop normally
|
||||
# This is the infinite-loop prevention: block once → AI saves → tries to stop again → we let it through
|
||||
if [ "$STOP_HOOK_ACTIVE" = "True" ] || [ "$STOP_HOOK_ACTIVE" = "true" ]; then
|
||||
@@ -157,19 +182,23 @@ if [ "$SINCE_LAST" -ge "$SAVE_INTERVAL" ] && [ "$EXCHANGE_COUNT" -gt 0 ]; then
|
||||
|
||||
echo "[$(date '+%H:%M:%S')] TRIGGERING SAVE at exchange $EXCHANGE_COUNT" >> "$STATE_DIR/hook.log"
|
||||
|
||||
# Auto-mine the transcript. Two paths:
|
||||
# 1. TRANSCRIPT_PATH (from Claude Code) — mine the directory it lives in
|
||||
# 2. MEMPAL_DIR (user-configured) — mine that directory
|
||||
# At least one should work. If neither is set, nothing mines.
|
||||
MINE_DIR=""
|
||||
if [ -n "$TRANSCRIPT_PATH" ] && [ -f "$TRANSCRIPT_PATH" ]; then
|
||||
MINE_DIR="$(dirname "$TRANSCRIPT_PATH")"
|
||||
# Auto-mine. Two independent targets — both run if both are set:
|
||||
# 1. TRANSCRIPT_PATH (from Claude Code) → parent dir, --mode convos
|
||||
# (Claude Code session JSONL — must use the convo miner)
|
||||
# 2. MEMPAL_DIR (user-configured project) → --mode projects
|
||||
# (code, notes, docs)
|
||||
# MEMPAL_DIR is *additive*, not an override: a user with MEMPAL_DIR
|
||||
# pointed at their project still gets the active conversation mined.
|
||||
if is_valid_transcript_path "$TRANSCRIPT_PATH" && [ -f "$TRANSCRIPT_PATH" ]; then
|
||||
mempalace mine "$(dirname "$TRANSCRIPT_PATH")" --mode convos \
|
||||
>> "$STATE_DIR/hook.log" 2>&1 &
|
||||
elif [ -n "$TRANSCRIPT_PATH" ]; then
|
||||
echo "[$(date '+%H:%M:%S')] Skipping invalid transcript path: $TRANSCRIPT_PATH" \
|
||||
>> "$STATE_DIR/hook.log"
|
||||
fi
|
||||
if [ -n "$MEMPAL_DIR" ] && [ -d "$MEMPAL_DIR" ]; then
|
||||
MINE_DIR="$MEMPAL_DIR"
|
||||
fi
|
||||
if [ -n "$MINE_DIR" ]; then
|
||||
mempalace mine "$MINE_DIR" >> "$STATE_DIR/hook.log" 2>&1 &
|
||||
mempalace mine "$MEMPAL_DIR" --mode projects \
|
||||
>> "$STATE_DIR/hook.log" 2>&1 &
|
||||
fi
|
||||
|
||||
# MEMPAL_VERBOSE toggle:
|
||||
|
||||
+48
-29
@@ -197,27 +197,23 @@ def _output(data: dict):
|
||||
sys.stdout.buffer.flush()
|
||||
|
||||
|
||||
def _get_mine_dir(transcript_path: str = "") -> tuple[str, str]:
|
||||
"""Determine directory to mine and the miner mode to use.
|
||||
def _get_mine_targets() -> list[tuple[str, str]]:
|
||||
"""Return the list of ``(dir, mode)`` targets for auto-ingest.
|
||||
|
||||
Returns ``(dir, mode)`` where ``mode`` is ``"projects"`` or ``"convos"``.
|
||||
Empty ``dir`` means no ingest should run.
|
||||
MEMPAL_DIR (when set and resolvable) contributes a ``"projects"``
|
||||
target. Transcript ingestion is handled separately by
|
||||
``_ingest_transcript`` — emitting it here too would double-mine the
|
||||
same JSONL into a different wing on every hook fire (#1231 review).
|
||||
|
||||
MEMPAL_DIR is treated as a project directory ("projects" mode). The
|
||||
transcript-path fallback resolves to the parent of a Claude Code
|
||||
session JSONL, which must be mined with the conversation miner —
|
||||
running the projects miner there ingests JSONL as if it were source
|
||||
code.
|
||||
An empty list means no MEMPAL_DIR ingest should run.
|
||||
"""
|
||||
targets: list[tuple[str, str]] = []
|
||||
mempal_dir = os.environ.get("MEMPAL_DIR", "")
|
||||
if mempal_dir:
|
||||
resolved = Path(mempal_dir).expanduser().resolve()
|
||||
if resolved.is_dir():
|
||||
return str(resolved), "projects"
|
||||
path = _validate_transcript_path(transcript_path)
|
||||
if path is not None and path.is_file():
|
||||
return str(path.parent), "convos"
|
||||
return "", "projects"
|
||||
targets.append((str(resolved), "projects"))
|
||||
return targets
|
||||
|
||||
|
||||
_MINE_PID_FILE = STATE_DIR / "mine.pid"
|
||||
@@ -274,31 +270,52 @@ def _spawn_mine(cmd: list) -> None:
|
||||
_MINE_PID_FILE.write_text(str(proc.pid))
|
||||
|
||||
|
||||
def _maybe_auto_ingest(transcript_path: str = ""):
|
||||
"""Run mempalace mine in background if a mine directory is available."""
|
||||
mine_dir, mode = _get_mine_dir(transcript_path)
|
||||
if not mine_dir:
|
||||
def _maybe_auto_ingest():
|
||||
"""Background-mine MEMPAL_DIR (project files) if set.
|
||||
|
||||
Transcript convos are ingested separately via ``_ingest_transcript``
|
||||
in the hook handlers — this function does not handle them, to avoid
|
||||
asymmetric interpreter handling and PID-file overwrite when both
|
||||
targets fire from a single hook call (#1231 review).
|
||||
"""
|
||||
targets = _get_mine_targets()
|
||||
if not targets:
|
||||
return
|
||||
if _mine_already_running():
|
||||
_log("Skipping auto-ingest: mine already running")
|
||||
return
|
||||
for mine_dir, mode in targets:
|
||||
try:
|
||||
_spawn_mine([sys.executable, "-m", "mempalace", "mine", mine_dir, "--mode", mode])
|
||||
_spawn_mine([_mempalace_python(), "-m", "mempalace", "mine", mine_dir, "--mode", mode])
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
|
||||
def _mine_sync(transcript_path: str = ""):
|
||||
"""Run mempalace mine synchronously (for precompact -- data must land first)."""
|
||||
mine_dir, mode = _get_mine_dir(transcript_path)
|
||||
if not mine_dir:
|
||||
def _mine_sync():
|
||||
"""Synchronously mine MEMPAL_DIR (precompact path).
|
||||
|
||||
Transcript convos are ingested separately via ``_ingest_transcript``
|
||||
in ``hook_precompact`` — keeping them out of this function avoids
|
||||
timeout stacking against the harness 30s ceiling (#1231 review).
|
||||
"""
|
||||
targets = _get_mine_targets()
|
||||
if not targets:
|
||||
return
|
||||
try:
|
||||
STATE_DIR.mkdir(parents=True, exist_ok=True)
|
||||
log_path = STATE_DIR / "hook.log"
|
||||
for mine_dir, mode in targets:
|
||||
try:
|
||||
with open(log_path, "a") as log_f:
|
||||
subprocess.run(
|
||||
[sys.executable, "-m", "mempalace", "mine", mine_dir, "--mode", mode],
|
||||
[
|
||||
_mempalace_python(),
|
||||
"-m",
|
||||
"mempalace",
|
||||
"mine",
|
||||
mine_dir,
|
||||
"--mode",
|
||||
mode,
|
||||
],
|
||||
stdout=log_f,
|
||||
stderr=log_f,
|
||||
timeout=60,
|
||||
@@ -603,7 +620,7 @@ def hook_stop(data: dict, harness: str):
|
||||
transcript_path, session_id, wing=project_wing, toast=toast
|
||||
)
|
||||
_ingest_transcript(transcript_path)
|
||||
_maybe_auto_ingest(transcript_path)
|
||||
_maybe_auto_ingest()
|
||||
# Only advance save marker after successful save
|
||||
count = result.get("count", 0)
|
||||
if count > 0:
|
||||
@@ -633,7 +650,7 @@ def hook_stop(data: dict, harness: str):
|
||||
pass
|
||||
if transcript_path:
|
||||
_ingest_transcript(transcript_path)
|
||||
_maybe_auto_ingest(transcript_path)
|
||||
_maybe_auto_ingest()
|
||||
reason = STOP_BLOCK_REASON + f" Write diary entry to wing={project_wing}."
|
||||
_output({"decision": "block", "reason": reason})
|
||||
else:
|
||||
@@ -666,8 +683,10 @@ def hook_precompact(data: dict, harness: str):
|
||||
if transcript_path:
|
||||
_ingest_transcript(transcript_path)
|
||||
|
||||
# Mine synchronously so data lands before compaction proceeds
|
||||
_mine_sync(transcript_path)
|
||||
# Mine MEMPAL_DIR synchronously so project data lands before
|
||||
# compaction proceeds. Transcript convos were already kicked off
|
||||
# above via _ingest_transcript.
|
||||
_mine_sync()
|
||||
|
||||
_output({})
|
||||
|
||||
|
||||
+133
-72
@@ -12,7 +12,7 @@ from mempalace.hooks_cli import (
|
||||
SAVE_INTERVAL,
|
||||
_count_human_messages,
|
||||
_extract_recent_messages,
|
||||
_get_mine_dir,
|
||||
_get_mine_targets,
|
||||
_log,
|
||||
_maybe_auto_ingest,
|
||||
_mempalace_python,
|
||||
@@ -450,35 +450,26 @@ def test_maybe_auto_ingest_with_env(tmp_path):
|
||||
assert cmd[cmd.index("--mode") + 1] == "projects"
|
||||
|
||||
|
||||
def test_maybe_auto_ingest_with_transcript(tmp_path):
|
||||
"""Transcript fallback spawns mine in convos mode against the JSONL parent."""
|
||||
transcript = tmp_path / "t.jsonl"
|
||||
transcript.write_text("")
|
||||
with patch.dict("os.environ", {}, clear=True):
|
||||
def test_maybe_auto_ingest_uses_mempalace_python(tmp_path):
|
||||
"""Spawned mine command uses _mempalace_python(), not bare sys.executable.
|
||||
|
||||
Hook subprocesses inherit the harness PATH which on GUI-launched
|
||||
Claude Code may resolve to a system Python without chromadb. The
|
||||
interpreter used here must be the same one the hook itself runs
|
||||
under (typically the venv that owns mempalace).
|
||||
"""
|
||||
mempal_dir = tmp_path / "project"
|
||||
mempal_dir.mkdir()
|
||||
with patch.dict("os.environ", {"MEMPAL_DIR": str(mempal_dir)}):
|
||||
with patch("mempalace.hooks_cli.STATE_DIR", tmp_path):
|
||||
with patch("mempalace.hooks_cli._MINE_PID_FILE", tmp_path / "mine.pid"):
|
||||
with patch(
|
||||
"mempalace.hooks_cli._mempalace_python", return_value="/fake/venv/python"
|
||||
):
|
||||
with patch("mempalace.hooks_cli.subprocess.Popen") as mock_popen:
|
||||
_maybe_auto_ingest(str(transcript))
|
||||
mock_popen.assert_called_once()
|
||||
_maybe_auto_ingest()
|
||||
cmd = mock_popen.call_args[0][0]
|
||||
assert "mine" in cmd
|
||||
assert str(tmp_path.resolve()) in cmd
|
||||
assert cmd[cmd.index("--mode") + 1] == "convos"
|
||||
|
||||
|
||||
def test_mine_sync_with_transcript_uses_convos_mode(tmp_path):
|
||||
"""Precompact sync path also picks convos mode for JSONL transcripts."""
|
||||
transcript = tmp_path / "t.jsonl"
|
||||
transcript.write_text("")
|
||||
with patch.dict("os.environ", {}, clear=True):
|
||||
with patch("mempalace.hooks_cli.STATE_DIR", tmp_path):
|
||||
with patch("mempalace.hooks_cli.subprocess.run") as mock_run:
|
||||
_mine_sync(str(transcript))
|
||||
mock_run.assert_called_once()
|
||||
cmd = mock_run.call_args[0][0]
|
||||
assert "mine" in cmd
|
||||
assert str(tmp_path.resolve()) in cmd
|
||||
assert cmd[cmd.index("--mode") + 1] == "convos"
|
||||
assert cmd[0] == "/fake/venv/python"
|
||||
|
||||
|
||||
def test_mine_sync_with_env_uses_projects_mode(tmp_path):
|
||||
@@ -494,6 +485,57 @@ def test_mine_sync_with_env_uses_projects_mode(tmp_path):
|
||||
assert cmd[cmd.index("--mode") + 1] == "projects"
|
||||
|
||||
|
||||
def test_mine_sync_uses_mempalace_python(tmp_path):
|
||||
"""Sync mine command uses _mempalace_python(), not bare sys.executable."""
|
||||
mempal_dir = tmp_path / "project"
|
||||
mempal_dir.mkdir()
|
||||
with patch.dict("os.environ", {"MEMPAL_DIR": str(mempal_dir)}):
|
||||
with patch("mempalace.hooks_cli.STATE_DIR", tmp_path):
|
||||
with patch("mempalace.hooks_cli._mempalace_python", return_value="/fake/venv/python"):
|
||||
with patch("mempalace.hooks_cli.subprocess.run") as mock_run:
|
||||
_mine_sync()
|
||||
cmd = mock_run.call_args[0][0]
|
||||
assert cmd[0] == "/fake/venv/python"
|
||||
|
||||
|
||||
def test_maybe_auto_ingest_ignores_transcript_arg_path(tmp_path):
|
||||
"""_maybe_auto_ingest does NOT mine the transcript directory.
|
||||
|
||||
Transcript convos are handled by _ingest_transcript (called separately
|
||||
in hook handlers). _maybe_auto_ingest only handles MEMPAL_DIR — even
|
||||
when invoked in a context where a transcript is also being processed,
|
||||
no second spawn for the transcript dir should appear here.
|
||||
"""
|
||||
convo_dir = tmp_path / "convos"
|
||||
convo_dir.mkdir()
|
||||
transcript = convo_dir / "session.jsonl"
|
||||
transcript.write_text("")
|
||||
with patch.dict("os.environ", {}, clear=True):
|
||||
with patch("mempalace.hooks_cli.STATE_DIR", tmp_path):
|
||||
with patch("mempalace.hooks_cli._MINE_PID_FILE", tmp_path / "mine.pid"):
|
||||
with patch("mempalace.hooks_cli.subprocess.Popen") as mock_popen:
|
||||
_maybe_auto_ingest()
|
||||
mock_popen.assert_not_called()
|
||||
|
||||
|
||||
def test_mine_sync_ignores_transcript(tmp_path):
|
||||
"""_mine_sync does not run a convos mine for the transcript dir.
|
||||
|
||||
The precompact transcript ingest is the responsibility of
|
||||
_ingest_transcript; routing it through _mine_sync would stack a
|
||||
second 60s timeout against the harness 30s ceiling.
|
||||
"""
|
||||
convo_dir = tmp_path / "convos"
|
||||
convo_dir.mkdir()
|
||||
transcript = convo_dir / "session.jsonl"
|
||||
transcript.write_text("")
|
||||
with patch.dict("os.environ", {}, clear=True):
|
||||
with patch("mempalace.hooks_cli.STATE_DIR", tmp_path):
|
||||
with patch("mempalace.hooks_cli.subprocess.run") as mock_run:
|
||||
_mine_sync()
|
||||
mock_run.assert_not_called()
|
||||
|
||||
|
||||
def test_maybe_auto_ingest_oserror(tmp_path):
|
||||
"""OSError during subprocess spawn is silenced."""
|
||||
mempal_dir = tmp_path / "project"
|
||||
@@ -550,70 +592,78 @@ def test_mine_already_running_corrupt_file(tmp_path):
|
||||
assert _mine_already_running() is False
|
||||
|
||||
|
||||
# --- _get_mine_dir ---
|
||||
# --- _get_mine_targets ---
|
||||
|
||||
|
||||
def test_get_mine_dir_mempal_dir(tmp_path):
|
||||
"""MEMPAL_DIR takes priority, is expanded/resolved, and is treated as projects mode."""
|
||||
def test_get_mine_targets_mempal_dir_only(tmp_path):
|
||||
"""MEMPAL_DIR alone yields a single projects target, expanded/resolved."""
|
||||
mempal_dir = tmp_path / "project"
|
||||
mempal_dir.mkdir()
|
||||
transcript = tmp_path / "t.jsonl"
|
||||
transcript.write_text("")
|
||||
with patch.dict("os.environ", {"MEMPAL_DIR": str(mempal_dir)}):
|
||||
result_dir, result_mode = _get_mine_dir(str(transcript))
|
||||
assert Path(result_dir).resolve() == mempal_dir.resolve()
|
||||
assert result_mode == "projects"
|
||||
targets = _get_mine_targets()
|
||||
assert len(targets) == 1
|
||||
assert Path(targets[0][0]).resolve() == mempal_dir.resolve()
|
||||
assert targets[0][1] == "projects"
|
||||
|
||||
|
||||
def test_get_mine_dir_mempal_dir_tilde(tmp_path):
|
||||
def test_get_mine_targets_mempal_dir_tilde(tmp_path):
|
||||
"""MEMPAL_DIR with a tilde prefix is expanded correctly."""
|
||||
mempal_dir = tmp_path / "project"
|
||||
mempal_dir.mkdir()
|
||||
home = Path.home()
|
||||
# Build a ~-relative path only if tmp_path is inside home
|
||||
try:
|
||||
rel = mempal_dir.relative_to(home)
|
||||
except ValueError:
|
||||
pytest.skip("tmp_path is not under home, cannot build ~-relative path")
|
||||
tilde_path = "~/" + str(rel)
|
||||
with patch.dict("os.environ", {"MEMPAL_DIR": tilde_path}):
|
||||
result_dir, result_mode = _get_mine_dir("")
|
||||
assert Path(result_dir).resolve() == mempal_dir.resolve()
|
||||
assert result_mode == "projects"
|
||||
targets = _get_mine_targets()
|
||||
assert len(targets) == 1
|
||||
assert Path(targets[0][0]).resolve() == mempal_dir.resolve()
|
||||
assert targets[0][1] == "projects"
|
||||
|
||||
|
||||
def test_get_mine_dir_transcript_fallback(tmp_path):
|
||||
"""Transcript fallback resolves to its parent dir in convos mode."""
|
||||
def test_get_mine_targets_no_transcript_target(tmp_path):
|
||||
"""_get_mine_targets does not emit a convos target for the transcript path.
|
||||
|
||||
Transcript ingestion is owned by _ingest_transcript; emitting it
|
||||
here too would double-mine the same JSONL into a different wing on
|
||||
every hook fire (#1231 review).
|
||||
"""
|
||||
transcript = tmp_path / "t.jsonl"
|
||||
transcript.write_text("")
|
||||
with patch.dict("os.environ", {}, clear=True):
|
||||
result_dir, result_mode = _get_mine_dir(str(transcript))
|
||||
assert Path(result_dir).resolve() == tmp_path.resolve()
|
||||
assert result_mode == "convos"
|
||||
targets = _get_mine_targets()
|
||||
assert targets == []
|
||||
|
||||
|
||||
def test_get_mine_dir_transcript_path_traversal_rejected(tmp_path):
|
||||
"""Transcript paths with '..' components are rejected and return no dir."""
|
||||
def test_get_mine_targets_only_returns_mempal_dir(tmp_path):
|
||||
"""When MEMPAL_DIR is set, exactly one projects target — never a convos target."""
|
||||
mempal_dir = tmp_path / "project"
|
||||
mempal_dir.mkdir()
|
||||
with patch.dict("os.environ", {"MEMPAL_DIR": str(mempal_dir)}):
|
||||
targets = _get_mine_targets()
|
||||
assert len(targets) == 1
|
||||
assert targets[0][1] == "projects"
|
||||
|
||||
|
||||
def test_validate_transcript_path_traversal_rejected_jsonl(tmp_path):
|
||||
"""Path traversal is rejected even when the path has a .jsonl suffix.
|
||||
|
||||
The pre-fix test used "../../etc/passwd" which lacks an extension and
|
||||
so was rejected by the suffix gate before the traversal check ever
|
||||
fired (Copilot review on #1231). Use a .jsonl path with `..`
|
||||
segments to exercise the traversal guard specifically.
|
||||
"""
|
||||
assert _validate_transcript_path("../t.jsonl") is None
|
||||
assert _validate_transcript_path("a/../b.jsonl") is None
|
||||
assert _validate_transcript_path("/tmp/../etc/t.jsonl") is None
|
||||
|
||||
|
||||
def test_get_mine_targets_empty():
|
||||
"""Returns empty list when MEMPAL_DIR is unset or invalid."""
|
||||
with patch.dict("os.environ", {}, clear=True):
|
||||
result_dir, result_mode = _get_mine_dir("../../etc/passwd")
|
||||
assert result_dir == ""
|
||||
assert result_mode == "projects"
|
||||
|
||||
|
||||
def test_get_mine_dir_transcript_non_jsonl_rejected(tmp_path):
|
||||
"""Transcript paths without .jsonl/.json extension are rejected."""
|
||||
bad = tmp_path / "notes.txt"
|
||||
bad.write_text("content")
|
||||
with patch.dict("os.environ", {}, clear=True):
|
||||
result_dir, result_mode = _get_mine_dir(str(bad))
|
||||
assert result_dir == ""
|
||||
assert result_mode == "projects"
|
||||
|
||||
|
||||
def test_get_mine_dir_empty():
|
||||
"""Returns empty dir when nothing is available."""
|
||||
with patch.dict("os.environ", {}, clear=True):
|
||||
assert _get_mine_dir("") == ("", "projects")
|
||||
assert _get_mine_targets() == []
|
||||
|
||||
|
||||
# --- _parse_harness_input ---
|
||||
@@ -733,10 +783,19 @@ def test_precompact_with_timeout(tmp_path):
|
||||
|
||||
|
||||
def test_precompact_mines_transcript_dir(tmp_path, monkeypatch):
|
||||
"""Precompact mines transcript directory when no MEMPAL_DIR."""
|
||||
"""Precompact ingests the active transcript via _ingest_transcript.
|
||||
|
||||
With no MEMPAL_DIR, _mine_sync is a no-op; the transcript ingest is
|
||||
the only mining that should fire, and it goes through Popen
|
||||
(background) inside _ingest_transcript. Pre-#1231-review this test
|
||||
asserted against subprocess.run, which corresponded to the
|
||||
duplicate-mine path that has now been removed.
|
||||
"""
|
||||
transcript = tmp_path / "t.jsonl"
|
||||
transcript.write_text("")
|
||||
# _ingest_transcript skips files smaller than 100 bytes, so pad it.
|
||||
transcript.write_text("x" * 200)
|
||||
monkeypatch.delenv("MEMPAL_DIR", raising=False)
|
||||
with patch("mempalace.hooks_cli.subprocess.Popen") as mock_popen:
|
||||
with patch("mempalace.hooks_cli.subprocess.run") as mock_run:
|
||||
result = _capture_hook_output(
|
||||
hook_precompact,
|
||||
@@ -744,11 +803,13 @@ def test_precompact_mines_transcript_dir(tmp_path, monkeypatch):
|
||||
state_dir=tmp_path,
|
||||
)
|
||||
assert result == {}
|
||||
mock_run.assert_called_once()
|
||||
# Mine dir is the transcript's parent (resolved) and mode is convos for JSONL.
|
||||
call_args = mock_run.call_args[0][0]
|
||||
assert str(tmp_path.resolve()) in call_args
|
||||
assert call_args[call_args.index("--mode") + 1] == "convos"
|
||||
mock_run.assert_not_called()
|
||||
mock_popen.assert_called_once()
|
||||
cmd = mock_popen.call_args[0][0]
|
||||
# Mines the transcript's parent dir as convos, into wing "sessions".
|
||||
assert str(tmp_path) in cmd
|
||||
assert cmd[cmd.index("--mode") + 1] == "convos"
|
||||
assert cmd[cmd.index("--wing") + 1] == "sessions"
|
||||
|
||||
|
||||
# --- run_hook ---
|
||||
|
||||
@@ -9,6 +9,9 @@ Written BEFORE the fix.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
class TestSaveHookAutoMines:
|
||||
@@ -16,7 +19,8 @@ class TestSaveHookAutoMines:
|
||||
|
||||
def test_hook_mines_transcript_path(self):
|
||||
"""The hook receives TRANSCRIPT_PATH from Claude Code.
|
||||
It should use that to mine the conversation, not depend on MEMPAL_DIR."""
|
||||
It should use that to mine the conversation as --mode convos,
|
||||
independently of MEMPAL_DIR (which is for project files only)."""
|
||||
hook_path = os.path.join(
|
||||
os.path.dirname(os.path.dirname(__file__)),
|
||||
"hooks",
|
||||
@@ -24,23 +28,17 @@ class TestSaveHookAutoMines:
|
||||
)
|
||||
src = open(hook_path).read()
|
||||
|
||||
# The hook ALREADY receives TRANSCRIPT_PATH in the JSON input.
|
||||
# It should use this to mine the current session's transcript
|
||||
# regardless of whether MEMPAL_DIR is set.
|
||||
# The hook must have a path that uses TRANSCRIPT_PATH to determine
|
||||
# what to mine, separate from the MEMPAL_DIR path.
|
||||
uses_transcript = "TRANSCRIPT_PATH" in src
|
||||
has_mine = "mempalace mine" in src
|
||||
# TRANSCRIPT_PATH must appear in the mining logic, not just the parse block
|
||||
transcript_drives_mine = "MINE_DIR" in src and "dirname" in src and "TRANSCRIPT_PATH" in src
|
||||
|
||||
assert uses_transcript and has_mine and transcript_drives_mine, (
|
||||
"Save hook only mines when MEMPAL_DIR is set (defaults to empty). "
|
||||
"The hook receives TRANSCRIPT_PATH from Claude Code — it should "
|
||||
"mine that file automatically so conversations are saved without "
|
||||
"the user setting an env var. Currently the hook says 'saved in "
|
||||
"background' but nothing actually saves."
|
||||
)
|
||||
# The hook must drive the conversation mine off TRANSCRIPT_PATH,
|
||||
# using `dirname` to derive the parent dir, and tagging it with
|
||||
# `--mode convos` so the convo miner runs (not the projects miner).
|
||||
assert "TRANSCRIPT_PATH" in src, "hook must read transcript_path"
|
||||
assert "mempalace mine" in src, "hook must invoke `mempalace mine`"
|
||||
assert (
|
||||
'dirname "$TRANSCRIPT_PATH"' in src
|
||||
), "hook must mine the transcript's parent directory"
|
||||
assert (
|
||||
"--mode convos" in src
|
||||
), "transcript mine must use --mode convos, not the projects miner"
|
||||
|
||||
def test_mempal_dir_default_not_empty(self):
|
||||
"""If MEMPAL_DIR is still used, it should have a sensible default,
|
||||
@@ -66,3 +64,69 @@ class TestSaveHookAutoMines:
|
||||
'MEMPAL_DIR defaults to "" which silently disables mining. '
|
||||
"Either set a default path or add transcript-based mining."
|
||||
)
|
||||
|
||||
|
||||
class TestShellHookTranscriptValidation:
|
||||
"""Both shell hooks must validate transcript paths before mining them.
|
||||
|
||||
Mirrors mempalace.hooks_cli._validate_transcript_path so unsafe paths
|
||||
(no extension, traversal segments) are rejected at the shell layer
|
||||
too — added in #1231 review (Copilot #7, #8).
|
||||
"""
|
||||
|
||||
@staticmethod
|
||||
def _hook_src(name: str) -> str:
|
||||
path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "hooks", name)
|
||||
return open(path).read()
|
||||
|
||||
@staticmethod
|
||||
def _strip_comments(src: str) -> str:
|
||||
return "\n".join(line for line in src.splitlines() if not line.lstrip().startswith("#"))
|
||||
|
||||
def test_save_hook_defines_and_uses_validator(self):
|
||||
src = self._strip_comments(self._hook_src("mempal_save_hook.sh"))
|
||||
assert "is_valid_transcript_path() {" in src, "validator function must be defined"
|
||||
assert (
|
||||
'is_valid_transcript_path "$TRANSCRIPT_PATH"' in src
|
||||
), "validator must be invoked against TRANSCRIPT_PATH before mining"
|
||||
|
||||
def test_precompact_hook_defines_and_uses_validator(self):
|
||||
src = self._strip_comments(self._hook_src("mempal_precompact_hook.sh"))
|
||||
assert "is_valid_transcript_path() {" in src, "validator function must be defined"
|
||||
assert (
|
||||
'is_valid_transcript_path "$TRANSCRIPT_PATH"' in src
|
||||
), "validator must be invoked against TRANSCRIPT_PATH before mining"
|
||||
|
||||
@pytest.mark.skipif(
|
||||
sys.platform == "win32",
|
||||
reason="shell hooks are POSIX-only; Windows CI bash maps to wsl.exe with no distro",
|
||||
)
|
||||
def test_validators_run_via_bash(self, tmp_path):
|
||||
"""Source the validator out of each hook and exercise it directly."""
|
||||
import subprocess
|
||||
|
||||
for name in ("mempal_save_hook.sh", "mempal_precompact_hook.sh"):
|
||||
src = self._hook_src(name)
|
||||
# Extract just the function definition (first occurrence).
|
||||
start = src.index("is_valid_transcript_path() {")
|
||||
end = src.index("\n}\n", start) + 2
|
||||
func_src = src[start:end]
|
||||
script = tmp_path / "v.sh"
|
||||
script.write_text(
|
||||
f"{func_src}\n" 'is_valid_transcript_path "$1" && echo OK || echo NO\n'
|
||||
)
|
||||
|
||||
def run(arg: str) -> str:
|
||||
return subprocess.run(
|
||||
["bash", str(script), arg],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
check=False,
|
||||
).stdout.strip()
|
||||
|
||||
assert run("/tmp/sessions/abc.jsonl") == "OK"
|
||||
assert run("/tmp/sessions/abc.json") == "OK"
|
||||
assert run("") == "NO"
|
||||
assert run("/tmp/notes.txt") == "NO"
|
||||
assert run("../etc/passwd.jsonl") == "NO"
|
||||
assert run("/tmp/../etc/t.jsonl") == "NO"
|
||||
|
||||
Reference in New Issue
Block a user