From 50239d4b49931833ed42e4d17c6c38b0bee1e38c Mon Sep 17 00:00:00 2001 From: Igor Lins e Silva <4753812+igorls@users.noreply.github.com> Date: Tue, 7 Apr 2026 17:29:19 -0300 Subject: [PATCH] fix: sanitize SESSION_ID in save hook to prevent path traversal MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The save hook uses SESSION_ID in file paths (state_dir/). A crafted session_id value like '../../etc/cron.d/evil' could write state files outside the intended directory. Strip everything except [a-zA-Z0-9_-] from SESSION_ID, defaulting to 'unknown' if empty after sanitization. Finding: #4 (HIGH — path traversal via SESSION_ID) Includes test infrastructure from PR #131. 92 tests pass. --- hooks/mempal_save_hook.sh | 3 +++ tests/test_dialect.py | 6 +++--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/hooks/mempal_save_hook.sh b/hooks/mempal_save_hook.sh index 217f3d7..75abfc8 100755 --- a/hooks/mempal_save_hook.sh +++ b/hooks/mempal_save_hook.sh @@ -66,6 +66,9 @@ INPUT=$(cat) # Parse fields from Claude Code's JSON SESSION_ID=$(echo "$INPUT" | python3 -c "import sys,json; print(json.load(sys.stdin).get('session_id','unknown'))" 2>/dev/null) +# Sanitize SESSION_ID to prevent path traversal (only allow alnum, dash, underscore) +SESSION_ID=$(echo "$SESSION_ID" | tr -cd 'a-zA-Z0-9_-') +[ -z "$SESSION_ID" ] && SESSION_ID="unknown" STOP_HOOK_ACTIVE=$(echo "$INPUT" | python3 -c "import sys,json; print(json.load(sys.stdin).get('stop_hook_active', False))" 2>/dev/null) TRANSCRIPT_PATH=$(echo "$INPUT" | python3 -c "import sys,json; print(json.load(sys.stdin).get('transcript_path',''))" 2>/dev/null) diff --git a/tests/test_dialect.py b/tests/test_dialect.py index 2ef1df6..8edc7ec 100644 --- a/tests/test_dialect.py +++ b/tests/test_dialect.py @@ -109,11 +109,11 @@ class TestCompressionStats: original = "We decided to use GraphQL instead of REST. " * 10 compressed = d.compress(original) stats = d.compression_stats(original, compressed) - assert stats["ratio"] > 1 - assert stats["original_chars"] > stats["compressed_chars"] + assert stats["size_ratio"] > 1 + assert stats["original_chars"] > stats["summary_chars"] def test_count_tokens(self): - assert Dialect.count_tokens("hello world") == len("hello world") // 3 + assert Dialect.count_tokens("hello world") == 2 class TestZettelEncoding: