Merge pull request #1391 from MemPalace/docs/auto-save-tools-on-develop

docs: add 30-day expiry callout + ship 4 auto-save tools
2026-05-06 20:18:44 -03:00
parent f0d236019a 7c679ba625
commit 435f0ad348
5 changed files with 264 additions and 0 deletions
@@ -6,6 +6,10 @@
 > domain — including `mempalace.tech` — is an impostor and may distribute
 > malware. Details and timeline: [docs/HISTORY.md](docs/HISTORY.md).

+> [!IMPORTANT]
+> **🚨 Claude Code sessions expire in 30 days without auto-save hooks wired up!** **[Read this →](https://github.com/MemPalace/mempalace/discussions/1388)**
+
+
 <div align="center">

 <img src="assets/mempalace_logo.png" alt="MemPalace" width="240">
@@ -0,0 +1,39 @@
+#!/usr/bin/env bash
+# backup_claude_jsonls.sh
+#
+# Claude Code stores every conversation as a JSONL transcript at
+#   ~/.claude/projects/<encoded-project>/<session-uuid>.jsonl
+# Anthropic auto-deletes those files after 30 DAYS:
+#   https://docs.claude.com/en/docs/claude-code/data-usage
+#
+# This script copies them, read-only, into ~/Documents/Claude_JSONL_Backup/
+# so the 30-day clock no longer applies. Re-run any time — rsync is incremental.
+# It NEVER deletes, modifies, or touches files inside ~/.claude/.
+
+set -eu
+
+SRC="${HOME}/.claude/projects/"
+DST="${HOME}/Documents/Claude_JSONL_Backup/"
+
+[ -d "$SRC" ] || { echo "ERROR: $SRC does not exist."; exit 1; }
+mkdir -p "$DST"
+
+echo "Backing up $SRC -> $DST"
+rsync -a --times "$SRC" "$DST"
+
+src_count=$(find "$SRC" -type f -name '*.jsonl' | wc -l | tr -d ' ')
+dst_count=$(find "$DST" -type f -name '*.jsonl' | wc -l | tr -d ' ')
+oldest=$(find "$DST" -type f -name '*.jsonl' -exec stat -f '%Sm %N' -t '%Y-%m-%d' {} \; 2>/dev/null \
+        || find "$DST" -type f -name '*.jsonl' -printf '%TY-%Tm-%Td %p\n' 2>/dev/null)
+oldest_date=$(echo "$oldest" | sort | head -n 1 | awk '{print $1}')
+newest_date=$(echo "$oldest" | sort | tail -n 1 | awk '{print $1}')
+
+echo "Source JSONL count : $src_count"
+echo "Backup JSONL count : $dst_count"
+echo "Oldest backup file : ${oldest_date:-n/a}"
+echo "Newest backup file : ${newest_date:-n/a}"
+
+if [ "$src_count" -ne "$dst_count" ]; then
+  echo "FAIL: count mismatch ($src_count vs $dst_count)"; exit 2
+fi
+echo "OK: backup verified."
@@ -0,0 +1,115 @@
+#!/usr/bin/env bash
+# find_orphan_claude_jsonls.sh — v3 (multi-line shape + verb-aware preview)
+# -----------------------------------------------------------------------------
+# Locates Claude Code conversation transcripts (.jsonl) that may still exist in
+# backup/sync locations. Claude Code keeps transcripts at
+# ~/.claude/projects/<encoded>/<session>.jsonl and auto-deletes them locally
+# after 30 days. Copies synced to iCloud, Dropbox, Google Drive, OneDrive or
+# Time Machine, or copied elsewhere by hand, are not affected by that cleanup.
+# For each transcript found, a topic preview is taken from the first
+# substantive user message, with leading filler interjections
+# ("ok so", "oh", "well", "hey") stripped so the actual content shows.
+#
+# Read-only. Safe to re-run.
+# -----------------------------------------------------------------------------
+set -eu
+
+# Directories searched for transcripts; entries that do not exist are skipped
+# by the scan loop.
+LOCATIONS=(
+  "$HOME/Library/Mobile Documents"
+  "$HOME/Dropbox"
+  "$HOME/Google Drive"
+  "$HOME/OneDrive"
+  "$HOME/Documents"
+  "$HOME/Desktop"
+  "/Volumes"
+)
+
+# Scratch file collecting one row per transcript; it and its ".s" companion
+# are removed whenever the script exits.
+TMP="$(mktemp)"
+trap 'rm -f "$TMP" "$TMP.s"' EXIT
+
+printf "Scanning backup locations" >&2
+for loc in "${LOCATIONS[@]}"; do
+  [ -d "$loc" ] || continue
+  printf "." >&2
+  while IFS= read -r -d '' f; do
+    # Combined: shape detection (multi-line) + verb-aware topic preview
+    if preview="$(python3 - "$f" 2>/dev/null <<'PYEOF'
+import json, sys, re
+
+# Single-word/short greetings — message gets skipped entirely if it is just one of these
+GREETINGS = {'hi','hey','hello','thanks','thank you','ok','okay','yes','no',
+             'sure','cool','great','good','done','yep','nope','perfect','copy'}
+
+# Leading filler — interjections that get STRIPPED from the start of a message
+# before the preview is taken. Iterative — handles "ok so well, then..." → "then..."
+LEADING_FILLER = re.compile(
+    r'^(?:ok(?:ay)?|so|oh|well|anyway|btw|hmm+|um+|uh+|hey|hi|hello|right|'
+    r'yes|no|sure|cool|great|good|listen|look|wait|actually|alright|gotcha|'
+    r'yeah|yep|nope|nah)\b[\s,!.?:;-]*',
+    re.IGNORECASE
+)
+
+path = sys.argv[1]
+shape_ok = False
+preview = ""
+try:
+    with open(path, 'r', errors='replace') as fh:
+        for i, line in enumerate(fh):
+            if i >= 30: break
+            try:
+                d = json.loads(line)
+            except Exception:
+                continue
+            if not isinstance(d, dict): continue
+            # Shape check — accept if any line in first 30 has session fields
+            if not shape_ok and 'sessionId' in d and 'timestamp' in d and 'message' in d:
+                shape_ok = True
+            # Preview — first user message after stripping leading filler
+            if not preview:
+                role = d.get('type', '') or d.get('message', {}).get('role', '')
+                if role == 'user':
+                    content = d.get('message', {}).get('content', '')
+                    if isinstance(content, list):
+                        text = ' '.join(
+                            c.get('text', '') for c in content
+                            if isinstance(c, dict) and c.get('type') == 'text'
+                        )
+                    elif isinstance(content, str):
+                        text = content
+                    else:
+                        text = ''
+                    text = re.sub(r'\s+', ' ', text).strip()
+                    # Skip messages that are pure greetings
+                    if text.lower() in GREETINGS:
+                        continue
+                    # Iteratively strip leading filler tokens until stable
+                    prev_text = None
+                    while prev_text != text:
+                        prev_text = text
+                        text = LEADING_FILLER.sub('', text).strip()
+                    # Skip if what remains is too short
+                    if len(text) < 20:
+                        continue
+                    preview = text[:80] + ('...' if len(text) > 80 else '')
+            if shape_ok and preview: break
+except Exception:
+    pass
+if shape_ok:
+    print(preview if preview else "(no preview — first 30 lines were greetings or short)")
+    sys.exit(0)
+sys.exit(1)
+PYEOF
+)"; then
+      mtime="$(stat -f '%Sm' -t '%Y-%m-%d' "$f" 2>/dev/null || stat -c '%y' "$f" 2>/dev/null | cut -d' ' -f1)"
+      size="$(stat -f '%z' "$f" 2>/dev/null || stat -c '%s' "$f" 2>/dev/null)"
+      printf '%s\t%s\t%s\t%s\n' "$mtime" "$size" "$f" "$preview" >>"$TMP"
+    fi
+  done < <(find "$loc" -type f -name '*.jsonl' -print0 2>/dev/null)
+done
+printf "\n" >&2
+
+count=$(wc -l <"$TMP" | tr -d ' ')
+if [ "$count" -eq 0 ]; then
+  echo "No orphan Claude Code transcripts found in scanned backup locations."
+  exit 0
+fi
+sort -k1,1 "$TMP" >"$TMP.s"
+oldest="$(head -n 1 "$TMP.s" | cut -f1)"
+newest="$(tail -n 1 "$TMP.s" | cut -f1)"
+echo "Found $count orphan Claude Code transcript(s). Oldest: $oldest  Newest: $newest"
+echo "----------------------------------------------------------------------"
+awk -F'\t' '{ printf "%s  %10s  %s\n              \"%s\"\n\n", $1, $2, $3, $4 }' "$TMP.s"
@@ -0,0 +1,80 @@
+#!/usr/bin/env python3
+"""render_jsonl.py — turn one Claude Code JSONL transcript into readable text.
+
+Claude Code stores conversations at ~/.claude/projects/<proj>/<uuid>.jsonl and
+Anthropic auto-deletes them after 30 days
+(https://docs.claude.com/en/docs/claude-code/data-usage). This script renders a
+JSONL into a clean .txt so you can keep / read / share it without the tooling.
+
+Usage:
+    python3 render_jsonl.py <input.jsonl> [output.txt]
+
+Stdlib only. Python 3.9+. Read-only on the input.
+"""
+
+import json
+import sys
+from pathlib import Path
+
+
+def extract_text(content):
+    if isinstance(content, str):
+        return content.strip()
+    if isinstance(content, list):
+        parts = []
+        for blk in content:
+            if isinstance(blk, dict) and blk.get("type") == "text":
+                t = (blk.get("text") or "").strip()
+                if t:
+                    parts.append(t)
+        return "\n".join(parts)
+    return ""
+
+
+def main():
+    if len(sys.argv) < 2:
+        print(__doc__)
+        sys.exit(1)
+    src = Path(sys.argv[1])
+    if not src.is_file():
+        print(f"ERROR: not a file: {src}")
+        sys.exit(1)
+    out = open(sys.argv[2], "w", encoding="utf-8") if len(sys.argv) > 2 else sys.stdout
+
+    turns, stamps = [], []
+    for raw in src.read_text(encoding="utf-8", errors="replace").splitlines():
+        if not raw.strip():
+            continue
+        try:
+            obj = json.loads(raw)
+        except json.JSONDecodeError:
+            continue
+        role = obj.get("type") or (obj.get("message") or {}).get("role")
+        if role not in ("user", "assistant"):
+            continue
+        msg = obj.get("message") or obj
+        text = extract_text(msg.get("content"))
+        if not text:
+            continue
+        ts = obj.get("timestamp") or ""
+        if ts:
+            stamps.append(ts)
+        turns.append((ts, role, text))
+
+    header = [
+        f"# Claude Code transcript: {src}",
+        f"# Total turns: {len(turns)}",
+        f"# Date range : {min(stamps) if stamps else 'n/a'}  ->  {max(stamps) if stamps else 'n/a'}",
+        "#" + "-" * 70,
+        "",
+    ]
+    out.write("\n".join(header))
+    for ts, role, text in turns:
+        out.write(f"\n[{ts}] {role.upper()}\n{text}\n\n{'-'*72}\n")
+    if out is not sys.stdout:
+        out.close()
+        print(f"Wrote {len(turns)} turns to {sys.argv[2]}")
+
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,26 @@
+---
+description: Save the current Claude Code session into MemPalace. Idempotent — won't dupe.
+---
+
+# /save
+
+Save the current Claude Code session into MemPalace. Run this when you
+want a checkpoint. Safe to run repeatedly — drawer IDs are content-hashed
+so re-running on the same session overwrites in place, no duplicates.
+
+Behavior:
+
+1. Find the current session's JSONL transcript path (Claude Code passes
+   it via the conversation context — look for `~/.claude/projects/` paths).
+2. Run via bash:
+
+   ```
+   mempalace mine "<TRANSCRIPT_PATH>" --mode convos --wing claude_imports
+   ```
+
+3. If the user supplied an argument after `/save`, use it as the wing name
+   instead of `claude_imports` (e.g. `/save my_research` →
+   `--wing my_research`).
+4. Report back: how many drawers were filed, into which wing/room.
+
+Requires `mempalace` to be installed (`pip install mempalace`).