bddba59ae3
Adds a brief [!IMPORTANT] callout at the top of the README pointing users to the urgent announcement at #1388. Claude Code auto-deletes local JSONL transcripts after 30 days; users who have not wired up the auto-save hooks are losing transcript data as it ages out of that rolling window.

Ships 4 small standalone tools in tools/:
- backup_claude_jsonls.sh — rsync ~/.claude/projects/ to a safe folder
- render_jsonl.py — convert JSONL transcripts to readable text
- find_orphan_claude_jsonls.sh — scan backup locations for orphan Claude Code transcripts (multi-line shape detection + topic preview)
- save.md — Claude Code slash command for manual /save into MemPalace

Tools verified by independent agent against v3.3.4 source. Read-only on user data. POSIX bash + Python stdlib only.
116 lines
4.9 KiB
Bash
Executable File
116 lines
4.9 KiB
Bash
Executable File
#!/usr/bin/env bash
|
|
# find_orphan_claude_jsonls.sh — v3 (multi-line shape + verb-aware preview)
# -----------------------------------------------------------------------------
# Finds Claude Code conversation transcripts (.jsonl) that may have survived in
# backup/sync locations. Claude Code stores transcripts at
# ~/.claude/projects/<encoded>/<session>.jsonl and auto-deletes them locally
# after 30 days. If your machine syncs to iCloud, Dropbox, Google Drive,
# OneDrive, Time Machine, or you copied transcripts elsewhere manually, those
# copies may still exist. This script finds them and shows a topic preview from
# the first substantive user message, stripping leading filler interjections
# ("ok so", "oh", "well", "hey") so previews surface the actual content.
#
# Read-only with respect to the scanned files. Safe to re-run.
# -----------------------------------------------------------------------------
|
|
# Abort on the first unhandled command failure (-e) and on any use of an
# unset variable (-u).
set -eu

# Directories searched for stray *.jsonl transcripts.
#
# "$HOME/Library/CloudStorage" is where macOS File Provider based sync clients
# (current Dropbox, Google Drive and OneDrive) keep their folders. On such
# machines "$HOME/Dropbox" is frequently only a symlink, which find(1) does not
# descend into by default, so the real location has to be listed explicitly.
LOCATIONS=(
  "$HOME/Library/Mobile Documents"
  "$HOME/Library/CloudStorage"
  "$HOME/Dropbox" "$HOME/Google Drive"
  "$HOME/OneDrive" "$HOME/Documents" "$HOME/Desktop" "/Volumes"
)

# Optional: every command-line argument is treated as one more directory to
# scan (e.g. a folder you copied transcripts into by hand). Running the script
# without arguments scans only the default list above.
if [ "$#" -gt 0 ]; then
  LOCATIONS+=("$@")
fi

# Scratch file that collects one tab-separated record per transcript found.
# "$TMP.s" is its sorted sibling; both are removed on any exit path.
TMP="$(mktemp)"
trap 'rm -f -- "$TMP" "$TMP.s"' EXIT
|
|
|
|
#######################################
# Decide whether a file looks like a Claude Code transcript and, if so, print
# a one-line topic preview taken from its first substantive user message.
# Only the first 30 lines of the file are inspected.
# Arguments: $1 - path to a .jsonl file
# Outputs:   the preview (or a "(no preview ...)" placeholder) on stdout
# Returns:   0 if a line with sessionId + timestamp + message was seen,
#            non-zero otherwise (including when the file cannot be read)
#######################################
orphan_preview() {
  # The program is fed on stdin via the here-doc, so python never touches the
  # stdin of the caller's read loop. stderr is silenced on purpose: a file
  # that cannot be analysed is simply reported as "not a transcript".
  python3 - "$1" 2>/dev/null <<'PYEOF'
import json, re, sys

# Single-word/short greetings — a message is skipped entirely if it is just one of these
GREETINGS = {'hi','hey','hello','thanks','thank you','ok','okay','yes','no',
             'sure','cool','great','good','done','yep','nope','perfect','copy'}

# Leading filler — interjections STRIPPED from the start of a message before
# the preview is taken. Applied repeatedly: "ok so well, then..." -> "then..."
LEADING_FILLER = re.compile(
    r'^(?:ok(?:ay)?|so|oh|well|anyway|btw|hmm+|um+|uh+|hey|hi|hello|right|'
    r'yes|no|sure|cool|great|good|listen|look|wait|actually|alright|gotcha|'
    r'yeah|yep|nope|nah)\b[\s,!.?:;-]*',
    re.IGNORECASE
)


def message_text(content):
    """Flatten a message 'content' value (string or list of parts) to text."""
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        # Keep only well-formed text parts; a non-string 'text' would make
        # join() raise and abort the scan of the whole file.
        return ' '.join(
            part['text'] for part in content
            if isinstance(part, dict) and part.get('type') == 'text'
            and isinstance(part.get('text'), str)
        )
    return ''


def topic_preview(text):
    """Return the trimmed preview for one user message, or '' to skip it."""
    text = re.sub(r'\s+', ' ', text).strip()
    # Skip messages that are pure greetings
    if text.lower() in GREETINGS:
        return ''
    # Strip leading filler tokens until the text stops changing
    previous = None
    while previous != text:
        previous = text
        text = LEADING_FILLER.sub('', text).strip()
    # Skip if what remains is too short to be informative
    if len(text) < 20:
        return ''
    return text[:80] + ('...' if len(text) > 80 else '')


path = sys.argv[1]
shape_ok = False
preview = ""
try:
    # Decode as UTF-8 explicitly so the result does not depend on the locale;
    # undecodable bytes are replaced rather than treated as fatal.
    with open(path, 'r', encoding='utf-8', errors='replace') as fh:
        for i, line in enumerate(fh):
            if i >= 30:
                break
            try:
                d = json.loads(line)
            except Exception:
                continue
            if not isinstance(d, dict):
                continue
            # Shape check — accept if any inspected line has the session fields
            if not shape_ok and 'sessionId' in d and 'timestamp' in d and 'message' in d:
                shape_ok = True
            # Preview — first user message that survives filler stripping
            if not preview:
                msg = d.get('message')
                if not isinstance(msg, dict):
                    # 'message' may be null or a plain string on some lines;
                    # treat it as absent instead of raising AttributeError.
                    msg = {}
                role = d.get('type', '') or msg.get('role', '')
                if role == 'user':
                    preview = topic_preview(message_text(msg.get('content', '')))
            if shape_ok and preview:
                break
except Exception:
    # Best effort: an unreadable file keeps whatever was determined so far
    # (normally shape_ok == False, i.e. "not a transcript").
    pass

if not shape_ok:
    sys.exit(1)

out = preview if preview else "(no preview — first 30 lines were greetings or short)"
# Emit UTF-8 bytes directly so odd characters in a preview cannot turn into an
# encoding error that would silently drop the file from the results.
sys.stdout.buffer.write(out.encode('utf-8', 'replace') + b'\n')
sys.exit(0)
PYEOF
}

#######################################
# Print a file's modification date as YYYY-MM-DD.
# The GNU form is tried first on purpose: on GNU stat, "-f" means "report the
# filesystem", so a BSD-style call prints filesystem details on stdout before
# failing and that text would end up in the captured value.
# Arguments: $1 - path
# Returns:   non-zero if neither stat flavour could read the file
#######################################
file_mtime() {
  local raw
  if raw="$(stat -c '%y' "$1" 2>/dev/null)"; then
    # GNU prints "YYYY-MM-DD HH:MM:SS.NNN +ZZZZ"; keep the date part only.
    printf '%s\n' "${raw%% *}"
  else
    stat -f '%Sm' -t '%Y-%m-%d' "$1" 2>/dev/null
  fi
}

#######################################
# Print a file's size in bytes (GNU stat first, BSD stat as the fallback).
# Arguments: $1 - path
# Returns:   non-zero if neither stat flavour could read the file
#######################################
file_size() {
  stat -c '%s' "$1" 2>/dev/null || stat -f '%z' "$1" 2>/dev/null
}

#######################################
# Walk every existing directory in LOCATIONS and append one
# "mtime<TAB>size<TAB>path<TAB>preview" record to $TMP for each *.jsonl file
# that orphan_preview accepts. One progress dot per directory goes to stderr.
# Globals:   LOCATIONS (read), TMP (read; the file it names is appended to)
# Returns:   1 if python3 is not installed, 0 otherwise
#######################################
scan_locations() {
  local loc f preview mtime size

  # Without this check a missing interpreter would be indistinguishable from
  # "no transcripts found", because orphan_preview hides python's stderr.
  if ! command -v python3 >/dev/null 2>&1; then
    printf '\n%s\n' "error: python3 is required but was not found in PATH" >&2
    return 1
  fi

  for loc in "${LOCATIONS[@]}"; do
    [ -d "$loc" ] || continue
    printf "." >&2
    while IFS= read -r -d '' f; do
      # Combined: shape detection (multi-line) + topic preview
      preview="$(orphan_preview "$f")" || continue
      # A file that vanished since find listed it is skipped, not fatal.
      mtime="$(file_mtime "$f")" || continue
      size="$(file_size "$f")" || continue
      printf '%s\t%s\t%s\t%s\n' "$mtime" "$size" "$f" "$preview" >>"$TMP"
    done < <(find "$loc" -type f -name '*.jsonl' -print0 2>/dev/null)
  done
}

printf "Scanning backup locations" >&2
# A non-zero return (python3 missing) stops the script through the file's
# `set -e`.
scan_locations
printf "\n" >&2
|
|
|
|
# ---- Report -----------------------------------------------------------------
# Every line of $TMP is one "mtime<TAB>size<TAB>path<TAB>preview" record.
records="$(wc -l <"$TMP")"
records="${records// /}"   # some wc implementations left-pad the number

# Nothing collected: say so and finish successfully.
case "$records" in
  0)
    printf '%s\n' "No orphan Claude Code transcripts found in scanned backup locations."
    exit 0
    ;;
esac

# Order by the first column (the date); the sorted copy lives next to $TMP.
sort -k1,1 "$TMP" >"$TMP.s"

# First and last sorted rows carry the earliest and latest dates.
first_date="$(sed -n '1p' "$TMP.s" | cut -f1)"
last_date="$(sed -n '$p' "$TMP.s" | cut -f1)"

printf '%s\n' "Found $records orphan Claude Code transcript(s). Oldest: $first_date Newest: $last_date"
printf '%s\n' "----------------------------------------------------------------------"

# Per transcript: date, right-aligned size and path on one line, then the
# quoted preview on the next line, then a blank separator line.
awk -F'\t' '{
  day = $1; bytes = $2; file = $3; topic = $4
  printf "%s %10s %s\n \"%s\"\n\n", day, bytes, file, topic
}' "$TMP.s"