fix: repair command, split args, Claude export, room keywords

- Add `mempalace repair` command to rebuild the vector index from SQLite when HNSW files are corrupted after a crash or interrupt (fixes #74, #72, #96)
- Fix `split` command passing the directory as a positional argument instead of the `--source` flag to `split_mega_files` (fixes #63)
- Handle the Claude privacy export format (an array of conversation objects, each with `chat_messages` inside) in normalize.py (fixes #63)
- Persist room keywords in mempalace.yaml so `mine` can match files in docs/ to the room "documentation" (fixes #108)
2026-04-07 12:02:34 -07:00
parent d1afecc478
commit 5e8a039e7c
3 changed files with 110 additions and 3 deletions
@@ -95,11 +95,33 @@ def _try_claude_code_jsonl(content: str) -> Optional[str]:


 def _try_claude_ai_json(data) -> Optional[str]:
-    """Claude.ai JSON export: [{"role": "user", "content": "..."}]"""
+    """Claude.ai JSON export: flat messages list or privacy export with chat_messages."""
    if isinstance(data, dict):
        data = data.get("messages", data.get("chat_messages", []))
    if not isinstance(data, list):
        return None
+
+    # Privacy export: array of conversation objects with chat_messages inside each
+    if data and isinstance(data[0], dict) and "chat_messages" in data[0]:
+        all_messages = []
+        for convo in data:
+            if not isinstance(convo, dict):
+                continue
+            chat_msgs = convo.get("chat_messages", [])
+            for item in chat_msgs:
+                if not isinstance(item, dict):
+                    continue
+                role = item.get("role", "")
+                text = _extract_content(item.get("content", ""))
+                if role in ("user", "human") and text:
+                    all_messages.append(("user", text))
+                elif role in ("assistant", "ai") and text:
+                    all_messages.append(("assistant", text))
+        if len(all_messages) >= 2:
+            return _messages_to_transcript(all_messages)
+        return None
+
+    # Flat messages list
    messages = []
    for item in data:
        if not isinstance(item, dict):