From 6d8c46221996cd806b5671458d9b7437ad78cfb9 Mon Sep 17 00:00:00 2001 From: bensig Date: Sat, 4 Apr 2026 18:37:17 -0700 Subject: [PATCH] fix: resolve ruff lint and format errors across codebase Fix E402 import ordering, F841 unused variable, F541 unnecessary f-strings, F401 unused import, and auto-format 6 files. --- benchmarks/convomem_bench.py | 9 +- mempalace/cli.py | 10 +- mempalace/knowledge_graph.py | 172 ++++++++++++++++++++-------------- mempalace/mcp_server.py | 134 ++++++++++++++++++-------- mempalace/onboarding.py | 51 +++++----- mempalace/palace_graph.py | 73 +++++++++------ mempalace/split_mega_files.py | 84 +++++++++++------ 7 files changed, 333 insertions(+), 200 deletions(-) diff --git a/benchmarks/convomem_bench.py b/benchmarks/convomem_bench.py index 4fbc63c..a955615 100644 --- a/benchmarks/convomem_bench.py +++ b/benchmarks/convomem_bench.py @@ -25,20 +25,19 @@ import os import sys import json import shutil +import ssl import tempfile import argparse import urllib.request -import ssl - -# Bypass SSL for restricted environments -ssl._create_default_https_context = ssl._create_unverified_context - from pathlib import Path from collections import defaultdict from datetime import datetime import chromadb +# Bypass SSL for restricted environments +ssl._create_default_https_context = ssl._create_unverified_context + sys.path.insert(0, str(Path(__file__).parent.parent)) HF_BASE = "https://huggingface.co/datasets/Salesforce/ConvoMem/resolve/main/core_benchmark/evidence_questions" diff --git a/mempalace/cli.py b/mempalace/cli.py index 3462653..d0f097e 100644 --- a/mempalace/cli.py +++ b/mempalace/cli.py @@ -334,15 +334,19 @@ def main(): ) p_split.add_argument("dir", help="Directory containing transcript files") p_split.add_argument( - "--output-dir", default=None, + "--output-dir", + default=None, help="Write split files here (default: same directory as source files)", ) p_split.add_argument( - "--dry-run", action="store_true", + "--dry-run", + action="store_true", help="Show what would be split without writing files", ) p_split.add_argument( - "--min-sessions", type=int, default=2, + "--min-sessions", + type=int, + default=2, help="Only split files containing at least N sessions (default: 2)", ) diff --git a/mempalace/knowledge_graph.py b/mempalace/knowledge_graph.py index fb0e91e..a2f8b54 100644 --- a/mempalace/knowledge_graph.py +++ b/mempalace/knowledge_graph.py @@ -101,16 +101,23 @@ class KnowledgeGraph: conn = self._conn() conn.execute( "INSERT OR REPLACE INTO entities (id, name, type, properties) VALUES (?, ?, ?, ?)", - (eid, name, entity_type, props) + (eid, name, entity_type, props), ) conn.commit() conn.close() return eid - def add_triple(self, subject: str, predicate: str, obj: str, - valid_from: str = None, valid_to: str = None, - confidence: float = 1.0, source_closet: str = None, - source_file: str = None): + def add_triple( + self, + subject: str, + predicate: str, + obj: str, + valid_from: str = None, + valid_to: str = None, + confidence: float = 1.0, + source_closet: str = None, + source_file: str = None, + ): """ Add a relationship triple: subject → predicate → object. @@ -125,19 +132,13 @@ class KnowledgeGraph: # Auto-create entities if they don't exist conn = self._conn() - conn.execute( - "INSERT OR IGNORE INTO entities (id, name) VALUES (?, ?)", - (sub_id, subject) - ) - conn.execute( - "INSERT OR IGNORE INTO entities (id, name) VALUES (?, ?)", - (obj_id, obj) - ) + conn.execute("INSERT OR IGNORE INTO entities (id, name) VALUES (?, ?)", (sub_id, subject)) + conn.execute("INSERT OR IGNORE INTO entities (id, name) VALUES (?, ?)", (obj_id, obj)) # Check for existing identical triple existing = conn.execute( "SELECT id FROM triples WHERE subject=? AND predicate=? AND object=? AND valid_to IS NULL", - (sub_id, pred, obj_id) + (sub_id, pred, obj_id), ).fetchone() if existing: @@ -149,7 +150,17 @@ class KnowledgeGraph: conn.execute( """INSERT INTO triples (id, subject, predicate, object, valid_from, valid_to, confidence, source_closet, source_file) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)""", - (triple_id, sub_id, pred, obj_id, valid_from, valid_to, confidence, source_closet, source_file) + ( + triple_id, + sub_id, + pred, + obj_id, + valid_from, + valid_to, + confidence, + source_closet, + source_file, + ), ) conn.commit() conn.close() @@ -165,7 +176,7 @@ class KnowledgeGraph: conn = self._conn() conn.execute( "UPDATE triples SET valid_to=? WHERE subject=? AND predicate=? AND object=? AND valid_to IS NULL", - (ended, sub_id, pred, obj_id) + (ended, sub_id, pred, obj_id), ) conn.commit() conn.close() @@ -191,17 +202,19 @@ class KnowledgeGraph: query += " AND (t.valid_from IS NULL OR t.valid_from <= ?) AND (t.valid_to IS NULL OR t.valid_to >= ?)" params.extend([as_of, as_of]) for row in conn.execute(query, params).fetchall(): - results.append({ - "direction": "outgoing", - "subject": name, - "predicate": row[2], - "object": row[10], # obj_name - "valid_from": row[4], - "valid_to": row[5], - "confidence": row[6], - "source_closet": row[7], - "current": row[5] is None, - }) + results.append( + { + "direction": "outgoing", + "subject": name, + "predicate": row[2], + "object": row[10], # obj_name + "valid_from": row[4], + "valid_to": row[5], + "confidence": row[6], + "source_closet": row[7], + "current": row[5] is None, + } + ) if direction in ("incoming", "both"): query = "SELECT t.*, e.name as sub_name FROM triples t JOIN entities e ON t.subject = e.id WHERE t.object = ?" @@ -210,17 +223,19 @@ class KnowledgeGraph: query += " AND (t.valid_from IS NULL OR t.valid_from <= ?) AND (t.valid_to IS NULL OR t.valid_to >= ?)" params.extend([as_of, as_of]) for row in conn.execute(query, params).fetchall(): - results.append({ - "direction": "incoming", - "subject": row[10], # sub_name - "predicate": row[2], - "object": name, - "valid_from": row[4], - "valid_to": row[5], - "confidence": row[6], - "source_closet": row[7], - "current": row[5] is None, - }) + results.append( + { + "direction": "incoming", + "subject": row[10], # sub_name + "predicate": row[2], + "object": name, + "valid_from": row[4], + "valid_to": row[5], + "confidence": row[6], + "source_closet": row[7], + "current": row[5] is None, + } + ) conn.close() return results @@ -243,14 +258,16 @@ class KnowledgeGraph: results = [] for row in conn.execute(query, params).fetchall(): - results.append({ - "subject": row[10], - "predicate": pred, - "object": row[11], - "valid_from": row[4], - "valid_to": row[5], - "current": row[5] is None, - }) + results.append( + { + "subject": row[10], + "predicate": pred, + "object": row[11], + "valid_from": row[4], + "valid_to": row[5], + "current": row[5] is None, + } + ) conn.close() return results @@ -259,14 +276,17 @@ class KnowledgeGraph: conn = self._conn() if entity_name: eid = self._entity_id(entity_name) - rows = conn.execute(""" + rows = conn.execute( + """ SELECT t.*, s.name as sub_name, o.name as obj_name FROM triples t JOIN entities s ON t.subject = s.id JOIN entities o ON t.object = o.id WHERE (t.subject = ? OR t.object = ?) ORDER BY t.valid_from ASC NULLS LAST - """, (eid, eid)).fetchall() + """, + (eid, eid), + ).fetchall() else: rows = conn.execute(""" SELECT t.*, s.name as sub_name, o.name as obj_name @@ -278,14 +298,17 @@ class KnowledgeGraph: """).fetchall() conn.close() - return [{ - "subject": r[10], - "predicate": r[2], - "object": r[11], - "valid_from": r[4], - "valid_to": r[5], - "current": r[5] is None, - } for r in rows] + return [ + { + "subject": r[10], + "predicate": r[2], + "object": r[11], + "valid_from": r[4], + "valid_to": r[5], + "current": r[5] is None, + } + for r in rows + ] # ── Stats ───────────────────────────────────────────────────────────── @@ -295,9 +318,12 @@ class KnowledgeGraph: triples = conn.execute("SELECT COUNT(*) FROM triples").fetchone()[0] current = conn.execute("SELECT COUNT(*) FROM triples WHERE valid_to IS NULL").fetchone()[0] expired = triples - current - predicates = [r[0] for r in conn.execute( - "SELECT DISTINCT predicate FROM triples ORDER BY predicate" - ).fetchall()] + predicates = [ + r[0] + for r in conn.execute( + "SELECT DISTINCT predicate FROM triples ORDER BY predicate" + ).fetchall() + ] conn.close() return { "entities": entities, @@ -317,16 +343,21 @@ class KnowledgeGraph: for key, facts in entity_facts.items(): name = facts.get("full_name", key.capitalize()) etype = facts.get("type", "person") - self.add_entity(name, etype, { - "gender": facts.get("gender", ""), - "birthday": facts.get("birthday", ""), - }) + self.add_entity( + name, + etype, + { + "gender": facts.get("gender", ""), + "birthday": facts.get("birthday", ""), + }, + ) # Relationships parent = facts.get("parent") if parent: - self.add_triple(name, "child_of", parent.capitalize(), - valid_from=facts.get("birthday")) + self.add_triple( + name, "child_of", parent.capitalize(), valid_from=facts.get("birthday") + ) partner = facts.get("partner") if partner: @@ -334,8 +365,12 @@ class KnowledgeGraph: relationship = facts.get("relationship", "") if relationship == "daughter": - self.add_triple(name, "is_child_of", facts.get("parent", "").capitalize() or name, - valid_from=facts.get("birthday")) + self.add_triple( + name, + "is_child_of", + facts.get("parent", "").capitalize() or name, + valid_from=facts.get("birthday"), + ) elif relationship == "husband": self.add_triple(name, "is_partner_of", facts.get("partner", name).capitalize()) elif relationship == "brother": @@ -346,5 +381,4 @@ class KnowledgeGraph: # Interests for interest in facts.get("interests", []): - self.add_triple(name, "loves", interest.capitalize(), - valid_from="2025-01-01") + self.add_triple(name, "loves", interest.capitalize(), valid_from="2025-01-01") diff --git a/mempalace/mcp_server.py b/mempalace/mcp_server.py index bc8f550..5902fab 100644 --- a/mempalace/mcp_server.py +++ b/mempalace/mcp_server.py @@ -26,12 +26,12 @@ from datetime import datetime from .config import MempalaceConfig from .searcher import search_memories from .palace_graph import traverse, find_tunnels, graph_stats +import chromadb + from .knowledge_graph import KnowledgeGraph _kg = KnowledgeGraph() -import chromadb - logging.basicConfig(level=logging.INFO, format="%(message)s", stream=sys.stderr) logger = logging.getLogger("mempalace_mcp") @@ -312,19 +312,24 @@ def tool_kg_query(entity: str, as_of: str = None, direction: str = "both"): return {"entity": entity, "as_of": as_of, "facts": results, "count": len(results)} -def tool_kg_add(subject: str, predicate: str, object: str, - valid_from: str = None, source_closet: str = None): +def tool_kg_add( + subject: str, predicate: str, object: str, valid_from: str = None, source_closet: str = None +): """Add a relationship to the knowledge graph.""" - triple_id = _kg.add_triple(subject, predicate, object, - valid_from=valid_from, source_closet=source_closet) - return {"success": True, "triple_id": triple_id, - "fact": f"{subject} → {predicate} → {object}"} + triple_id = _kg.add_triple( + subject, predicate, object, valid_from=valid_from, source_closet=source_closet + ) + return {"success": True, "triple_id": triple_id, "fact": f"{subject} → {predicate} → {object}"} def tool_kg_invalidate(subject: str, predicate: str, object: str, ended: str = None): """Mark a fact as no longer true (set end date).""" _kg.invalidate(subject, predicate, object, ended=ended) - return {"success": True, "fact": f"{subject} → {predicate} → {object}", "ended": ended or "today"} + return { + "success": True, + "fact": f"{subject} → {predicate} → {object}", + "ended": ended or "today", + } def tool_kg_timeline(entity: str = None): @@ -362,16 +367,18 @@ def tool_diary_write(agent_name: str, entry: str, topic: str = "general"): col.add( ids=[entry_id], documents=[entry], - metadatas=[{ - "wing": wing, - "room": room, - "hall": "hall_diary", - "topic": topic, - "type": "diary_entry", - "agent": agent_name, - "filed_at": now.isoformat(), - "date": now.strftime("%Y-%m-%d"), - }], + metadatas=[ + { + "wing": wing, + "room": room, + "hall": "hall_diary", + "topic": topic, + "type": "diary_entry", + "agent": agent_name, + "filed_at": now.isoformat(), + "date": now.strftime("%Y-%m-%d"), + } + ], ) logger.info(f"Diary entry: {entry_id} → {wing}/diary/{topic}") return { @@ -407,12 +414,14 @@ def tool_diary_read(agent_name: str, last_n: int = 10): # Combine and sort by timestamp entries = [] for doc, meta in zip(results["documents"], results["metadatas"]): - entries.append({ - "date": meta.get("date", ""), - "timestamp": meta.get("filed_at", ""), - "topic": meta.get("topic", ""), - "content": doc, - }) + entries.append( + { + "date": meta.get("date", ""), + "timestamp": meta.get("filed_at", ""), + "topic": meta.get("topic", ""), + "content": doc, + } + ) entries.sort(key=lambda x: x["timestamp"], reverse=True) entries = entries[:last_n] @@ -465,9 +474,18 @@ TOOLS = { "input_schema": { "type": "object", "properties": { - "entity": {"type": "string", "description": "Entity to query (e.g. 'Max', 'MyProject', 'Alice')"}, - "as_of": {"type": "string", "description": "Date filter — only facts valid at this date (YYYY-MM-DD, optional)"}, - "direction": {"type": "string", "description": "outgoing (entity→?), incoming (?→entity), or both (default: both)"}, + "entity": { + "type": "string", + "description": "Entity to query (e.g. 'Max', 'MyProject', 'Alice')", + }, + "as_of": { + "type": "string", + "description": "Date filter — only facts valid at this date (YYYY-MM-DD, optional)", + }, + "direction": { + "type": "string", + "description": "outgoing (entity→?), incoming (?→entity), or both (default: both)", + }, }, "required": ["entity"], }, @@ -479,10 +497,19 @@ TOOLS = { "type": "object", "properties": { "subject": {"type": "string", "description": "The entity doing/being something"}, - "predicate": {"type": "string", "description": "The relationship type (e.g. 'loves', 'works_on', 'daughter_of')"}, + "predicate": { + "type": "string", + "description": "The relationship type (e.g. 'loves', 'works_on', 'daughter_of')", + }, "object": {"type": "string", "description": "The entity being connected to"}, - "valid_from": {"type": "string", "description": "When this became true (YYYY-MM-DD, optional)"}, - "source_closet": {"type": "string", "description": "Closet ID where this fact appears (optional)"}, + "valid_from": { + "type": "string", + "description": "When this became true (YYYY-MM-DD, optional)", + }, + "source_closet": { + "type": "string", + "description": "Closet ID where this fact appears (optional)", + }, }, "required": ["subject", "predicate", "object"], }, @@ -496,7 +523,10 @@ TOOLS = { "subject": {"type": "string", "description": "Entity"}, "predicate": {"type": "string", "description": "Relationship"}, "object": {"type": "string", "description": "Connected entity"}, - "ended": {"type": "string", "description": "When it stopped being true (YYYY-MM-DD, default: today)"}, + "ended": { + "type": "string", + "description": "When it stopped being true (YYYY-MM-DD, default: today)", + }, }, "required": ["subject", "predicate", "object"], }, @@ -507,7 +537,10 @@ TOOLS = { "input_schema": { "type": "object", "properties": { - "entity": {"type": "string", "description": "Entity to get timeline for (optional — omit for full timeline)"}, + "entity": { + "type": "string", + "description": "Entity to get timeline for (optional — omit for full timeline)", + }, }, }, "handler": tool_kg_timeline, @@ -522,8 +555,14 @@ TOOLS = { "input_schema": { "type": "object", "properties": { - "start_room": {"type": "string", "description": "Room to start from (e.g. 'chromadb-setup', 'riley-school')"}, - "max_hops": {"type": "integer", "description": "How many connections to follow (default: 2)"}, + "start_room": { + "type": "string", + "description": "Room to start from (e.g. 'chromadb-setup', 'riley-school')", + }, + "max_hops": { + "type": "integer", + "description": "How many connections to follow (default: 2)", + }, }, "required": ["start_room"], }, @@ -611,9 +650,18 @@ TOOLS = { "input_schema": { "type": "object", "properties": { - "agent_name": {"type": "string", "description": "Your name — each agent gets their own diary wing"}, - "entry": {"type": "string", "description": "Your diary entry in AAAK format — compressed, entity-coded, emotion-marked"}, - "topic": {"type": "string", "description": "Topic tag (optional, default: general)"}, + "agent_name": { + "type": "string", + "description": "Your name — each agent gets their own diary wing", + }, + "entry": { + "type": "string", + "description": "Your diary entry in AAAK format — compressed, entity-coded, emotion-marked", + }, + "topic": { + "type": "string", + "description": "Topic tag (optional, default: general)", + }, }, "required": ["agent_name", "entry"], }, @@ -624,8 +672,14 @@ TOOLS = { "input_schema": { "type": "object", "properties": { - "agent_name": {"type": "string", "description": "Your name — each agent gets their own diary wing"}, - "last_n": {"type": "integer", "description": "Number of recent entries to read (default: 10)"}, + "agent_name": { + "type": "string", + "description": "Your name — each agent gets their own diary wing", + }, + "last_n": { + "type": "integer", + "description": "Number of recent entries to read (default: 10)", + }, }, "required": ["agent_name"], }, diff --git a/mempalace/onboarding.py b/mempalace/onboarding.py index 8b9b38b..f578d91 100644 --- a/mempalace/onboarding.py +++ b/mempalace/onboarding.py @@ -263,7 +263,9 @@ def _warn_ambiguous(people: list) -> list: # ───────────────────────────────────────────────────────────────────────────── -def _generate_aaak_bootstrap(people: list, projects: list, wings: list, mode: str, config_dir: Path = None): +def _generate_aaak_bootstrap( + people: list, projects: list, wings: list, mode: str, config_dir: Path = None +): """ Generate AAAK entity registry + critical facts bootstrap from onboarding data. These files teach the AI about the user's world from session one. @@ -292,7 +294,6 @@ def _generate_aaak_bootstrap(people: list, projects: list, wings: list, mode: st name = p["name"] code = entity_codes[name] rel = p.get("relationship", "") - ctx = p.get("context", "") registry_lines.append(f" {code}={name} ({rel})" if rel else f" {code}={name}") if projects: @@ -301,13 +302,15 @@ def _generate_aaak_bootstrap(people: list, projects: list, wings: list, mode: st code = proj[:4].upper() registry_lines.append(f" {code}={proj}") - registry_lines.extend([ - "", - "## AAAK Quick Reference", - " Symbols: ♡=love ★=importance ⚠=warning →=relationship |=separator", - " Structure: KEY:value | GROUP(details) | entity.attribute", - " Read naturally — expand codes, treat *markers* as emotional context.", - ]) + registry_lines.extend( + [ + "", + "## AAAK Quick Reference", + " Symbols: ♡=love ★=importance ⚠=warning →=relationship |=separator", + " Structure: KEY:value | GROUP(details) | entity.attribute", + " Read naturally — expand codes, treat *markers* as emotional context.", + ] + ) (mempalace_dir / "aaak_entities.md").write_text("\n".join(registry_lines)) @@ -325,7 +328,9 @@ def _generate_aaak_bootstrap(people: list, projects: list, wings: list, mode: st for p in personal_people: code = entity_codes[p["name"]] rel = p.get("relationship", "") - facts_lines.append(f"- **{p['name']}** ({code}) — {rel}" if rel else f"- **{p['name']}** ({code})") + facts_lines.append( + f"- **{p['name']}** ({code}) — {rel}" if rel else f"- **{p['name']}** ({code})" + ) facts_lines.append("") if work_people: @@ -333,7 +338,9 @@ def _generate_aaak_bootstrap(people: list, projects: list, wings: list, mode: st for p in work_people: code = entity_codes[p["name"]] rel = p.get("relationship", "") - facts_lines.append(f"- **{p['name']}** ({code}) — {rel}" if rel else f"- **{p['name']}** ({code})") + facts_lines.append( + f"- **{p['name']}** ({code}) — {rel}" if rel else f"- **{p['name']}** ({code})" + ) facts_lines.append("") if projects: @@ -342,13 +349,15 @@ def _generate_aaak_bootstrap(people: list, projects: list, wings: list, mode: st facts_lines.append(f"- **{proj}**") facts_lines.append("") - facts_lines.extend([ - "## Palace", - f"Wings: {', '.join(wings)}", - f"Mode: {mode}", - "", - "*This file will be enriched by palace_facts.py after mining.*", - ]) + facts_lines.extend( + [ + "## Palace", + f"Wings: {', '.join(wings)}", + f"Mode: {mode}", + "", + "*This file will be enriched by palace_facts.py after mining.*", + ] + ) (mempalace_dir / "critical_facts.md").write_text("\n".join(facts_lines)) @@ -433,9 +442,9 @@ def run_onboarding( print(f" {registry.summary()}") print(f"\n Wings: {', '.join(wings)}") print(f"\n Registry saved to: {registry._path}") - print(f"\n AAAK entity registry: ~/.mempalace/aaak_entities.md") - print(f" Critical facts bootstrap: ~/.mempalace/critical_facts.md") - print(f"\n Your AI will know your world from the first session.") + print("\n AAAK entity registry: ~/.mempalace/aaak_entities.md") + print(" Critical facts bootstrap: ~/.mempalace/critical_facts.md") + print("\n Your AI will know your world from the first session.") print() return registry diff --git a/mempalace/palace_graph.py b/mempalace/palace_graph.py index 8de6d48..e4fda93 100644 --- a/mempalace/palace_graph.py +++ b/mempalace/palace_graph.py @@ -71,15 +71,17 @@ def build_graph(col=None, config=None): wings = sorted(data["wings"]) if len(wings) >= 2: for i, wa in enumerate(wings): - for wb in wings[i + 1:]: + for wb in wings[i + 1 :]: for hall in data["halls"]: - edges.append({ - "room": room, - "wing_a": wa, - "wing_b": wb, - "hall": hall, - "count": data["count"], - }) + edges.append( + { + "room": room, + "wing_a": wa, + "wing_b": wb, + "hall": hall, + "count": data["count"], + } + ) # Convert sets to lists for JSON serialization nodes = {} @@ -104,17 +106,22 @@ def traverse(start_room: str, col=None, config=None, max_hops: int = 2): nodes, edges = build_graph(col, config) if start_room not in nodes: - return {"error": f"Room '{start_room}' not found", "suggestions": _fuzzy_match(start_room, nodes)} + return { + "error": f"Room '{start_room}' not found", + "suggestions": _fuzzy_match(start_room, nodes), + } start = nodes[start_room] visited = {start_room} - results = [{ - "room": start_room, - "wings": start["wings"], - "halls": start["halls"], - "count": start["count"], - "hop": 0, - }] + results = [ + { + "room": start_room, + "wings": start["wings"], + "halls": start["halls"], + "count": start["count"], + "hop": 0, + } + ] # BFS traversal frontier = [(start_room, 0)] @@ -133,14 +140,16 @@ def traverse(start_room: str, col=None, config=None, max_hops: int = 2): shared_wings = current_wings & set(data["wings"]) if shared_wings: visited.add(room) - results.append({ - "room": room, - "wings": data["wings"], - "halls": data["halls"], - "count": data["count"], - "hop": depth + 1, - "connected_via": sorted(shared_wings), - }) + results.append( + { + "room": room, + "wings": data["wings"], + "halls": data["halls"], + "count": data["count"], + "hop": depth + 1, + "connected_via": sorted(shared_wings), + } + ) if depth + 1 < max_hops: frontier.append((room, depth + 1)) @@ -167,13 +176,15 @@ def find_tunnels(wing_a: str = None, wing_b: str = None, col=None, config=None): if wing_b and wing_b not in wings: continue - tunnels.append({ - "room": room, - "wings": wings, - "halls": data["halls"], - "count": data["count"], - "recent": data["dates"][-1] if data["dates"] else "", - }) + tunnels.append( + { + "room": room, + "wings": wings, + "halls": data["halls"], + "count": data["count"], + "recent": data["dates"][-1] if data["dates"] else "", + } + ) tunnels.sort(key=lambda x: -x["count"]) return tunnels[:50] diff --git a/mempalace/split_mega_files.py b/mempalace/split_mega_files.py index b49e4f1..2071807 100644 --- a/mempalace/split_mega_files.py +++ b/mempalace/split_mega_files.py @@ -26,16 +26,16 @@ import argparse import json import os import re -import sys from pathlib import Path -HOME = Path.home() -LUMI_DIR = Path(os.environ.get("MEMPALACE_SOURCE_DIR", str(HOME / "Desktop/transcripts"))) +HOME = Path.home() +LUMI_DIR = Path(os.environ.get("MEMPALACE_SOURCE_DIR", str(HOME / "Desktop/transcripts"))) # People we know about (for name detection in content) # Loaded from ~/.mempalace/known_names.json if it exists, otherwise generic fallback. _KNOWN_NAMES_PATH = HOME / ".mempalace" / "known_names.json" + def _load_known_people() -> list: """Load known names from config file, falling back to a generic list.""" if _KNOWN_NAMES_PATH.exists(): @@ -49,6 +49,7 @@ def _load_known_people() -> list: # Generic fallback — override by creating ~/.mempalace/known_names.json return ["Alice", "Ben", "Riley", "Max", "Sam", "Devon", "Jordan"] + KNOWN_PEOPLE = _load_known_people() @@ -69,7 +70,7 @@ def is_true_session_start(lines, idx): True session start: 'Claude Code v' header NOT followed by 'Ctrl+E'/'previous messages' within the next 6 lines (those are context restores, not new sessions). """ - nearby = "".join(lines[idx:idx + 6]) + nearby = "".join(lines[idx : idx + 6]) return "Ctrl+E" not in nearby and "previous messages" not in nearby @@ -87,13 +88,20 @@ def extract_timestamp(lines): Find the first timestamp line: ⏺ H:MM AM/PM Weekday, Month DD, YYYY Returns (datetime_str, iso_str) or (None, None). """ - ts_pattern = re.compile( - r"⏺\s+(\d{1,2}:\d{2}\s+[AP]M)\s+\w+,\s+(\w+)\s+(\d{1,2}),\s+(\d{4})" - ) + ts_pattern = re.compile(r"⏺\s+(\d{1,2}:\d{2}\s+[AP]M)\s+\w+,\s+(\w+)\s+(\d{1,2}),\s+(\d{4})") months = { - "January": "01", "February": "02", "March": "03", "April": "04", - "May": "05", "June": "06", "July": "07", "August": "08", - "September": "09", "October": "10", "November": "11", "December": "12", + "January": "01", + "February": "02", + "March": "03", + "April": "04", + "May": "05", + "June": "06", + "July": "07", + "August": "08", + "September": "09", + "October": "10", + "November": "11", + "December": "12", } for line in lines[:50]: m = ts_pattern.search(line) @@ -177,16 +185,16 @@ def split_file(filepath, output_dir, dry_run=False): continue # Skip tiny fragments ts_human, ts_iso = extract_timestamp(chunk) - people = extract_people(chunk) - subject = extract_subject(chunk) + people = extract_people(chunk) + subject = extract_subject(chunk) # Build filename: SOURCESTEM__DATE_TIME_People_subject.txt # Source stem prefix prevents collisions when multiple mega-files # produce sessions with the same timestamp/people/subject. - ts_part = ts_human or f"part{i+1:02d}" + ts_part = ts_human or f"part{i + 1:02d}" people_part = "-".join(people[:3]) if people else "unknown" - src_stem = re.sub(r"[^\w-]", "_", path.stem)[:40] - name = f"{src_stem}__{ts_part}_{people_part}_{subject}.txt" + src_stem = re.sub(r"[^\w-]", "_", path.stem)[:40] + name = f"{src_stem}__{ts_part}_{people_part}_{subject}.txt" # Sanitize name = re.sub(r"[^\w\.\-]", "_", name) name = re.sub(r"_+", "_", name) @@ -194,7 +202,7 @@ def split_file(filepath, output_dir, dry_run=False): out_path = out_dir / name if dry_run: - print(f" [{i+1}/{len(boundaries)-1}] {name} ({len(chunk)} lines)") + print(f" [{i + 1}/{len(boundaries) - 1}] {name} ({len(chunk)} lines)") else: out_path.write_text("".join(chunk)) print(f" ✓ {name} ({len(chunk)} lines)") @@ -208,19 +216,33 @@ def main(): parser = argparse.ArgumentParser( description="Split concatenated transcript mega-files into per-session files" ) - parser.add_argument("--source", type=str, default=None, - help="Source directory (default: MEMPALACE_SOURCE_DIR or ~/Desktop/transcripts)") - parser.add_argument("--output-dir", type=str, default=None, - help="Output directory (default: same as source)") - parser.add_argument("--min-sessions", type=int, default=2, - help="Only split files with at least N sessions (default: 2)") - parser.add_argument("--dry-run", action="store_true", - help="Show what would happen without writing files") - parser.add_argument("--file", type=str, default=None, - help="Split a single specific file instead of scanning dir") + parser.add_argument( + "--source", + type=str, + default=None, + help="Source directory (default: MEMPALACE_SOURCE_DIR or ~/Desktop/transcripts)", + ) + parser.add_argument( + "--output-dir", type=str, default=None, help="Output directory (default: same as source)" + ) + parser.add_argument( + "--min-sessions", + type=int, + default=2, + help="Only split files with at least N sessions (default: 2)", + ) + parser.add_argument( + "--dry-run", action="store_true", help="Show what would happen without writing files" + ) + parser.add_argument( + "--file", + type=str, + default=None, + help="Split a single specific file instead of scanning dir", + ) args = parser.parse_args() - src_dir = Path(args.source) if args.source else LUMI_DIR + src_dir = Path(args.source) if args.source else LUMI_DIR output_dir = args.output_dir or None # None = same dir as file if args.file: @@ -239,13 +261,13 @@ def main(): print(f"No mega-files found in {src_dir} (min {args.min_sessions} sessions).") return - print(f"\n{'='*60}") + print(f"\n{'=' * 60}") print(f" Mega-file splitter — {'DRY RUN' if args.dry_run else 'SPLITTING'}") - print(f"{'='*60}") + print(f"{'=' * 60}") print(f" Source: {src_dir}") print(f" Output: {output_dir or 'same dir as source'}") print(f" Mega-files: {len(mega_files)}") - print(f"{'─'*60}\n") + print(f"{'─' * 60}\n") total_written = 0 for f, n_sessions in mega_files: @@ -260,7 +282,7 @@ def main(): else: print() - print(f"{'─'*60}") + print(f"{'─' * 60}") if args.dry_run: print(f" DRY RUN — would create {total_written} files from {len(mega_files)} mega-files") else: