fix: resolve ruff lint and format errors across codebase
Fix E402 import ordering, F841 unused variable, F541 unnecessary f-strings, F401 unused import, and auto-format 6 files.
This commit is contained in:
@@ -25,20 +25,19 @@ import os
|
|||||||
import sys
|
import sys
|
||||||
import json
|
import json
|
||||||
import shutil
|
import shutil
|
||||||
|
import ssl
|
||||||
import tempfile
|
import tempfile
|
||||||
import argparse
|
import argparse
|
||||||
import urllib.request
|
import urllib.request
|
||||||
import ssl
|
|
||||||
|
|
||||||
# Bypass SSL for restricted environments
|
|
||||||
ssl._create_default_https_context = ssl._create_unverified_context
|
|
||||||
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
import chromadb
|
import chromadb
|
||||||
|
|
||||||
|
# Bypass SSL for restricted environments
|
||||||
|
ssl._create_default_https_context = ssl._create_unverified_context
|
||||||
|
|
||||||
sys.path.insert(0, str(Path(__file__).parent.parent))
|
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||||
|
|
||||||
HF_BASE = "https://huggingface.co/datasets/Salesforce/ConvoMem/resolve/main/core_benchmark/evidence_questions"
|
HF_BASE = "https://huggingface.co/datasets/Salesforce/ConvoMem/resolve/main/core_benchmark/evidence_questions"
|
||||||
|
|||||||
+7
-3
@@ -334,15 +334,19 @@ def main():
|
|||||||
)
|
)
|
||||||
p_split.add_argument("dir", help="Directory containing transcript files")
|
p_split.add_argument("dir", help="Directory containing transcript files")
|
||||||
p_split.add_argument(
|
p_split.add_argument(
|
||||||
"--output-dir", default=None,
|
"--output-dir",
|
||||||
|
default=None,
|
||||||
help="Write split files here (default: same directory as source files)",
|
help="Write split files here (default: same directory as source files)",
|
||||||
)
|
)
|
||||||
p_split.add_argument(
|
p_split.add_argument(
|
||||||
"--dry-run", action="store_true",
|
"--dry-run",
|
||||||
|
action="store_true",
|
||||||
help="Show what would be split without writing files",
|
help="Show what would be split without writing files",
|
||||||
)
|
)
|
||||||
p_split.add_argument(
|
p_split.add_argument(
|
||||||
"--min-sessions", type=int, default=2,
|
"--min-sessions",
|
||||||
|
type=int,
|
||||||
|
default=2,
|
||||||
help="Only split files containing at least N sessions (default: 2)",
|
help="Only split files containing at least N sessions (default: 2)",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
+103
-69
@@ -101,16 +101,23 @@ class KnowledgeGraph:
|
|||||||
conn = self._conn()
|
conn = self._conn()
|
||||||
conn.execute(
|
conn.execute(
|
||||||
"INSERT OR REPLACE INTO entities (id, name, type, properties) VALUES (?, ?, ?, ?)",
|
"INSERT OR REPLACE INTO entities (id, name, type, properties) VALUES (?, ?, ?, ?)",
|
||||||
(eid, name, entity_type, props)
|
(eid, name, entity_type, props),
|
||||||
)
|
)
|
||||||
conn.commit()
|
conn.commit()
|
||||||
conn.close()
|
conn.close()
|
||||||
return eid
|
return eid
|
||||||
|
|
||||||
def add_triple(self, subject: str, predicate: str, obj: str,
|
def add_triple(
|
||||||
valid_from: str = None, valid_to: str = None,
|
self,
|
||||||
confidence: float = 1.0, source_closet: str = None,
|
subject: str,
|
||||||
source_file: str = None):
|
predicate: str,
|
||||||
|
obj: str,
|
||||||
|
valid_from: str = None,
|
||||||
|
valid_to: str = None,
|
||||||
|
confidence: float = 1.0,
|
||||||
|
source_closet: str = None,
|
||||||
|
source_file: str = None,
|
||||||
|
):
|
||||||
"""
|
"""
|
||||||
Add a relationship triple: subject → predicate → object.
|
Add a relationship triple: subject → predicate → object.
|
||||||
|
|
||||||
@@ -125,19 +132,13 @@ class KnowledgeGraph:
|
|||||||
|
|
||||||
# Auto-create entities if they don't exist
|
# Auto-create entities if they don't exist
|
||||||
conn = self._conn()
|
conn = self._conn()
|
||||||
conn.execute(
|
conn.execute("INSERT OR IGNORE INTO entities (id, name) VALUES (?, ?)", (sub_id, subject))
|
||||||
"INSERT OR IGNORE INTO entities (id, name) VALUES (?, ?)",
|
conn.execute("INSERT OR IGNORE INTO entities (id, name) VALUES (?, ?)", (obj_id, obj))
|
||||||
(sub_id, subject)
|
|
||||||
)
|
|
||||||
conn.execute(
|
|
||||||
"INSERT OR IGNORE INTO entities (id, name) VALUES (?, ?)",
|
|
||||||
(obj_id, obj)
|
|
||||||
)
|
|
||||||
|
|
||||||
# Check for existing identical triple
|
# Check for existing identical triple
|
||||||
existing = conn.execute(
|
existing = conn.execute(
|
||||||
"SELECT id FROM triples WHERE subject=? AND predicate=? AND object=? AND valid_to IS NULL",
|
"SELECT id FROM triples WHERE subject=? AND predicate=? AND object=? AND valid_to IS NULL",
|
||||||
(sub_id, pred, obj_id)
|
(sub_id, pred, obj_id),
|
||||||
).fetchone()
|
).fetchone()
|
||||||
|
|
||||||
if existing:
|
if existing:
|
||||||
@@ -149,7 +150,17 @@ class KnowledgeGraph:
|
|||||||
conn.execute(
|
conn.execute(
|
||||||
"""INSERT INTO triples (id, subject, predicate, object, valid_from, valid_to, confidence, source_closet, source_file)
|
"""INSERT INTO triples (id, subject, predicate, object, valid_from, valid_to, confidence, source_closet, source_file)
|
||||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)""",
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)""",
|
||||||
(triple_id, sub_id, pred, obj_id, valid_from, valid_to, confidence, source_closet, source_file)
|
(
|
||||||
|
triple_id,
|
||||||
|
sub_id,
|
||||||
|
pred,
|
||||||
|
obj_id,
|
||||||
|
valid_from,
|
||||||
|
valid_to,
|
||||||
|
confidence,
|
||||||
|
source_closet,
|
||||||
|
source_file,
|
||||||
|
),
|
||||||
)
|
)
|
||||||
conn.commit()
|
conn.commit()
|
||||||
conn.close()
|
conn.close()
|
||||||
@@ -165,7 +176,7 @@ class KnowledgeGraph:
|
|||||||
conn = self._conn()
|
conn = self._conn()
|
||||||
conn.execute(
|
conn.execute(
|
||||||
"UPDATE triples SET valid_to=? WHERE subject=? AND predicate=? AND object=? AND valid_to IS NULL",
|
"UPDATE triples SET valid_to=? WHERE subject=? AND predicate=? AND object=? AND valid_to IS NULL",
|
||||||
(ended, sub_id, pred, obj_id)
|
(ended, sub_id, pred, obj_id),
|
||||||
)
|
)
|
||||||
conn.commit()
|
conn.commit()
|
||||||
conn.close()
|
conn.close()
|
||||||
@@ -191,17 +202,19 @@ class KnowledgeGraph:
|
|||||||
query += " AND (t.valid_from IS NULL OR t.valid_from <= ?) AND (t.valid_to IS NULL OR t.valid_to >= ?)"
|
query += " AND (t.valid_from IS NULL OR t.valid_from <= ?) AND (t.valid_to IS NULL OR t.valid_to >= ?)"
|
||||||
params.extend([as_of, as_of])
|
params.extend([as_of, as_of])
|
||||||
for row in conn.execute(query, params).fetchall():
|
for row in conn.execute(query, params).fetchall():
|
||||||
results.append({
|
results.append(
|
||||||
"direction": "outgoing",
|
{
|
||||||
"subject": name,
|
"direction": "outgoing",
|
||||||
"predicate": row[2],
|
"subject": name,
|
||||||
"object": row[10], # obj_name
|
"predicate": row[2],
|
||||||
"valid_from": row[4],
|
"object": row[10], # obj_name
|
||||||
"valid_to": row[5],
|
"valid_from": row[4],
|
||||||
"confidence": row[6],
|
"valid_to": row[5],
|
||||||
"source_closet": row[7],
|
"confidence": row[6],
|
||||||
"current": row[5] is None,
|
"source_closet": row[7],
|
||||||
})
|
"current": row[5] is None,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
if direction in ("incoming", "both"):
|
if direction in ("incoming", "both"):
|
||||||
query = "SELECT t.*, e.name as sub_name FROM triples t JOIN entities e ON t.subject = e.id WHERE t.object = ?"
|
query = "SELECT t.*, e.name as sub_name FROM triples t JOIN entities e ON t.subject = e.id WHERE t.object = ?"
|
||||||
@@ -210,17 +223,19 @@ class KnowledgeGraph:
|
|||||||
query += " AND (t.valid_from IS NULL OR t.valid_from <= ?) AND (t.valid_to IS NULL OR t.valid_to >= ?)"
|
query += " AND (t.valid_from IS NULL OR t.valid_from <= ?) AND (t.valid_to IS NULL OR t.valid_to >= ?)"
|
||||||
params.extend([as_of, as_of])
|
params.extend([as_of, as_of])
|
||||||
for row in conn.execute(query, params).fetchall():
|
for row in conn.execute(query, params).fetchall():
|
||||||
results.append({
|
results.append(
|
||||||
"direction": "incoming",
|
{
|
||||||
"subject": row[10], # sub_name
|
"direction": "incoming",
|
||||||
"predicate": row[2],
|
"subject": row[10], # sub_name
|
||||||
"object": name,
|
"predicate": row[2],
|
||||||
"valid_from": row[4],
|
"object": name,
|
||||||
"valid_to": row[5],
|
"valid_from": row[4],
|
||||||
"confidence": row[6],
|
"valid_to": row[5],
|
||||||
"source_closet": row[7],
|
"confidence": row[6],
|
||||||
"current": row[5] is None,
|
"source_closet": row[7],
|
||||||
})
|
"current": row[5] is None,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
conn.close()
|
conn.close()
|
||||||
return results
|
return results
|
||||||
@@ -243,14 +258,16 @@ class KnowledgeGraph:
|
|||||||
|
|
||||||
results = []
|
results = []
|
||||||
for row in conn.execute(query, params).fetchall():
|
for row in conn.execute(query, params).fetchall():
|
||||||
results.append({
|
results.append(
|
||||||
"subject": row[10],
|
{
|
||||||
"predicate": pred,
|
"subject": row[10],
|
||||||
"object": row[11],
|
"predicate": pred,
|
||||||
"valid_from": row[4],
|
"object": row[11],
|
||||||
"valid_to": row[5],
|
"valid_from": row[4],
|
||||||
"current": row[5] is None,
|
"valid_to": row[5],
|
||||||
})
|
"current": row[5] is None,
|
||||||
|
}
|
||||||
|
)
|
||||||
conn.close()
|
conn.close()
|
||||||
return results
|
return results
|
||||||
|
|
||||||
@@ -259,14 +276,17 @@ class KnowledgeGraph:
|
|||||||
conn = self._conn()
|
conn = self._conn()
|
||||||
if entity_name:
|
if entity_name:
|
||||||
eid = self._entity_id(entity_name)
|
eid = self._entity_id(entity_name)
|
||||||
rows = conn.execute("""
|
rows = conn.execute(
|
||||||
|
"""
|
||||||
SELECT t.*, s.name as sub_name, o.name as obj_name
|
SELECT t.*, s.name as sub_name, o.name as obj_name
|
||||||
FROM triples t
|
FROM triples t
|
||||||
JOIN entities s ON t.subject = s.id
|
JOIN entities s ON t.subject = s.id
|
||||||
JOIN entities o ON t.object = o.id
|
JOIN entities o ON t.object = o.id
|
||||||
WHERE (t.subject = ? OR t.object = ?)
|
WHERE (t.subject = ? OR t.object = ?)
|
||||||
ORDER BY t.valid_from ASC NULLS LAST
|
ORDER BY t.valid_from ASC NULLS LAST
|
||||||
""", (eid, eid)).fetchall()
|
""",
|
||||||
|
(eid, eid),
|
||||||
|
).fetchall()
|
||||||
else:
|
else:
|
||||||
rows = conn.execute("""
|
rows = conn.execute("""
|
||||||
SELECT t.*, s.name as sub_name, o.name as obj_name
|
SELECT t.*, s.name as sub_name, o.name as obj_name
|
||||||
@@ -278,14 +298,17 @@ class KnowledgeGraph:
|
|||||||
""").fetchall()
|
""").fetchall()
|
||||||
|
|
||||||
conn.close()
|
conn.close()
|
||||||
return [{
|
return [
|
||||||
"subject": r[10],
|
{
|
||||||
"predicate": r[2],
|
"subject": r[10],
|
||||||
"object": r[11],
|
"predicate": r[2],
|
||||||
"valid_from": r[4],
|
"object": r[11],
|
||||||
"valid_to": r[5],
|
"valid_from": r[4],
|
||||||
"current": r[5] is None,
|
"valid_to": r[5],
|
||||||
} for r in rows]
|
"current": r[5] is None,
|
||||||
|
}
|
||||||
|
for r in rows
|
||||||
|
]
|
||||||
|
|
||||||
# ── Stats ─────────────────────────────────────────────────────────────
|
# ── Stats ─────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
@@ -295,9 +318,12 @@ class KnowledgeGraph:
|
|||||||
triples = conn.execute("SELECT COUNT(*) FROM triples").fetchone()[0]
|
triples = conn.execute("SELECT COUNT(*) FROM triples").fetchone()[0]
|
||||||
current = conn.execute("SELECT COUNT(*) FROM triples WHERE valid_to IS NULL").fetchone()[0]
|
current = conn.execute("SELECT COUNT(*) FROM triples WHERE valid_to IS NULL").fetchone()[0]
|
||||||
expired = triples - current
|
expired = triples - current
|
||||||
predicates = [r[0] for r in conn.execute(
|
predicates = [
|
||||||
"SELECT DISTINCT predicate FROM triples ORDER BY predicate"
|
r[0]
|
||||||
).fetchall()]
|
for r in conn.execute(
|
||||||
|
"SELECT DISTINCT predicate FROM triples ORDER BY predicate"
|
||||||
|
).fetchall()
|
||||||
|
]
|
||||||
conn.close()
|
conn.close()
|
||||||
return {
|
return {
|
||||||
"entities": entities,
|
"entities": entities,
|
||||||
@@ -317,16 +343,21 @@ class KnowledgeGraph:
|
|||||||
for key, facts in entity_facts.items():
|
for key, facts in entity_facts.items():
|
||||||
name = facts.get("full_name", key.capitalize())
|
name = facts.get("full_name", key.capitalize())
|
||||||
etype = facts.get("type", "person")
|
etype = facts.get("type", "person")
|
||||||
self.add_entity(name, etype, {
|
self.add_entity(
|
||||||
"gender": facts.get("gender", ""),
|
name,
|
||||||
"birthday": facts.get("birthday", ""),
|
etype,
|
||||||
})
|
{
|
||||||
|
"gender": facts.get("gender", ""),
|
||||||
|
"birthday": facts.get("birthday", ""),
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
# Relationships
|
# Relationships
|
||||||
parent = facts.get("parent")
|
parent = facts.get("parent")
|
||||||
if parent:
|
if parent:
|
||||||
self.add_triple(name, "child_of", parent.capitalize(),
|
self.add_triple(
|
||||||
valid_from=facts.get("birthday"))
|
name, "child_of", parent.capitalize(), valid_from=facts.get("birthday")
|
||||||
|
)
|
||||||
|
|
||||||
partner = facts.get("partner")
|
partner = facts.get("partner")
|
||||||
if partner:
|
if partner:
|
||||||
@@ -334,8 +365,12 @@ class KnowledgeGraph:
|
|||||||
|
|
||||||
relationship = facts.get("relationship", "")
|
relationship = facts.get("relationship", "")
|
||||||
if relationship == "daughter":
|
if relationship == "daughter":
|
||||||
self.add_triple(name, "is_child_of", facts.get("parent", "").capitalize() or name,
|
self.add_triple(
|
||||||
valid_from=facts.get("birthday"))
|
name,
|
||||||
|
"is_child_of",
|
||||||
|
facts.get("parent", "").capitalize() or name,
|
||||||
|
valid_from=facts.get("birthday"),
|
||||||
|
)
|
||||||
elif relationship == "husband":
|
elif relationship == "husband":
|
||||||
self.add_triple(name, "is_partner_of", facts.get("partner", name).capitalize())
|
self.add_triple(name, "is_partner_of", facts.get("partner", name).capitalize())
|
||||||
elif relationship == "brother":
|
elif relationship == "brother":
|
||||||
@@ -346,5 +381,4 @@ class KnowledgeGraph:
|
|||||||
|
|
||||||
# Interests
|
# Interests
|
||||||
for interest in facts.get("interests", []):
|
for interest in facts.get("interests", []):
|
||||||
self.add_triple(name, "loves", interest.capitalize(),
|
self.add_triple(name, "loves", interest.capitalize(), valid_from="2025-01-01")
|
||||||
valid_from="2025-01-01")
|
|
||||||
|
|||||||
+94
-40
@@ -26,12 +26,12 @@ from datetime import datetime
|
|||||||
from .config import MempalaceConfig
|
from .config import MempalaceConfig
|
||||||
from .searcher import search_memories
|
from .searcher import search_memories
|
||||||
from .palace_graph import traverse, find_tunnels, graph_stats
|
from .palace_graph import traverse, find_tunnels, graph_stats
|
||||||
|
import chromadb
|
||||||
|
|
||||||
from .knowledge_graph import KnowledgeGraph
|
from .knowledge_graph import KnowledgeGraph
|
||||||
|
|
||||||
_kg = KnowledgeGraph()
|
_kg = KnowledgeGraph()
|
||||||
|
|
||||||
import chromadb
|
|
||||||
|
|
||||||
logging.basicConfig(level=logging.INFO, format="%(message)s", stream=sys.stderr)
|
logging.basicConfig(level=logging.INFO, format="%(message)s", stream=sys.stderr)
|
||||||
logger = logging.getLogger("mempalace_mcp")
|
logger = logging.getLogger("mempalace_mcp")
|
||||||
|
|
||||||
@@ -312,19 +312,24 @@ def tool_kg_query(entity: str, as_of: str = None, direction: str = "both"):
|
|||||||
return {"entity": entity, "as_of": as_of, "facts": results, "count": len(results)}
|
return {"entity": entity, "as_of": as_of, "facts": results, "count": len(results)}
|
||||||
|
|
||||||
|
|
||||||
def tool_kg_add(subject: str, predicate: str, object: str,
|
def tool_kg_add(
|
||||||
valid_from: str = None, source_closet: str = None):
|
subject: str, predicate: str, object: str, valid_from: str = None, source_closet: str = None
|
||||||
|
):
|
||||||
"""Add a relationship to the knowledge graph."""
|
"""Add a relationship to the knowledge graph."""
|
||||||
triple_id = _kg.add_triple(subject, predicate, object,
|
triple_id = _kg.add_triple(
|
||||||
valid_from=valid_from, source_closet=source_closet)
|
subject, predicate, object, valid_from=valid_from, source_closet=source_closet
|
||||||
return {"success": True, "triple_id": triple_id,
|
)
|
||||||
"fact": f"{subject} → {predicate} → {object}"}
|
return {"success": True, "triple_id": triple_id, "fact": f"{subject} → {predicate} → {object}"}
|
||||||
|
|
||||||
|
|
||||||
def tool_kg_invalidate(subject: str, predicate: str, object: str, ended: str = None):
|
def tool_kg_invalidate(subject: str, predicate: str, object: str, ended: str = None):
|
||||||
"""Mark a fact as no longer true (set end date)."""
|
"""Mark a fact as no longer true (set end date)."""
|
||||||
_kg.invalidate(subject, predicate, object, ended=ended)
|
_kg.invalidate(subject, predicate, object, ended=ended)
|
||||||
return {"success": True, "fact": f"{subject} → {predicate} → {object}", "ended": ended or "today"}
|
return {
|
||||||
|
"success": True,
|
||||||
|
"fact": f"{subject} → {predicate} → {object}",
|
||||||
|
"ended": ended or "today",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
def tool_kg_timeline(entity: str = None):
|
def tool_kg_timeline(entity: str = None):
|
||||||
@@ -362,16 +367,18 @@ def tool_diary_write(agent_name: str, entry: str, topic: str = "general"):
|
|||||||
col.add(
|
col.add(
|
||||||
ids=[entry_id],
|
ids=[entry_id],
|
||||||
documents=[entry],
|
documents=[entry],
|
||||||
metadatas=[{
|
metadatas=[
|
||||||
"wing": wing,
|
{
|
||||||
"room": room,
|
"wing": wing,
|
||||||
"hall": "hall_diary",
|
"room": room,
|
||||||
"topic": topic,
|
"hall": "hall_diary",
|
||||||
"type": "diary_entry",
|
"topic": topic,
|
||||||
"agent": agent_name,
|
"type": "diary_entry",
|
||||||
"filed_at": now.isoformat(),
|
"agent": agent_name,
|
||||||
"date": now.strftime("%Y-%m-%d"),
|
"filed_at": now.isoformat(),
|
||||||
}],
|
"date": now.strftime("%Y-%m-%d"),
|
||||||
|
}
|
||||||
|
],
|
||||||
)
|
)
|
||||||
logger.info(f"Diary entry: {entry_id} → {wing}/diary/{topic}")
|
logger.info(f"Diary entry: {entry_id} → {wing}/diary/{topic}")
|
||||||
return {
|
return {
|
||||||
@@ -407,12 +414,14 @@ def tool_diary_read(agent_name: str, last_n: int = 10):
|
|||||||
# Combine and sort by timestamp
|
# Combine and sort by timestamp
|
||||||
entries = []
|
entries = []
|
||||||
for doc, meta in zip(results["documents"], results["metadatas"]):
|
for doc, meta in zip(results["documents"], results["metadatas"]):
|
||||||
entries.append({
|
entries.append(
|
||||||
"date": meta.get("date", ""),
|
{
|
||||||
"timestamp": meta.get("filed_at", ""),
|
"date": meta.get("date", ""),
|
||||||
"topic": meta.get("topic", ""),
|
"timestamp": meta.get("filed_at", ""),
|
||||||
"content": doc,
|
"topic": meta.get("topic", ""),
|
||||||
})
|
"content": doc,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
entries.sort(key=lambda x: x["timestamp"], reverse=True)
|
entries.sort(key=lambda x: x["timestamp"], reverse=True)
|
||||||
entries = entries[:last_n]
|
entries = entries[:last_n]
|
||||||
@@ -465,9 +474,18 @@ TOOLS = {
|
|||||||
"input_schema": {
|
"input_schema": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
"entity": {"type": "string", "description": "Entity to query (e.g. 'Max', 'MyProject', 'Alice')"},
|
"entity": {
|
||||||
"as_of": {"type": "string", "description": "Date filter — only facts valid at this date (YYYY-MM-DD, optional)"},
|
"type": "string",
|
||||||
"direction": {"type": "string", "description": "outgoing (entity→?), incoming (?→entity), or both (default: both)"},
|
"description": "Entity to query (e.g. 'Max', 'MyProject', 'Alice')",
|
||||||
|
},
|
||||||
|
"as_of": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Date filter — only facts valid at this date (YYYY-MM-DD, optional)",
|
||||||
|
},
|
||||||
|
"direction": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "outgoing (entity→?), incoming (?→entity), or both (default: both)",
|
||||||
|
},
|
||||||
},
|
},
|
||||||
"required": ["entity"],
|
"required": ["entity"],
|
||||||
},
|
},
|
||||||
@@ -479,10 +497,19 @@ TOOLS = {
|
|||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
"subject": {"type": "string", "description": "The entity doing/being something"},
|
"subject": {"type": "string", "description": "The entity doing/being something"},
|
||||||
"predicate": {"type": "string", "description": "The relationship type (e.g. 'loves', 'works_on', 'daughter_of')"},
|
"predicate": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "The relationship type (e.g. 'loves', 'works_on', 'daughter_of')",
|
||||||
|
},
|
||||||
"object": {"type": "string", "description": "The entity being connected to"},
|
"object": {"type": "string", "description": "The entity being connected to"},
|
||||||
"valid_from": {"type": "string", "description": "When this became true (YYYY-MM-DD, optional)"},
|
"valid_from": {
|
||||||
"source_closet": {"type": "string", "description": "Closet ID where this fact appears (optional)"},
|
"type": "string",
|
||||||
|
"description": "When this became true (YYYY-MM-DD, optional)",
|
||||||
|
},
|
||||||
|
"source_closet": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Closet ID where this fact appears (optional)",
|
||||||
|
},
|
||||||
},
|
},
|
||||||
"required": ["subject", "predicate", "object"],
|
"required": ["subject", "predicate", "object"],
|
||||||
},
|
},
|
||||||
@@ -496,7 +523,10 @@ TOOLS = {
|
|||||||
"subject": {"type": "string", "description": "Entity"},
|
"subject": {"type": "string", "description": "Entity"},
|
||||||
"predicate": {"type": "string", "description": "Relationship"},
|
"predicate": {"type": "string", "description": "Relationship"},
|
||||||
"object": {"type": "string", "description": "Connected entity"},
|
"object": {"type": "string", "description": "Connected entity"},
|
||||||
"ended": {"type": "string", "description": "When it stopped being true (YYYY-MM-DD, default: today)"},
|
"ended": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "When it stopped being true (YYYY-MM-DD, default: today)",
|
||||||
|
},
|
||||||
},
|
},
|
||||||
"required": ["subject", "predicate", "object"],
|
"required": ["subject", "predicate", "object"],
|
||||||
},
|
},
|
||||||
@@ -507,7 +537,10 @@ TOOLS = {
|
|||||||
"input_schema": {
|
"input_schema": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
"entity": {"type": "string", "description": "Entity to get timeline for (optional — omit for full timeline)"},
|
"entity": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Entity to get timeline for (optional — omit for full timeline)",
|
||||||
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
"handler": tool_kg_timeline,
|
"handler": tool_kg_timeline,
|
||||||
@@ -522,8 +555,14 @@ TOOLS = {
|
|||||||
"input_schema": {
|
"input_schema": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
"start_room": {"type": "string", "description": "Room to start from (e.g. 'chromadb-setup', 'riley-school')"},
|
"start_room": {
|
||||||
"max_hops": {"type": "integer", "description": "How many connections to follow (default: 2)"},
|
"type": "string",
|
||||||
|
"description": "Room to start from (e.g. 'chromadb-setup', 'riley-school')",
|
||||||
|
},
|
||||||
|
"max_hops": {
|
||||||
|
"type": "integer",
|
||||||
|
"description": "How many connections to follow (default: 2)",
|
||||||
|
},
|
||||||
},
|
},
|
||||||
"required": ["start_room"],
|
"required": ["start_room"],
|
||||||
},
|
},
|
||||||
@@ -611,9 +650,18 @@ TOOLS = {
|
|||||||
"input_schema": {
|
"input_schema": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
"agent_name": {"type": "string", "description": "Your name — each agent gets their own diary wing"},
|
"agent_name": {
|
||||||
"entry": {"type": "string", "description": "Your diary entry in AAAK format — compressed, entity-coded, emotion-marked"},
|
"type": "string",
|
||||||
"topic": {"type": "string", "description": "Topic tag (optional, default: general)"},
|
"description": "Your name — each agent gets their own diary wing",
|
||||||
|
},
|
||||||
|
"entry": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Your diary entry in AAAK format — compressed, entity-coded, emotion-marked",
|
||||||
|
},
|
||||||
|
"topic": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Topic tag (optional, default: general)",
|
||||||
|
},
|
||||||
},
|
},
|
||||||
"required": ["agent_name", "entry"],
|
"required": ["agent_name", "entry"],
|
||||||
},
|
},
|
||||||
@@ -624,8 +672,14 @@ TOOLS = {
|
|||||||
"input_schema": {
|
"input_schema": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
"agent_name": {"type": "string", "description": "Your name — each agent gets their own diary wing"},
|
"agent_name": {
|
||||||
"last_n": {"type": "integer", "description": "Number of recent entries to read (default: 10)"},
|
"type": "string",
|
||||||
|
"description": "Your name — each agent gets their own diary wing",
|
||||||
|
},
|
||||||
|
"last_n": {
|
||||||
|
"type": "integer",
|
||||||
|
"description": "Number of recent entries to read (default: 10)",
|
||||||
|
},
|
||||||
},
|
},
|
||||||
"required": ["agent_name"],
|
"required": ["agent_name"],
|
||||||
},
|
},
|
||||||
|
|||||||
+30
-21
@@ -263,7 +263,9 @@ def _warn_ambiguous(people: list) -> list:
|
|||||||
# ─────────────────────────────────────────────────────────────────────────────
|
# ─────────────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
def _generate_aaak_bootstrap(people: list, projects: list, wings: list, mode: str, config_dir: Path = None):
|
def _generate_aaak_bootstrap(
|
||||||
|
people: list, projects: list, wings: list, mode: str, config_dir: Path = None
|
||||||
|
):
|
||||||
"""
|
"""
|
||||||
Generate AAAK entity registry + critical facts bootstrap from onboarding data.
|
Generate AAAK entity registry + critical facts bootstrap from onboarding data.
|
||||||
These files teach the AI about the user's world from session one.
|
These files teach the AI about the user's world from session one.
|
||||||
@@ -292,7 +294,6 @@ def _generate_aaak_bootstrap(people: list, projects: list, wings: list, mode: st
|
|||||||
name = p["name"]
|
name = p["name"]
|
||||||
code = entity_codes[name]
|
code = entity_codes[name]
|
||||||
rel = p.get("relationship", "")
|
rel = p.get("relationship", "")
|
||||||
ctx = p.get("context", "")
|
|
||||||
registry_lines.append(f" {code}={name} ({rel})" if rel else f" {code}={name}")
|
registry_lines.append(f" {code}={name} ({rel})" if rel else f" {code}={name}")
|
||||||
|
|
||||||
if projects:
|
if projects:
|
||||||
@@ -301,13 +302,15 @@ def _generate_aaak_bootstrap(people: list, projects: list, wings: list, mode: st
|
|||||||
code = proj[:4].upper()
|
code = proj[:4].upper()
|
||||||
registry_lines.append(f" {code}={proj}")
|
registry_lines.append(f" {code}={proj}")
|
||||||
|
|
||||||
registry_lines.extend([
|
registry_lines.extend(
|
||||||
"",
|
[
|
||||||
"## AAAK Quick Reference",
|
"",
|
||||||
" Symbols: ♡=love ★=importance ⚠=warning →=relationship |=separator",
|
"## AAAK Quick Reference",
|
||||||
" Structure: KEY:value | GROUP(details) | entity.attribute",
|
" Symbols: ♡=love ★=importance ⚠=warning →=relationship |=separator",
|
||||||
" Read naturally — expand codes, treat *markers* as emotional context.",
|
" Structure: KEY:value | GROUP(details) | entity.attribute",
|
||||||
])
|
" Read naturally — expand codes, treat *markers* as emotional context.",
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
(mempalace_dir / "aaak_entities.md").write_text("\n".join(registry_lines))
|
(mempalace_dir / "aaak_entities.md").write_text("\n".join(registry_lines))
|
||||||
|
|
||||||
@@ -325,7 +328,9 @@ def _generate_aaak_bootstrap(people: list, projects: list, wings: list, mode: st
|
|||||||
for p in personal_people:
|
for p in personal_people:
|
||||||
code = entity_codes[p["name"]]
|
code = entity_codes[p["name"]]
|
||||||
rel = p.get("relationship", "")
|
rel = p.get("relationship", "")
|
||||||
facts_lines.append(f"- **{p['name']}** ({code}) — {rel}" if rel else f"- **{p['name']}** ({code})")
|
facts_lines.append(
|
||||||
|
f"- **{p['name']}** ({code}) — {rel}" if rel else f"- **{p['name']}** ({code})"
|
||||||
|
)
|
||||||
facts_lines.append("")
|
facts_lines.append("")
|
||||||
|
|
||||||
if work_people:
|
if work_people:
|
||||||
@@ -333,7 +338,9 @@ def _generate_aaak_bootstrap(people: list, projects: list, wings: list, mode: st
|
|||||||
for p in work_people:
|
for p in work_people:
|
||||||
code = entity_codes[p["name"]]
|
code = entity_codes[p["name"]]
|
||||||
rel = p.get("relationship", "")
|
rel = p.get("relationship", "")
|
||||||
facts_lines.append(f"- **{p['name']}** ({code}) — {rel}" if rel else f"- **{p['name']}** ({code})")
|
facts_lines.append(
|
||||||
|
f"- **{p['name']}** ({code}) — {rel}" if rel else f"- **{p['name']}** ({code})"
|
||||||
|
)
|
||||||
facts_lines.append("")
|
facts_lines.append("")
|
||||||
|
|
||||||
if projects:
|
if projects:
|
||||||
@@ -342,13 +349,15 @@ def _generate_aaak_bootstrap(people: list, projects: list, wings: list, mode: st
|
|||||||
facts_lines.append(f"- **{proj}**")
|
facts_lines.append(f"- **{proj}**")
|
||||||
facts_lines.append("")
|
facts_lines.append("")
|
||||||
|
|
||||||
facts_lines.extend([
|
facts_lines.extend(
|
||||||
"## Palace",
|
[
|
||||||
f"Wings: {', '.join(wings)}",
|
"## Palace",
|
||||||
f"Mode: {mode}",
|
f"Wings: {', '.join(wings)}",
|
||||||
"",
|
f"Mode: {mode}",
|
||||||
"*This file will be enriched by palace_facts.py after mining.*",
|
"",
|
||||||
])
|
"*This file will be enriched by palace_facts.py after mining.*",
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
(mempalace_dir / "critical_facts.md").write_text("\n".join(facts_lines))
|
(mempalace_dir / "critical_facts.md").write_text("\n".join(facts_lines))
|
||||||
|
|
||||||
@@ -433,9 +442,9 @@ def run_onboarding(
|
|||||||
print(f" {registry.summary()}")
|
print(f" {registry.summary()}")
|
||||||
print(f"\n Wings: {', '.join(wings)}")
|
print(f"\n Wings: {', '.join(wings)}")
|
||||||
print(f"\n Registry saved to: {registry._path}")
|
print(f"\n Registry saved to: {registry._path}")
|
||||||
print(f"\n AAAK entity registry: ~/.mempalace/aaak_entities.md")
|
print("\n AAAK entity registry: ~/.mempalace/aaak_entities.md")
|
||||||
print(f" Critical facts bootstrap: ~/.mempalace/critical_facts.md")
|
print(" Critical facts bootstrap: ~/.mempalace/critical_facts.md")
|
||||||
print(f"\n Your AI will know your world from the first session.")
|
print("\n Your AI will know your world from the first session.")
|
||||||
print()
|
print()
|
||||||
|
|
||||||
return registry
|
return registry
|
||||||
|
|||||||
+42
-31
@@ -71,15 +71,17 @@ def build_graph(col=None, config=None):
|
|||||||
wings = sorted(data["wings"])
|
wings = sorted(data["wings"])
|
||||||
if len(wings) >= 2:
|
if len(wings) >= 2:
|
||||||
for i, wa in enumerate(wings):
|
for i, wa in enumerate(wings):
|
||||||
for wb in wings[i + 1:]:
|
for wb in wings[i + 1 :]:
|
||||||
for hall in data["halls"]:
|
for hall in data["halls"]:
|
||||||
edges.append({
|
edges.append(
|
||||||
"room": room,
|
{
|
||||||
"wing_a": wa,
|
"room": room,
|
||||||
"wing_b": wb,
|
"wing_a": wa,
|
||||||
"hall": hall,
|
"wing_b": wb,
|
||||||
"count": data["count"],
|
"hall": hall,
|
||||||
})
|
"count": data["count"],
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
# Convert sets to lists for JSON serialization
|
# Convert sets to lists for JSON serialization
|
||||||
nodes = {}
|
nodes = {}
|
||||||
@@ -104,17 +106,22 @@ def traverse(start_room: str, col=None, config=None, max_hops: int = 2):
|
|||||||
nodes, edges = build_graph(col, config)
|
nodes, edges = build_graph(col, config)
|
||||||
|
|
||||||
if start_room not in nodes:
|
if start_room not in nodes:
|
||||||
return {"error": f"Room '{start_room}' not found", "suggestions": _fuzzy_match(start_room, nodes)}
|
return {
|
||||||
|
"error": f"Room '{start_room}' not found",
|
||||||
|
"suggestions": _fuzzy_match(start_room, nodes),
|
||||||
|
}
|
||||||
|
|
||||||
start = nodes[start_room]
|
start = nodes[start_room]
|
||||||
visited = {start_room}
|
visited = {start_room}
|
||||||
results = [{
|
results = [
|
||||||
"room": start_room,
|
{
|
||||||
"wings": start["wings"],
|
"room": start_room,
|
||||||
"halls": start["halls"],
|
"wings": start["wings"],
|
||||||
"count": start["count"],
|
"halls": start["halls"],
|
||||||
"hop": 0,
|
"count": start["count"],
|
||||||
}]
|
"hop": 0,
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
# BFS traversal
|
# BFS traversal
|
||||||
frontier = [(start_room, 0)]
|
frontier = [(start_room, 0)]
|
||||||
@@ -133,14 +140,16 @@ def traverse(start_room: str, col=None, config=None, max_hops: int = 2):
|
|||||||
shared_wings = current_wings & set(data["wings"])
|
shared_wings = current_wings & set(data["wings"])
|
||||||
if shared_wings:
|
if shared_wings:
|
||||||
visited.add(room)
|
visited.add(room)
|
||||||
results.append({
|
results.append(
|
||||||
"room": room,
|
{
|
||||||
"wings": data["wings"],
|
"room": room,
|
||||||
"halls": data["halls"],
|
"wings": data["wings"],
|
||||||
"count": data["count"],
|
"halls": data["halls"],
|
||||||
"hop": depth + 1,
|
"count": data["count"],
|
||||||
"connected_via": sorted(shared_wings),
|
"hop": depth + 1,
|
||||||
})
|
"connected_via": sorted(shared_wings),
|
||||||
|
}
|
||||||
|
)
|
||||||
if depth + 1 < max_hops:
|
if depth + 1 < max_hops:
|
||||||
frontier.append((room, depth + 1))
|
frontier.append((room, depth + 1))
|
||||||
|
|
||||||
@@ -167,13 +176,15 @@ def find_tunnels(wing_a: str = None, wing_b: str = None, col=None, config=None):
|
|||||||
if wing_b and wing_b not in wings:
|
if wing_b and wing_b not in wings:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
tunnels.append({
|
tunnels.append(
|
||||||
"room": room,
|
{
|
||||||
"wings": wings,
|
"room": room,
|
||||||
"halls": data["halls"],
|
"wings": wings,
|
||||||
"count": data["count"],
|
"halls": data["halls"],
|
||||||
"recent": data["dates"][-1] if data["dates"] else "",
|
"count": data["count"],
|
||||||
})
|
"recent": data["dates"][-1] if data["dates"] else "",
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
tunnels.sort(key=lambda x: -x["count"])
|
tunnels.sort(key=lambda x: -x["count"])
|
||||||
return tunnels[:50]
|
return tunnels[:50]
|
||||||
|
|||||||
@@ -26,16 +26,16 @@ import argparse
|
|||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import sys
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
HOME = Path.home()
|
HOME = Path.home()
|
||||||
LUMI_DIR = Path(os.environ.get("MEMPALACE_SOURCE_DIR", str(HOME / "Desktop/transcripts")))
|
LUMI_DIR = Path(os.environ.get("MEMPALACE_SOURCE_DIR", str(HOME / "Desktop/transcripts")))
|
||||||
|
|
||||||
# People we know about (for name detection in content)
|
# People we know about (for name detection in content)
|
||||||
# Loaded from ~/.mempalace/known_names.json if it exists, otherwise generic fallback.
|
# Loaded from ~/.mempalace/known_names.json if it exists, otherwise generic fallback.
|
||||||
_KNOWN_NAMES_PATH = HOME / ".mempalace" / "known_names.json"
|
_KNOWN_NAMES_PATH = HOME / ".mempalace" / "known_names.json"
|
||||||
|
|
||||||
|
|
||||||
def _load_known_people() -> list:
|
def _load_known_people() -> list:
|
||||||
"""Load known names from config file, falling back to a generic list."""
|
"""Load known names from config file, falling back to a generic list."""
|
||||||
if _KNOWN_NAMES_PATH.exists():
|
if _KNOWN_NAMES_PATH.exists():
|
||||||
@@ -49,6 +49,7 @@ def _load_known_people() -> list:
|
|||||||
# Generic fallback — override by creating ~/.mempalace/known_names.json
|
# Generic fallback — override by creating ~/.mempalace/known_names.json
|
||||||
return ["Alice", "Ben", "Riley", "Max", "Sam", "Devon", "Jordan"]
|
return ["Alice", "Ben", "Riley", "Max", "Sam", "Devon", "Jordan"]
|
||||||
|
|
||||||
|
|
||||||
KNOWN_PEOPLE = _load_known_people()
|
KNOWN_PEOPLE = _load_known_people()
|
||||||
|
|
||||||
|
|
||||||
@@ -69,7 +70,7 @@ def is_true_session_start(lines, idx):
|
|||||||
True session start: 'Claude Code v' header NOT followed by 'Ctrl+E'/'previous messages'
|
True session start: 'Claude Code v' header NOT followed by 'Ctrl+E'/'previous messages'
|
||||||
within the next 6 lines (those are context restores, not new sessions).
|
within the next 6 lines (those are context restores, not new sessions).
|
||||||
"""
|
"""
|
||||||
nearby = "".join(lines[idx:idx + 6])
|
nearby = "".join(lines[idx : idx + 6])
|
||||||
return "Ctrl+E" not in nearby and "previous messages" not in nearby
|
return "Ctrl+E" not in nearby and "previous messages" not in nearby
|
||||||
|
|
||||||
|
|
||||||
@@ -87,13 +88,20 @@ def extract_timestamp(lines):
|
|||||||
Find the first timestamp line: ⏺ H:MM AM/PM Weekday, Month DD, YYYY
|
Find the first timestamp line: ⏺ H:MM AM/PM Weekday, Month DD, YYYY
|
||||||
Returns (datetime_str, iso_str) or (None, None).
|
Returns (datetime_str, iso_str) or (None, None).
|
||||||
"""
|
"""
|
||||||
ts_pattern = re.compile(
|
ts_pattern = re.compile(r"⏺\s+(\d{1,2}:\d{2}\s+[AP]M)\s+\w+,\s+(\w+)\s+(\d{1,2}),\s+(\d{4})")
|
||||||
r"⏺\s+(\d{1,2}:\d{2}\s+[AP]M)\s+\w+,\s+(\w+)\s+(\d{1,2}),\s+(\d{4})"
|
|
||||||
)
|
|
||||||
months = {
|
months = {
|
||||||
"January": "01", "February": "02", "March": "03", "April": "04",
|
"January": "01",
|
||||||
"May": "05", "June": "06", "July": "07", "August": "08",
|
"February": "02",
|
||||||
"September": "09", "October": "10", "November": "11", "December": "12",
|
"March": "03",
|
||||||
|
"April": "04",
|
||||||
|
"May": "05",
|
||||||
|
"June": "06",
|
||||||
|
"July": "07",
|
||||||
|
"August": "08",
|
||||||
|
"September": "09",
|
||||||
|
"October": "10",
|
||||||
|
"November": "11",
|
||||||
|
"December": "12",
|
||||||
}
|
}
|
||||||
for line in lines[:50]:
|
for line in lines[:50]:
|
||||||
m = ts_pattern.search(line)
|
m = ts_pattern.search(line)
|
||||||
@@ -177,16 +185,16 @@ def split_file(filepath, output_dir, dry_run=False):
|
|||||||
continue # Skip tiny fragments
|
continue # Skip tiny fragments
|
||||||
|
|
||||||
ts_human, ts_iso = extract_timestamp(chunk)
|
ts_human, ts_iso = extract_timestamp(chunk)
|
||||||
people = extract_people(chunk)
|
people = extract_people(chunk)
|
||||||
subject = extract_subject(chunk)
|
subject = extract_subject(chunk)
|
||||||
|
|
||||||
# Build filename: SOURCESTEM__DATE_TIME_People_subject.txt
|
# Build filename: SOURCESTEM__DATE_TIME_People_subject.txt
|
||||||
# Source stem prefix prevents collisions when multiple mega-files
|
# Source stem prefix prevents collisions when multiple mega-files
|
||||||
# produce sessions with the same timestamp/people/subject.
|
# produce sessions with the same timestamp/people/subject.
|
||||||
ts_part = ts_human or f"part{i+1:02d}"
|
ts_part = ts_human or f"part{i + 1:02d}"
|
||||||
people_part = "-".join(people[:3]) if people else "unknown"
|
people_part = "-".join(people[:3]) if people else "unknown"
|
||||||
src_stem = re.sub(r"[^\w-]", "_", path.stem)[:40]
|
src_stem = re.sub(r"[^\w-]", "_", path.stem)[:40]
|
||||||
name = f"{src_stem}__{ts_part}_{people_part}_{subject}.txt"
|
name = f"{src_stem}__{ts_part}_{people_part}_{subject}.txt"
|
||||||
# Sanitize
|
# Sanitize
|
||||||
name = re.sub(r"[^\w\.\-]", "_", name)
|
name = re.sub(r"[^\w\.\-]", "_", name)
|
||||||
name = re.sub(r"_+", "_", name)
|
name = re.sub(r"_+", "_", name)
|
||||||
@@ -194,7 +202,7 @@ def split_file(filepath, output_dir, dry_run=False):
|
|||||||
out_path = out_dir / name
|
out_path = out_dir / name
|
||||||
|
|
||||||
if dry_run:
|
if dry_run:
|
||||||
print(f" [{i+1}/{len(boundaries)-1}] {name} ({len(chunk)} lines)")
|
print(f" [{i + 1}/{len(boundaries) - 1}] {name} ({len(chunk)} lines)")
|
||||||
else:
|
else:
|
||||||
out_path.write_text("".join(chunk))
|
out_path.write_text("".join(chunk))
|
||||||
print(f" ✓ {name} ({len(chunk)} lines)")
|
print(f" ✓ {name} ({len(chunk)} lines)")
|
||||||
@@ -208,19 +216,33 @@ def main():
|
|||||||
parser = argparse.ArgumentParser(
|
parser = argparse.ArgumentParser(
|
||||||
description="Split concatenated transcript mega-files into per-session files"
|
description="Split concatenated transcript mega-files into per-session files"
|
||||||
)
|
)
|
||||||
parser.add_argument("--source", type=str, default=None,
|
parser.add_argument(
|
||||||
help="Source directory (default: MEMPALACE_SOURCE_DIR or ~/Desktop/transcripts)")
|
"--source",
|
||||||
parser.add_argument("--output-dir", type=str, default=None,
|
type=str,
|
||||||
help="Output directory (default: same as source)")
|
default=None,
|
||||||
parser.add_argument("--min-sessions", type=int, default=2,
|
help="Source directory (default: MEMPALACE_SOURCE_DIR or ~/Desktop/transcripts)",
|
||||||
help="Only split files with at least N sessions (default: 2)")
|
)
|
||||||
parser.add_argument("--dry-run", action="store_true",
|
parser.add_argument(
|
||||||
help="Show what would happen without writing files")
|
"--output-dir", type=str, default=None, help="Output directory (default: same as source)"
|
||||||
parser.add_argument("--file", type=str, default=None,
|
)
|
||||||
help="Split a single specific file instead of scanning dir")
|
parser.add_argument(
|
||||||
|
"--min-sessions",
|
||||||
|
type=int,
|
||||||
|
default=2,
|
||||||
|
help="Only split files with at least N sessions (default: 2)",
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--dry-run", action="store_true", help="Show what would happen without writing files"
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--file",
|
||||||
|
type=str,
|
||||||
|
default=None,
|
||||||
|
help="Split a single specific file instead of scanning dir",
|
||||||
|
)
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
src_dir = Path(args.source) if args.source else LUMI_DIR
|
src_dir = Path(args.source) if args.source else LUMI_DIR
|
||||||
output_dir = args.output_dir or None # None = same dir as file
|
output_dir = args.output_dir or None # None = same dir as file
|
||||||
|
|
||||||
if args.file:
|
if args.file:
|
||||||
@@ -239,13 +261,13 @@ def main():
|
|||||||
print(f"No mega-files found in {src_dir} (min {args.min_sessions} sessions).")
|
print(f"No mega-files found in {src_dir} (min {args.min_sessions} sessions).")
|
||||||
return
|
return
|
||||||
|
|
||||||
print(f"\n{'='*60}")
|
print(f"\n{'=' * 60}")
|
||||||
print(f" Mega-file splitter — {'DRY RUN' if args.dry_run else 'SPLITTING'}")
|
print(f" Mega-file splitter — {'DRY RUN' if args.dry_run else 'SPLITTING'}")
|
||||||
print(f"{'='*60}")
|
print(f"{'=' * 60}")
|
||||||
print(f" Source: {src_dir}")
|
print(f" Source: {src_dir}")
|
||||||
print(f" Output: {output_dir or 'same dir as source'}")
|
print(f" Output: {output_dir or 'same dir as source'}")
|
||||||
print(f" Mega-files: {len(mega_files)}")
|
print(f" Mega-files: {len(mega_files)}")
|
||||||
print(f"{'─'*60}\n")
|
print(f"{'─' * 60}\n")
|
||||||
|
|
||||||
total_written = 0
|
total_written = 0
|
||||||
for f, n_sessions in mega_files:
|
for f, n_sessions in mega_files:
|
||||||
@@ -260,7 +282,7 @@ def main():
|
|||||||
else:
|
else:
|
||||||
print()
|
print()
|
||||||
|
|
||||||
print(f"{'─'*60}")
|
print(f"{'─' * 60}")
|
||||||
if args.dry_run:
|
if args.dry_run:
|
||||||
print(f" DRY RUN — would create {total_written} files from {len(mega_files)} mega-files")
|
print(f" DRY RUN — would create {total_written} files from {len(mega_files)} mega-files")
|
||||||
else:
|
else:
|
||||||
|
|||||||
Reference in New Issue
Block a user