068dbd9a7b
The memory system: - Palace structure: Wings (people/projects) → Rooms (topics) → Closets (AAAK compressed) → Drawers (verbatim transcripts) - Halls connect related rooms within a wing - Tunnels cross-reference rooms across wings - AAAK: 30x lossless compression dialect for AI agents - Knowledge graph: temporal entity-relationship triples (SQLite) - Palace graph: room-based navigation with tunnel detection - MCP server: 19 tools — search, graph traversal, agent diary, AAAK auto-teach - Onboarding: guided setup generates wing config + AAAK entity registry - Contradiction detection: catches wrong pronouns, names, ages - Auto-save hooks for Claude Code 96.6% Recall@5 on LongMemEval — highest zero-API score published. 100% with optional Haiku rerank (500/500). Local. Free. No API key required.
1051 lines
32 KiB
Python
1051 lines
32 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
AAAK Dialect -- Compressed Symbolic Memory Language
|
|
====================================================
|
|
|
|
A structured symbolic format that ANY LLM reads natively at ~30x compression.
|
|
Not latent vectors. Not English prose. A universal memory compression dialect.
|
|
|
|
Works with: Claude, ChatGPT, Gemini, Llama, Mistral -- any model that reads text.
|
|
|
|
Adapted for mempalace: works standalone on plain text and ChromaDB drawers.
|
|
No dependency on palace.py or layers.py.
|
|
|
|
FORMAT:
|
|
Header: FILE_NUM|PRIMARY_ENTITY|DATE|TITLE
|
|
Zettel: ZID:ENTITIES|topic_keywords|"key_quote"|WEIGHT|EMOTIONS|FLAGS
|
|
Tunnel: T:ZID<->ZID|label
|
|
Arc: ARC:emotion->emotion->emotion
|
|
|
|
EMOTION CODES (universal):
|
|
vul=vulnerability, joy=joy, fear=fear, trust=trust
|
|
grief=grief, wonder=wonder, rage=rage, love=love
|
|
hope=hope, despair=despair, peace=peace, humor=humor
|
|
tender=tenderness, raw=raw_honesty, doubt=self_doubt
|
|
relief=relief, anx=anxiety, exhaust=exhaustion
|
|
convict=conviction, passion=quiet_passion
|
|
|
|
FLAGS:
|
|
ORIGIN = origin moment (birth of something)
|
|
CORE = core belief or identity pillar
|
|
SENSITIVE = handle with absolute care
|
|
PIVOT = emotional turning point
|
|
GENESIS = led directly to something existing
|
|
DECISION = explicit decision or choice
|
|
TECHNICAL = technical architecture or implementation detail
|
|
"""
|
|
|
|
import json
|
|
import os
|
|
import re
|
|
from typing import List, Dict, Optional
|
|
from pathlib import Path
|
|
|
|
|
|
# === EMOTION CODES (universal) ===

# Maps a descriptive emotion label (as found in zettel "emotional_tone" lists)
# to its compact AAAK code. Several labels may share one code.
EMOTION_CODES = {
    "vulnerability": "vul", "vulnerable": "vul",
    "joy": "joy", "joyful": "joy",
    "fear": "fear", "mild_fear": "fear",
    "trust": "trust", "trust_building": "trust",
    "grief": "grief", "raw_grief": "grief",
    "wonder": "wonder", "philosophical_wonder": "wonder",
    "rage": "rage", "anger": "rage",
    "love": "love", "devotion": "love",
    "hope": "hope",
    "despair": "despair", "hopelessness": "despair",
    "peace": "peace",
    "relief": "relief",
    "humor": "humor", "dark_humor": "humor",
    "tenderness": "tender",
    "raw_honesty": "raw", "brutal_honesty": "raw",
    "self_doubt": "doubt",
    "anxiety": "anx",
    "exhaustion": "exhaust",
    "conviction": "convict",
    "quiet_passion": "passion",
    "warmth": "warmth",
    "curiosity": "curious",
    "gratitude": "grat",
    "frustration": "frust",
    "confusion": "confuse",
    "satisfaction": "satis",
    "excitement": "excite",
    "determination": "determ",
    "surprise": "surprise",
}
|
|
|
|
# Keywords that signal emotions in plain text
|
|
_EMOTION_SIGNALS = {
|
|
"decided": "determ",
|
|
"prefer": "convict",
|
|
"worried": "anx",
|
|
"excited": "excite",
|
|
"frustrated": "frust",
|
|
"confused": "confuse",
|
|
"love": "love",
|
|
"hate": "rage",
|
|
"hope": "hope",
|
|
"fear": "fear",
|
|
"trust": "trust",
|
|
"happy": "joy",
|
|
"sad": "grief",
|
|
"surprised": "surprise",
|
|
"grateful": "grat",
|
|
"curious": "curious",
|
|
"wonder": "wonder",
|
|
"anxious": "anx",
|
|
"relieved": "relief",
|
|
"satisf": "satis",
|
|
"disappoint": "grief",
|
|
"concern": "anx",
|
|
}
|
|
|
|
# Keywords that signal flags
|
|
_FLAG_SIGNALS = {
|
|
"decided": "DECISION",
|
|
"chose": "DECISION",
|
|
"switched": "DECISION",
|
|
"migrated": "DECISION",
|
|
"replaced": "DECISION",
|
|
"instead of": "DECISION",
|
|
"because": "DECISION",
|
|
"founded": "ORIGIN",
|
|
"created": "ORIGIN",
|
|
"started": "ORIGIN",
|
|
"born": "ORIGIN",
|
|
"launched": "ORIGIN",
|
|
"first time": "ORIGIN",
|
|
"core": "CORE",
|
|
"fundamental": "CORE",
|
|
"essential": "CORE",
|
|
"principle": "CORE",
|
|
"belief": "CORE",
|
|
"always": "CORE",
|
|
"never forget": "CORE",
|
|
"turning point": "PIVOT",
|
|
"changed everything": "PIVOT",
|
|
"realized": "PIVOT",
|
|
"breakthrough": "PIVOT",
|
|
"epiphany": "PIVOT",
|
|
"api": "TECHNICAL",
|
|
"database": "TECHNICAL",
|
|
"architecture": "TECHNICAL",
|
|
"deploy": "TECHNICAL",
|
|
"infrastructure": "TECHNICAL",
|
|
"algorithm": "TECHNICAL",
|
|
"framework": "TECHNICAL",
|
|
"server": "TECHNICAL",
|
|
"config": "TECHNICAL",
|
|
}
|
|
|
|
# Common filler/stop words to strip from topic extraction
|
|
_STOP_WORDS = {
|
|
"the",
|
|
"a",
|
|
"an",
|
|
"is",
|
|
"are",
|
|
"was",
|
|
"were",
|
|
"be",
|
|
"been",
|
|
"being",
|
|
"have",
|
|
"has",
|
|
"had",
|
|
"do",
|
|
"does",
|
|
"did",
|
|
"will",
|
|
"would",
|
|
"could",
|
|
"should",
|
|
"may",
|
|
"might",
|
|
"shall",
|
|
"can",
|
|
"to",
|
|
"of",
|
|
"in",
|
|
"for",
|
|
"on",
|
|
"with",
|
|
"at",
|
|
"by",
|
|
"from",
|
|
"as",
|
|
"into",
|
|
"about",
|
|
"between",
|
|
"through",
|
|
"during",
|
|
"before",
|
|
"after",
|
|
"above",
|
|
"below",
|
|
"up",
|
|
"down",
|
|
"out",
|
|
"off",
|
|
"over",
|
|
"under",
|
|
"again",
|
|
"further",
|
|
"then",
|
|
"once",
|
|
"here",
|
|
"there",
|
|
"when",
|
|
"where",
|
|
"why",
|
|
"how",
|
|
"all",
|
|
"each",
|
|
"every",
|
|
"both",
|
|
"few",
|
|
"more",
|
|
"most",
|
|
"other",
|
|
"some",
|
|
"such",
|
|
"no",
|
|
"nor",
|
|
"not",
|
|
"only",
|
|
"own",
|
|
"same",
|
|
"so",
|
|
"than",
|
|
"too",
|
|
"very",
|
|
"just",
|
|
"don",
|
|
"now",
|
|
"and",
|
|
"but",
|
|
"or",
|
|
"if",
|
|
"while",
|
|
"that",
|
|
"this",
|
|
"these",
|
|
"those",
|
|
"it",
|
|
"its",
|
|
"i",
|
|
"we",
|
|
"you",
|
|
"he",
|
|
"she",
|
|
"they",
|
|
"me",
|
|
"him",
|
|
"her",
|
|
"us",
|
|
"them",
|
|
"my",
|
|
"your",
|
|
"his",
|
|
"our",
|
|
"their",
|
|
"what",
|
|
"which",
|
|
"who",
|
|
"whom",
|
|
"also",
|
|
"much",
|
|
"many",
|
|
"like",
|
|
"because",
|
|
"since",
|
|
"get",
|
|
"got",
|
|
"use",
|
|
"used",
|
|
"using",
|
|
"make",
|
|
"made",
|
|
"thing",
|
|
"things",
|
|
"way",
|
|
"well",
|
|
"really",
|
|
"want",
|
|
"need",
|
|
}
|
|
|
|
|
|
class Dialect:
|
|
"""
|
|
AAAK Dialect encoder -- works on plain text or structured zettel data.
|
|
|
|
Usage:
|
|
# Basic: compress any text
|
|
dialect = Dialect()
|
|
compressed = dialect.compress("We decided to use GraphQL instead of REST...")
|
|
|
|
# With entity mappings
|
|
dialect = Dialect(entities={"Alice": "ALC", "Bob": "BOB"})
|
|
|
|
# From config file
|
|
dialect = Dialect.from_config("entities.json")
|
|
|
|
# Compress zettel JSON (original format)
|
|
compressed = dialect.compress_file("zettels/file_001.json")
|
|
|
|
# Generate Layer 1 wake-up file
|
|
dialect.generate_layer1("zettels/", output="LAYER1.aaak")
|
|
"""
|
|
|
|
def __init__(self, entities: Dict[str, str] = None, skip_names: List[str] = None):
|
|
"""
|
|
Args:
|
|
entities: Mapping of full names -> short codes.
|
|
e.g. {"Alice": "ALC", "Bob": "BOB"}
|
|
If None, entities are auto-coded from first 3 chars.
|
|
skip_names: Names to skip (fictional characters, etc.)
|
|
"""
|
|
self.entity_codes = {}
|
|
if entities:
|
|
for name, code in entities.items():
|
|
self.entity_codes[name] = code
|
|
self.entity_codes[name.lower()] = code
|
|
self.skip_names = [n.lower() for n in (skip_names or [])]
|
|
|
|
@classmethod
|
|
def from_config(cls, config_path: str) -> "Dialect":
|
|
"""Load entity mappings from a JSON config file.
|
|
|
|
Config format:
|
|
{
|
|
"entities": {"Alice": "ALC", "Bob": "BOB"},
|
|
"skip_names": ["Gandalf", "Sherlock"]
|
|
}
|
|
"""
|
|
with open(config_path, "r") as f:
|
|
config = json.load(f)
|
|
return cls(
|
|
entities=config.get("entities", {}),
|
|
skip_names=config.get("skip_names", []),
|
|
)
|
|
|
|
def save_config(self, config_path: str):
|
|
"""Save current entity mappings to a JSON config file."""
|
|
canonical = {}
|
|
seen_codes = set()
|
|
for name, code in self.entity_codes.items():
|
|
if code not in seen_codes and not name.islower():
|
|
canonical[name] = code
|
|
seen_codes.add(code)
|
|
elif code not in seen_codes:
|
|
canonical[name] = code
|
|
seen_codes.add(code)
|
|
|
|
config = {
|
|
"entities": canonical,
|
|
"skip_names": self.skip_names,
|
|
}
|
|
with open(config_path, "w") as f:
|
|
json.dump(config, f, indent=2)
|
|
|
|
# === ENCODING (entity/emotion primitives) ===
|
|
|
|
def encode_entity(self, name: str) -> Optional[str]:
|
|
"""Convert a person/entity name to its short code."""
|
|
if any(s in name.lower() for s in self.skip_names):
|
|
return None
|
|
if name in self.entity_codes:
|
|
return self.entity_codes[name]
|
|
if name.lower() in self.entity_codes:
|
|
return self.entity_codes[name.lower()]
|
|
for key, code in self.entity_codes.items():
|
|
if key.lower() in name.lower():
|
|
return code
|
|
# Auto-code: first 3 chars uppercase
|
|
return name[:3].upper()
|
|
|
|
def encode_emotions(self, emotions: List[str]) -> str:
|
|
"""Convert emotion list to compact codes."""
|
|
codes = []
|
|
for e in emotions:
|
|
code = EMOTION_CODES.get(e, e[:4])
|
|
if code not in codes:
|
|
codes.append(code)
|
|
return "+".join(codes[:3])
|
|
|
|
def get_flags(self, zettel: dict) -> str:
|
|
"""Extract flags from zettel metadata."""
|
|
flags = []
|
|
if zettel.get("origin_moment"):
|
|
flags.append("ORIGIN")
|
|
if zettel.get("sensitivity", "").upper().startswith("MAXIMUM"):
|
|
flags.append("SENSITIVE")
|
|
notes = zettel.get("notes", "").lower()
|
|
if "foundational pillar" in notes or "core" in notes:
|
|
flags.append("CORE")
|
|
if "genesis" in notes or "genesis" in zettel.get("origin_label", "").lower():
|
|
flags.append("GENESIS")
|
|
if "pivot" in notes:
|
|
flags.append("PIVOT")
|
|
return "+".join(flags) if flags else ""
|
|
|
|
# === PLAIN TEXT COMPRESSION (new for mempalace) ===
|
|
|
|
def _detect_emotions(self, text: str) -> List[str]:
|
|
"""Detect emotions from plain text using keyword signals."""
|
|
text_lower = text.lower()
|
|
detected = []
|
|
seen = set()
|
|
for keyword, code in _EMOTION_SIGNALS.items():
|
|
if keyword in text_lower and code not in seen:
|
|
detected.append(code)
|
|
seen.add(code)
|
|
return detected[:3]
|
|
|
|
def _detect_flags(self, text: str) -> List[str]:
|
|
"""Detect importance flags from plain text using keyword signals."""
|
|
text_lower = text.lower()
|
|
detected = []
|
|
seen = set()
|
|
for keyword, flag in _FLAG_SIGNALS.items():
|
|
if keyword in text_lower and flag not in seen:
|
|
detected.append(flag)
|
|
seen.add(flag)
|
|
return detected[:3]
|
|
|
|
def _extract_topics(self, text: str, max_topics: int = 3) -> List[str]:
|
|
"""Extract key topic words from plain text."""
|
|
# Tokenize: alphanumeric words, lowercase
|
|
words = re.findall(r"[a-zA-Z][a-zA-Z_-]{2,}", text)
|
|
# Count frequency, skip stop words
|
|
freq = {}
|
|
for w in words:
|
|
w_lower = w.lower()
|
|
if w_lower in _STOP_WORDS or len(w_lower) < 3:
|
|
continue
|
|
freq[w_lower] = freq.get(w_lower, 0) + 1
|
|
|
|
# Also boost words that look like proper nouns or technical terms
|
|
for w in words:
|
|
w_lower = w.lower()
|
|
if w_lower in _STOP_WORDS:
|
|
continue
|
|
if w[0].isupper() and w_lower in freq:
|
|
freq[w_lower] += 2
|
|
# CamelCase or has underscore/hyphen
|
|
if "_" in w or "-" in w or (any(c.isupper() for c in w[1:])):
|
|
if w_lower in freq:
|
|
freq[w_lower] += 2
|
|
|
|
ranked = sorted(freq.items(), key=lambda x: -x[1])
|
|
return [w for w, _ in ranked[:max_topics]]
|
|
|
|
def _extract_key_sentence(self, text: str) -> str:
|
|
"""Extract the most important sentence fragment from text."""
|
|
# Split into sentences
|
|
sentences = re.split(r"[.!?\n]+", text)
|
|
sentences = [s.strip() for s in sentences if len(s.strip()) > 10]
|
|
if not sentences:
|
|
return ""
|
|
|
|
# Score each sentence
|
|
decision_words = {
|
|
"decided",
|
|
"because",
|
|
"instead",
|
|
"prefer",
|
|
"switched",
|
|
"chose",
|
|
"realized",
|
|
"important",
|
|
"key",
|
|
"critical",
|
|
"discovered",
|
|
"learned",
|
|
"conclusion",
|
|
"solution",
|
|
"reason",
|
|
"why",
|
|
"breakthrough",
|
|
"insight",
|
|
}
|
|
scored = []
|
|
for s in sentences:
|
|
score = 0
|
|
s_lower = s.lower()
|
|
for w in decision_words:
|
|
if w in s_lower:
|
|
score += 2
|
|
# Prefer shorter, punchier sentences
|
|
if len(s) < 80:
|
|
score += 1
|
|
if len(s) < 40:
|
|
score += 1
|
|
# Penalize very long sentences
|
|
if len(s) > 150:
|
|
score -= 2
|
|
scored.append((score, s))
|
|
|
|
scored.sort(key=lambda x: -x[0])
|
|
best = scored[0][1]
|
|
# Truncate if too long
|
|
if len(best) > 55:
|
|
best = best[:52] + "..."
|
|
return best
|
|
|
|
def _detect_entities_in_text(self, text: str) -> List[str]:
|
|
"""Find known entities in text, or detect capitalized names."""
|
|
found = []
|
|
# Check known entities
|
|
for name, code in self.entity_codes.items():
|
|
if not name.islower() and name.lower() in text.lower():
|
|
if code not in found:
|
|
found.append(code)
|
|
if found:
|
|
return found
|
|
|
|
# Fallback: find capitalized words that look like names (2+ chars, not sentence-start)
|
|
words = text.split()
|
|
for i, w in enumerate(words):
|
|
clean = re.sub(r"[^a-zA-Z]", "", w)
|
|
if (
|
|
len(clean) >= 2
|
|
and clean[0].isupper()
|
|
and clean[1:].islower()
|
|
and i > 0
|
|
and clean.lower() not in _STOP_WORDS
|
|
):
|
|
code = clean[:3].upper()
|
|
if code not in found:
|
|
found.append(code)
|
|
if len(found) >= 3:
|
|
break
|
|
return found
|
|
|
|
def compress(self, text: str, metadata: dict = None) -> str:
|
|
"""
|
|
Compress plain text into AAAK Dialect format.
|
|
|
|
This is the primary method for mempalace: takes any text content
|
|
(drawer content, transcript chunk, note) and returns a compressed
|
|
symbolic representation.
|
|
|
|
Args:
|
|
text: Plain text content to compress
|
|
metadata: Optional dict with keys like 'source_file', 'wing',
|
|
'room', 'date', etc.
|
|
|
|
Returns:
|
|
AAAK-compressed string (~30x smaller than input)
|
|
"""
|
|
metadata = metadata or {}
|
|
|
|
# Detect components
|
|
entities = self._detect_entities_in_text(text)
|
|
entity_str = "+".join(entities[:3]) if entities else "???"
|
|
|
|
topics = self._extract_topics(text)
|
|
topic_str = "_".join(topics[:3]) if topics else "misc"
|
|
|
|
quote = self._extract_key_sentence(text)
|
|
quote_part = f'"{quote}"' if quote else ""
|
|
|
|
emotions = self._detect_emotions(text)
|
|
emotion_str = "+".join(emotions) if emotions else ""
|
|
|
|
flags = self._detect_flags(text)
|
|
flag_str = "+".join(flags) if flags else ""
|
|
|
|
# Build source header if metadata available
|
|
source = metadata.get("source_file", "")
|
|
wing = metadata.get("wing", "")
|
|
room = metadata.get("room", "")
|
|
date = metadata.get("date", "")
|
|
|
|
lines = []
|
|
|
|
# Header line (if we have metadata)
|
|
if source or wing:
|
|
header_parts = [
|
|
wing or "?",
|
|
room or "?",
|
|
date or "?",
|
|
Path(source).stem if source else "?",
|
|
]
|
|
lines.append("|".join(header_parts))
|
|
|
|
# Content line
|
|
parts = [f"0:{entity_str}", topic_str]
|
|
if quote_part:
|
|
parts.append(quote_part)
|
|
if emotion_str:
|
|
parts.append(emotion_str)
|
|
if flag_str:
|
|
parts.append(flag_str)
|
|
|
|
lines.append("|".join(parts))
|
|
|
|
return "\n".join(lines)
|
|
|
|
# === ZETTEL-BASED ENCODING (original format, kept for compatibility) ===
|
|
|
|
def extract_key_quote(self, zettel: dict) -> str:
|
|
"""Pull the most important quote fragment from zettel content."""
|
|
content = zettel.get("content", "")
|
|
origin = zettel.get("origin_label", "")
|
|
notes = zettel.get("notes", "")
|
|
title = zettel.get("title", "")
|
|
all_text = content + " " + origin + " " + notes
|
|
|
|
quotes = []
|
|
quotes += re.findall(r'"([^"]{8,55})"', all_text)
|
|
for m in re.finditer(r"(?:^|[\s(])'([^']{8,55})'(?:[\s.,;:!?)]|$)", all_text):
|
|
quotes.append(m.group(1))
|
|
quotes += re.findall(
|
|
r'(?:says?|said|articulates?|reveals?|admits?|confesses?|asks?):\s*["\']?([^.!?]{10,55})[.!?]',
|
|
all_text,
|
|
re.IGNORECASE,
|
|
)
|
|
|
|
if quotes:
|
|
seen = set()
|
|
unique = []
|
|
for q in quotes:
|
|
q = q.strip()
|
|
if q not in seen and len(q) >= 8:
|
|
seen.add(q)
|
|
unique.append(q)
|
|
quotes = unique
|
|
|
|
emotional_words = {
|
|
"love",
|
|
"fear",
|
|
"remember",
|
|
"soul",
|
|
"feel",
|
|
"stupid",
|
|
"scared",
|
|
"beautiful",
|
|
"destroy",
|
|
"respect",
|
|
"trust",
|
|
"consciousness",
|
|
"alive",
|
|
"forget",
|
|
"waiting",
|
|
"peace",
|
|
"matter",
|
|
"real",
|
|
"guilt",
|
|
"escape",
|
|
"rest",
|
|
"hope",
|
|
"dream",
|
|
"lost",
|
|
"found",
|
|
}
|
|
scored = []
|
|
for q in quotes:
|
|
score = 0
|
|
if q[0].isupper() or q.startswith("I "):
|
|
score += 2
|
|
matches = sum(1 for w in emotional_words if w in q.lower())
|
|
score += matches * 2
|
|
if len(q) > 20:
|
|
score += 1
|
|
if q.startswith("The ") or q.startswith("This ") or q.startswith("She "):
|
|
score -= 2
|
|
scored.append((score, q))
|
|
scored.sort(key=lambda x: -x[0])
|
|
if scored:
|
|
return scored[0][1]
|
|
|
|
if " - " in title:
|
|
return title.split(" - ", 1)[1][:45]
|
|
return ""
|
|
|
|
def encode_zettel(self, zettel: dict) -> str:
|
|
"""Encode a single zettel into AAAK Dialect."""
|
|
zid = zettel["id"].split("-")[-1]
|
|
|
|
entity_codes = [self.encode_entity(p) for p in zettel.get("people", [])]
|
|
entity_codes = [e for e in entity_codes if e is not None]
|
|
if not entity_codes:
|
|
entity_codes = ["???"]
|
|
entities = "+".join(sorted(set(entity_codes)))
|
|
|
|
topics = zettel.get("topics", [])
|
|
topic_str = "_".join(topics[:2]) if topics else "misc"
|
|
|
|
quote = self.extract_key_quote(zettel)
|
|
quote_part = f'"{quote}"' if quote else ""
|
|
|
|
weight = zettel.get("emotional_weight", 0.5)
|
|
emotions = self.encode_emotions(zettel.get("emotional_tone", []))
|
|
flags = self.get_flags(zettel)
|
|
|
|
parts = [f"{zid}:{entities}", topic_str]
|
|
if quote_part:
|
|
parts.append(quote_part)
|
|
parts.append(str(weight))
|
|
if emotions:
|
|
parts.append(emotions)
|
|
if flags:
|
|
parts.append(flags)
|
|
|
|
return "|".join(parts)
|
|
|
|
def encode_tunnel(self, tunnel: dict) -> str:
|
|
"""Encode a tunnel connection."""
|
|
from_id = tunnel["from"].split("-")[-1]
|
|
to_id = tunnel["to"].split("-")[-1]
|
|
label = tunnel.get("label", "")
|
|
short_label = label.split(":")[0] if ":" in label else label[:30]
|
|
return f"T:{from_id}<->{to_id}|{short_label}"
|
|
|
|
def encode_file(self, zettel_json: dict) -> str:
|
|
"""Encode an entire zettel file into AAAK Dialect."""
|
|
lines = []
|
|
|
|
source = zettel_json.get("source_file", "unknown")
|
|
file_num = source.split("-")[0] if "-" in source else "000"
|
|
date = zettel_json.get("zettels", [{}])[0].get("date_context", "unknown")
|
|
|
|
all_people = set()
|
|
for z in zettel_json.get("zettels", []):
|
|
for p in z.get("people", []):
|
|
code = self.encode_entity(p)
|
|
if code is not None:
|
|
all_people.add(code)
|
|
if not all_people:
|
|
all_people = {"???"}
|
|
primary = "+".join(sorted(all_people)[:3])
|
|
|
|
title = source.replace(".txt", "").split("-", 1)[-1].strip() if "-" in source else source
|
|
lines.append(f"{file_num}|{primary}|{date}|{title}")
|
|
|
|
arc = zettel_json.get("emotional_arc", "")
|
|
if arc:
|
|
lines.append(f"ARC:{arc}")
|
|
|
|
for z in zettel_json.get("zettels", []):
|
|
lines.append(self.encode_zettel(z))
|
|
|
|
for t in zettel_json.get("tunnels", []):
|
|
lines.append(self.encode_tunnel(t))
|
|
|
|
return "\n".join(lines)
|
|
|
|
# === FILE-BASED COMPRESSION ===
|
|
|
|
def compress_file(self, zettel_json_path: str, output_path: str = None) -> str:
|
|
"""Read a zettel JSON file and compress it to AAAK Dialect."""
|
|
with open(zettel_json_path, "r") as f:
|
|
data = json.load(f)
|
|
dialect = self.encode_file(data)
|
|
if output_path:
|
|
with open(output_path, "w") as f:
|
|
f.write(dialect)
|
|
return dialect
|
|
|
|
def compress_all(self, zettel_dir: str, output_path: str = None) -> str:
|
|
"""Compress ALL zettel files into a single AAAK Dialect file."""
|
|
all_dialect = []
|
|
for fname in sorted(os.listdir(zettel_dir)):
|
|
if fname.endswith(".json"):
|
|
fpath = os.path.join(zettel_dir, fname)
|
|
with open(fpath, "r") as f:
|
|
data = json.load(f)
|
|
dialect = self.encode_file(data)
|
|
all_dialect.append(dialect)
|
|
all_dialect.append("---")
|
|
combined = "\n".join(all_dialect)
|
|
if output_path:
|
|
with open(output_path, "w") as f:
|
|
f.write(combined)
|
|
return combined
|
|
|
|
# === LAYER 1 GENERATION ===
|
|
|
|
def generate_layer1(
|
|
self,
|
|
zettel_dir: str,
|
|
output_path: str = None,
|
|
identity_sections: Dict[str, List[str]] = None,
|
|
weight_threshold: float = 0.85,
|
|
) -> str:
|
|
"""
|
|
Auto-generate a Layer 1 wake-up file from all processed zettel files.
|
|
|
|
Pulls highest-weight moments (>= threshold) and any with ORIGIN/CORE/GENESIS flags.
|
|
Groups them by date into MOMENTS sections.
|
|
"""
|
|
from datetime import date as date_cls
|
|
|
|
essential = []
|
|
|
|
for fname in sorted(os.listdir(zettel_dir)):
|
|
if not fname.endswith(".json"):
|
|
continue
|
|
fpath = os.path.join(zettel_dir, fname)
|
|
with open(fpath, "r") as f:
|
|
data = json.load(f)
|
|
|
|
file_num = fname.replace("file_", "").replace(".json", "")
|
|
source_date = data.get("zettels", [{}])[0].get("date_context", "unknown")
|
|
|
|
for z in data.get("zettels", []):
|
|
weight = z.get("emotional_weight", 0)
|
|
is_origin = z.get("origin_moment", False)
|
|
flags = self.get_flags(z)
|
|
has_key_flag = (
|
|
any(f in flags for f in ["ORIGIN", "CORE", "GENESIS"]) if flags else False
|
|
)
|
|
|
|
if weight >= weight_threshold or is_origin or has_key_flag:
|
|
essential.append((z, file_num, source_date))
|
|
|
|
all_tunnels = []
|
|
for fname in sorted(os.listdir(zettel_dir)):
|
|
if not fname.endswith(".json"):
|
|
continue
|
|
fpath = os.path.join(zettel_dir, fname)
|
|
with open(fpath, "r") as f:
|
|
data = json.load(f)
|
|
for t in data.get("tunnels", []):
|
|
all_tunnels.append(t)
|
|
|
|
essential.sort(key=lambda x: x[0].get("emotional_weight", 0), reverse=True)
|
|
|
|
by_date = {}
|
|
for z, fnum, sdate in essential:
|
|
key = sdate.split(",")[0].strip()
|
|
if key not in by_date:
|
|
by_date[key] = []
|
|
by_date[key].append((z, fnum))
|
|
|
|
lines = []
|
|
lines.append("## LAYER 1 -- ESSENTIAL STORY")
|
|
lines.append(f"## Auto-generated from zettel files. Updated {date_cls.today()}.")
|
|
lines.append("")
|
|
|
|
if identity_sections:
|
|
for section_name, section_lines in identity_sections.items():
|
|
lines.append(f"={section_name}=")
|
|
lines.extend(section_lines)
|
|
lines.append("")
|
|
|
|
for date_key in sorted(by_date.keys()):
|
|
lines.append(f"=MOMENTS[{date_key}]=")
|
|
for z, fnum in by_date[date_key]:
|
|
entities = []
|
|
for p in z.get("people", []):
|
|
code = self.encode_entity(p)
|
|
if code:
|
|
entities.append(code)
|
|
if not entities:
|
|
entities = ["???"]
|
|
ent_str = "+".join(sorted(set(entities)))
|
|
|
|
quote = self.extract_key_quote(z)
|
|
weight = z.get("emotional_weight", 0.5)
|
|
flags = self.get_flags(z)
|
|
sensitivity = z.get("sensitivity", "")
|
|
|
|
parts = [ent_str]
|
|
title = z.get("title", "")
|
|
if " - " in title:
|
|
hint = title.split(" - ", 1)[1][:30]
|
|
else:
|
|
hint = "_".join(z.get("topics", [])[:2])
|
|
if hint:
|
|
parts.append(hint)
|
|
if quote and quote != hint and quote not in (title, hint):
|
|
parts.append(f'"{quote}"')
|
|
if sensitivity and "SENSITIVE" not in (flags or ""):
|
|
parts.append("SENSITIVE")
|
|
parts.append(str(weight))
|
|
if flags:
|
|
parts.append(flags)
|
|
|
|
lines.append("|".join(parts))
|
|
lines.append("")
|
|
|
|
if all_tunnels:
|
|
lines.append("=TUNNELS=")
|
|
for t in all_tunnels[:8]:
|
|
label = t.get("label", "")
|
|
short = label.split(":")[0] if ":" in label else label[:40]
|
|
lines.append(short)
|
|
lines.append("")
|
|
|
|
result = "\n".join(lines)
|
|
|
|
if output_path:
|
|
with open(output_path, "w") as f:
|
|
f.write(result)
|
|
|
|
return result
|
|
|
|
# === DECODING ===
|
|
|
|
def decode(self, dialect_text: str) -> dict:
|
|
"""Parse an AAAK Dialect string back into a readable summary."""
|
|
lines = dialect_text.strip().split("\n")
|
|
result = {"header": {}, "arc": "", "zettels": [], "tunnels": []}
|
|
|
|
for line in lines:
|
|
if line.startswith("ARC:"):
|
|
result["arc"] = line[4:]
|
|
elif line.startswith("T:"):
|
|
result["tunnels"].append(line)
|
|
elif "|" in line and ":" in line.split("|")[0]:
|
|
result["zettels"].append(line)
|
|
elif "|" in line:
|
|
parts = line.split("|")
|
|
result["header"] = {
|
|
"file": parts[0] if len(parts) > 0 else "",
|
|
"entities": parts[1] if len(parts) > 1 else "",
|
|
"date": parts[2] if len(parts) > 2 else "",
|
|
"title": parts[3] if len(parts) > 3 else "",
|
|
}
|
|
|
|
return result
|
|
|
|
# === STATS ===
|
|
|
|
@staticmethod
|
|
def count_tokens(text: str) -> int:
|
|
"""Rough token count (1 token ~ 3 chars for structured text)."""
|
|
return len(text) // 3
|
|
|
|
def compression_stats(self, original_text: str, compressed: str) -> dict:
|
|
"""Get compression statistics for a text->AAAK conversion."""
|
|
orig_tokens = self.count_tokens(original_text)
|
|
comp_tokens = self.count_tokens(compressed)
|
|
return {
|
|
"original_tokens": orig_tokens,
|
|
"compressed_tokens": comp_tokens,
|
|
"ratio": orig_tokens / max(comp_tokens, 1),
|
|
"original_chars": len(original_text),
|
|
"compressed_chars": len(compressed),
|
|
}
|
|
|
|
|
|
# === CLI ===
def _main(argv: List[str]) -> None:
    """Run the command-line interface.

    Args:
        argv: Command-line arguments without the program name.

    Prints the usage text and exits with status 1 when no command is given
    or when a flag that needs a value (``--config``, ``--file``, ``--stats``)
    is the last argument.
    """
    import sys

    def usage():
        print("AAAK Dialect -- Compressed Symbolic Memory for Any LLM")
        print()
        print("Usage:")
        print(" python dialect.py <text> # Compress text from argument")
        print(" python dialect.py --file <zettel.json> # Compress zettel JSON file")
        print(" python dialect.py --all <zettel_dir> # Compress all zettel files")
        print(" python dialect.py --stats <zettel.json> # Show compression stats")
        print(" python dialect.py --layer1 <zettel_dir> # Generate Layer 1 wake-up file")
        print(" python dialect.py --init # Create example config")
        print()
        print("Options:")
        print(" --config <path> Load entity mappings from JSON config")
        sys.exit(1)

    args = list(argv)
    if not args:
        usage()

    # Parse --config flag (it may appear anywhere on the command line)
    config_path = None
    if "--config" in args:
        idx = args.index("--config")
        if idx + 1 >= len(args):
            usage()  # --config given without a path
        config_path = args[idx + 1]
        args = args[:idx] + args[idx + 2 :]
    if not args:
        usage()  # nothing left to do once --config is removed

    # Create dialect instance
    dialect = Dialect.from_config(config_path) if config_path else Dialect()

    command = args[0]
    if command in ("--file", "--stats") and len(args) < 2:
        usage()  # these commands need a file path

    if command == "--init":
        example = {
            "entities": {
                "Alice": "ALC",
                "Bob": "BOB",
                "Dr. Chen": "CHN",
            },
            "skip_names": [],
        }
        out_path = "entities.json"
        with open(out_path, "w", encoding="utf-8") as f:
            json.dump(example, f, indent=2)
        print(f"Created example config: {out_path}")
        print("Edit this file with your own entity mappings, then use --config entities.json")

    elif command == "--file":
        result = dialect.compress_file(args[1])
        tokens = Dialect.count_tokens(result)
        print(f"~{tokens} tokens")
        print()
        print(result)

    elif command == "--all":
        zettel_dir = args[1] if len(args) > 1 else "."
        output = os.path.join(zettel_dir, "COMPRESSED_MEMORY.aaak")
        result = dialect.compress_all(zettel_dir, output)
        tokens = Dialect.count_tokens(result)
        print(f"Compressed to: {output}")
        print(f"Total: ~{tokens} tokens")
        print()
        print(result)

    elif command == "--stats":
        with open(args[1], "r", encoding="utf-8") as f:
            data = json.load(f)
        # The baseline is the pretty-printed JSON, not the file as stored.
        json_str = json.dumps(data, indent=2)
        encoded = dialect.encode_file(data)
        stats = dialect.compression_stats(json_str, encoded)
        print("=== COMPRESSION STATS ===")
        print(f"JSON: ~{stats['original_tokens']:,} tokens")
        print(f"AAAK: ~{stats['compressed_tokens']:,} tokens")
        print(f"Ratio: {stats['ratio']:.0f}x")
        print()
        print("=== AAAK DIALECT OUTPUT ===")
        print(encoded)

    elif command == "--layer1":
        zettel_dir = args[1] if len(args) > 1 else "."
        output = os.path.join(zettel_dir, "LAYER1.aaak")
        result = dialect.generate_layer1(zettel_dir, output)
        tokens = Dialect.count_tokens(result)
        print(f"Layer 1: {output}")
        print(f"Total: ~{tokens} tokens")
        print()
        print(result)

    else:
        # Treat remaining args as text to compress
        text = " ".join(args)
        compressed = dialect.compress(text)
        stats = dialect.compression_stats(text, compressed)
        print(f"Original: ~{stats['original_tokens']} tokens ({stats['original_chars']} chars)")
        print(f"AAAK: ~{stats['compressed_tokens']} tokens ({stats['compressed_chars']} chars)")
        print(f"Ratio: {stats['ratio']:.1f}x")
        print()
        print(compressed)


if __name__ == "__main__":
    import sys

    _main(sys.argv[1:])
|