Files
mempalace/mempalace/i18n/en.json
T
Igor Lins e Silva 6aebf458ff fix(entity): reduce noise in regex-based detection
The pattern-matching detector had several systematic false positives that
crowded the init review with nonsense. Concrete fixes:

- CamelCase extraction: add `[A-Z][a-z]+(?:[A-Z][a-z]+|[A-Z]{2,})+` to
  candidate patterns so `MemPalace`, `ChromaDB`, `OpenAI`, `ChatGPT` are
  visible. Previously `MemPalace` fragmented into `Mem` + `Palace`.
- The dialogue pattern `^NAME:\s` now requires >=2 matches to count.
  Previously, a single metadata line like `Created: 2026-04-21` scored as
  dialogue and classified `Created` as a person.
- Versioned/hyphenated pattern tightened to `\b{name}[-_]v?\d+(?:\.\d+)*\b`
  (version-only). The previous `\b{name}[-v]\w+` matched `context-manager`,
  `multi-word`, etc. — i.e. every hyphenated compound.
- Skip LICENSE/COPYING/NOTICE/AUTHORS/PATENTS files during scan. They
  produce pure-English-prose noise (`Contributor`, `Software`, `Covered`,
  `Before`).
- Extra SKIP_DIRS: `.terraform`, `vendor`, `target`.
- Expand stopword list with capitalized participles/descriptors that
  commonly appear at sentence start: `created`, `updated`, `extracted`,
  `processed`, `total`, `summary`, `auto`, `multi`, `hybrid`, `context`,
  `bridge`, `batch`, `local`, `native`, `never`, `before`, `after`, etc.
- classify_entity: a strong pronoun signal alone (a single signal
  category) now classifies an entity as a person. A diary's main character
  is referenced with pronouns, not dialogue markers; the previous
  requirement of two signal categories demoted `Lu` (16 pronoun hits
  across 30 mentions) to uncertain. The new rule is gated on
  `pronoun_hits >= 5 AND pronoun_hits / frequency >= 0.2` so that common
  sentence-start words (`Never`, `Before`) with only incidental pronoun
  proximity stay uncertain.
2026-04-24 00:20:32 -03:00

157 lines
7.1 KiB
JSON

{
"lang": "en",
"label": "English",
"terms": {
"palace": "palace",
"wing": "wing",
"hall": "hall",
"closet": "closet",
"drawer": "drawer",
"mine": "mine",
"search": "search",
"status": "status",
"init": "init",
"repair": "repair",
"migrate": "migrate",
"entity": "entity",
"topic": "topic"
},
"cli": {
"mine_start": "Mining {path}...",
"mine_complete": "Done. {closets} closets, {drawers} drawers created.",
"mine_skip": "Already mined. Use --force to re-mine.",
"search_no_results": "No results for: {query}",
"search_results": "Found {count} results:",
"status_palace": "Palace: {path}",
"status_wings": "{count} wings",
"status_closets": "{count} closets",
"status_drawers": "{count} drawers",
"init_complete": "Palace initialized at {path}",
"init_exists": "Palace already exists at {path}",
"repair_complete": "Repair complete. {fixed} issues fixed.",
"migrate_complete": "Migration complete.",
"no_palace": "No palace found. Run: mempalace init <dir>"
},
"aaak": {
"instruction": "Compress to index format. Hyphens between words, pipes between concepts. Drop articles and filler. Keep names and numbers exact."
},
"regex": {
"topic_pattern": "[A-Z][a-z]{2,}|[A-Za-z][A-Za-z0-9_]{2,}",
"stop_words": "the this that these those some many most each every other only such very will would could should must shall yeah okay also even then now already still back done make take give know think want need going come find work added saved session summary conversation topics source about once just really actually here there where good great better thank please sorry right wrong true false",
"quote_pattern": "\"([^\"]{20,200})\"",
"action_pattern": "(?:built|fixed|wrote|added|pushed|measured|tested|reviewed|created|deleted|updated|configured|deployed|migrated)\\s+[\\w\\s]{3,30}"
},
"entity": {
"candidate_pattern": "[A-Z][a-z]+(?:[A-Z][a-z]+|[A-Z]{2,})+|[A-Z][a-z]{1,19}",
"multi_word_pattern": "[A-Z][a-z]+(?:\\s+[A-Z][a-z]+)+",
"person_verb_patterns": [
"\\b{name}\\s+said\\b",
"\\b{name}\\s+asked\\b",
"\\b{name}\\s+told\\b",
"\\b{name}\\s+replied\\b",
"\\b{name}\\s+laughed\\b",
"\\b{name}\\s+smiled\\b",
"\\b{name}\\s+cried\\b",
"\\b{name}\\s+felt\\b",
"\\b{name}\\s+thinks?\\b",
"\\b{name}\\s+wants?\\b",
"\\b{name}\\s+loves?\\b",
"\\b{name}\\s+hates?\\b",
"\\b{name}\\s+knows?\\b",
"\\b{name}\\s+decided\\b",
"\\b{name}\\s+pushed\\b",
"\\b{name}\\s+wrote\\b",
"\\bhey\\s+{name}\\b",
"\\bthanks?\\s+{name}\\b",
"\\bhi\\s+{name}\\b",
"\\bdear\\s+{name}\\b"
],
"pronoun_patterns": [
"\\bshe\\b",
"\\bher\\b",
"\\bhers\\b",
"\\bhe\\b",
"\\bhim\\b",
"\\bhis\\b",
"\\bthey\\b",
"\\bthem\\b",
"\\btheir\\b"
],
"dialogue_patterns": [
"^>\\s*{name}[:\\s]",
"^{name}:\\s",
"^\\[{name}\\]",
"\"{name}\\s+said"
],
"direct_address_pattern": "\\bhey\\s+{name}\\b|\\bthanks?\\s+{name}\\b|\\bhi\\s+{name}\\b",
"project_verb_patterns": [
"\\bbuilding\\s+{name}\\b",
"\\bbuilt\\s+{name}\\b",
"\\bship(?:ping|ped)?\\s+{name}\\b",
"\\blaunch(?:ing|ed)?\\s+{name}\\b",
"\\bdeploy(?:ing|ed)?\\s+{name}\\b",
"\\binstall(?:ing|ed)?\\s+{name}\\b",
"\\bthe\\s+{name}\\s+architecture\\b",
"\\bthe\\s+{name}\\s+pipeline\\b",
"\\bthe\\s+{name}\\s+system\\b",
"\\bthe\\s+{name}\\s+repo\\b",
"\\b{name}\\s+v\\d+\\b",
"\\b{name}\\.py\\b",
"\\b{name}-core\\b",
"\\b{name}-local\\b",
"\\bimport\\s+{name}\\b",
"\\bpip\\s+install\\s+{name}\\b"
],
"stopwords": [
"the", "a", "an", "and", "or", "but", "in", "on", "at", "to",
"for", "of", "with", "by", "from", "as", "is", "was", "are", "were",
"be", "been", "being", "have", "has", "had", "do", "does", "did",
"will", "would", "could", "should", "may", "might", "must", "shall", "can",
"this", "that", "these", "those", "it", "its", "they", "them", "their",
"we", "our", "you", "your", "i", "my", "me", "he", "she", "his", "her",
"who", "what", "when", "where", "why", "how", "which",
"if", "then", "so", "not", "no", "yes", "ok", "okay",
"just", "very", "really", "also", "already", "still", "even", "only",
"here", "there", "now", "too", "up", "out", "about", "like",
"use", "get", "got", "make", "made", "take", "put", "come", "go", "see",
"know", "think", "true", "false", "none", "null", "new", "old", "all", "any", "some",
"return", "print", "def", "class", "import",
"step", "usage", "run", "check", "find", "add", "set", "list",
"args", "dict", "str", "int", "bool", "path", "file", "type", "name",
"note", "example", "option", "result", "error", "warning", "info",
"every", "each", "more", "less", "next", "last", "first", "second",
"stack", "layer", "mode", "test", "stop", "start", "copy", "move",
"source", "target", "output", "input", "data", "item", "key", "value",
"returns", "raises", "yields", "self", "cls", "kwargs",
"world", "well", "want", "topic", "choose", "social", "cars", "phones",
"healthcare", "ex", "machina", "deus", "human", "humans", "people",
"things", "something", "nothing", "everything", "anything", "someone",
"everyone", "anyone", "way", "time", "day", "life", "place", "thing",
"part", "kind", "sort", "case", "point", "idea", "fact", "sense",
"question", "answer", "reason", "number", "version", "system",
"hey", "hi", "hello", "thanks", "thank", "right", "let",
"click", "hit", "press", "tap", "drag", "drop", "open", "close",
"save", "load", "launch", "install", "download", "upload", "scroll",
"select", "enter", "submit", "cancel", "confirm", "delete", "paste",
"write", "read", "search", "show", "hide",
"desktop", "documents", "downloads", "users", "home", "library",
"applications", "preferences", "settings", "terminal",
"actor", "vector", "remote", "control", "duration", "fetch",
"agents", "tools", "others", "guards", "ethics", "regulation",
"learning", "thinking", "memory", "language", "intelligence",
"technology", "society", "culture", "future", "history", "science",
"model", "models", "network", "networks", "training", "inference",
"created", "updated", "deleted", "added", "removed", "modified",
"extracted", "processed", "generated", "compiled", "launched", "installed",
"deployed", "executed", "loaded", "parsed", "validated", "configured",
"total", "summary", "covered", "included", "pending", "failed", "success",
"ready", "active", "disabled", "enabled", "available", "completed",
"auto", "multi", "mini", "micro", "meta", "super", "hybrid",
"context", "bridge", "batch", "local", "global", "native", "cloud",
"before", "after", "during", "often", "always", "never",
"project", "contributor", "software",
"backend", "frontend", "server", "client", "service", "app", "api"
]
}
}