From 79c9c0e517e39b5405853ee794ee8c4591947942 Mon Sep 17 00:00:00 2001 From: eblander Date: Tue, 14 Apr 2026 09:26:47 -0400 Subject: [PATCH] fix: use permissive validator for KG entity values (closes #455) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit sanitize_name rejects commas, colons, parentheses, and slashes — characters that commonly appear in knowledge graph subject/object values. Adds sanitize_kg_value for KG entity fields (subject, object, entity) while keeping sanitize_name for predicates and wing/room names. --- mempalace/config.py | 24 +++++++++++++++++++ mempalace/mcp_server.py | 14 +++++------ tests/test_config.py | 53 ++++++++++++++++++++++++++++++++++++++++- 3 files changed, 83 insertions(+), 8 deletions(-) diff --git a/mempalace/config.py b/mempalace/config.py index 6e84de1..a93ba81 100644 --- a/mempalace/config.py +++ b/mempalace/config.py @@ -47,6 +47,30 @@ def sanitize_name(value: str, field_name: str = "name") -> str: return value +def sanitize_kg_value(value: str, field_name: str = "value") -> str: + """Validate a knowledge-graph entity name (subject or object). + + More permissive than sanitize_name — allows punctuation like commas, + colons, and parentheses that are common in natural-language KG values. + Only blocks null bytes and over-length strings. + + Not used for wing/room names (which have filesystem constraints) or + predicates (which should be simple relationship identifiers). + """ + if not isinstance(value, str) or not value.strip(): + raise ValueError(f"{field_name} must be a non-empty string") + + value = value.strip() + + if len(value) > MAX_NAME_LENGTH: + raise ValueError(f"{field_name} exceeds maximum length of {MAX_NAME_LENGTH} characters") + + if "\x00" in value: + raise ValueError(f"{field_name} contains null bytes") + + return value + + def sanitize_content(value: str, max_length: int = 100_000) -> str: """Validate drawer/diary content length.""" if not isinstance(value, str) or not value.strip(): diff --git a/mempalace/mcp_server.py b/mempalace/mcp_server.py index 4653f5f..f0edfbb 100644 --- a/mempalace/mcp_server.py +++ b/mempalace/mcp_server.py @@ -30,7 +30,7 @@ import time from datetime import datetime from pathlib import Path -from .config import MempalaceConfig, sanitize_name, sanitize_content +from .config import MempalaceConfig, sanitize_kg_value, sanitize_name, sanitize_content from .version import __version__ from .backends.chroma import ChromaBackend, ChromaCollection from .query_sanitizer import sanitize_query @@ -810,7 +810,7 @@ def tool_update_drawer(drawer_id: str, content: str = None, wing: str = None, ro def tool_kg_query(entity: str, as_of: str = None, direction: str = "both"): """Query the knowledge graph for an entity's relationships.""" try: - entity = sanitize_name(entity, "entity") + entity = sanitize_kg_value(entity, "entity") except ValueError as e: return {"error": str(e)} if direction not in ("outgoing", "incoming", "both"): @@ -824,9 +824,9 @@ def tool_kg_add( ): """Add a relationship to the knowledge graph.""" try: - subject = sanitize_name(subject, "subject") + subject = sanitize_kg_value(subject, "subject") predicate = sanitize_name(predicate, "predicate") - object = sanitize_name(object, "object") + object = sanitize_kg_value(object, "object") except ValueError as e: return {"success": False, "error": str(e)} @@ -849,9 +849,9 @@ def tool_kg_add( def tool_kg_invalidate(subject: str, predicate: str, object: str, ended: str = None): """Mark a fact as no longer true (set end date).""" try: - subject = sanitize_name(subject, "subject") + subject = sanitize_kg_value(subject, "subject") predicate = sanitize_name(predicate, "predicate") - object = sanitize_name(object, "object") + object = sanitize_kg_value(object, "object") except ValueError as e: return {"success": False, "error": str(e)} _wal_log( @@ -870,7 +870,7 @@ def tool_kg_timeline(entity: str = None): """Get chronological timeline of facts, optionally for one entity.""" if entity is not None: try: - entity = sanitize_name(entity, "entity") + entity = sanitize_kg_value(entity, "entity") except ValueError as e: return {"error": str(e)} results = _kg.timeline(entity) diff --git a/tests/test_config.py b/tests/test_config.py index 761efe9..e6dffc3 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -3,7 +3,7 @@ import json import tempfile import pytest -from mempalace.config import MempalaceConfig, sanitize_name +from mempalace.config import MempalaceConfig, sanitize_kg_value, sanitize_name def test_default_config(): @@ -66,3 +66,54 @@ def test_sanitize_name_rejects_path_traversal(): def test_sanitize_name_rejects_empty(): with pytest.raises(ValueError): sanitize_name("") + + +# --- sanitize_kg_value --- + + +def test_kg_value_accepts_commas(): + assert sanitize_kg_value("Alice, Bob, and Carol") == "Alice, Bob, and Carol" + + +def test_kg_value_accepts_colons(): + assert sanitize_kg_value("role: engineer") == "role: engineer" + + +def test_kg_value_accepts_parentheses(): + assert sanitize_kg_value("Python (programming)") == "Python (programming)" + + +def test_kg_value_accepts_slashes(): + assert sanitize_kg_value("owner/repo") == "owner/repo" + + +def test_kg_value_accepts_hash(): + assert sanitize_kg_value("issue #123") == "issue #123" + + +def test_kg_value_accepts_unicode(): + assert sanitize_kg_value("Jānis Bērziņš") == "Jānis Bērziņš" + + +def test_kg_value_strips_whitespace(): + assert sanitize_kg_value(" hello ") == "hello" + + +def test_kg_value_rejects_empty(): + with pytest.raises(ValueError): + sanitize_kg_value("") + + +def test_kg_value_rejects_whitespace_only(): + with pytest.raises(ValueError): + sanitize_kg_value(" ") + + +def test_kg_value_rejects_null_bytes(): + with pytest.raises(ValueError): + sanitize_kg_value("hello\x00world") + + +def test_kg_value_rejects_over_length(): + with pytest.raises(ValueError): + sanitize_kg_value("a" * 129)