fix: use permissive validator for KG entity values (closes #455)

sanitize_name rejects commas, colons, parentheses, and slashes — characters
that commonly appear in knowledge graph subject/object values. Adds
sanitize_kg_value for KG entity fields (subject, object, entity) while
keeping sanitize_name for predicates and wing/room names.
This commit is contained in:
eblander
2026-04-14 09:26:47 -04:00
parent b060171c59
commit 79c9c0e517
3 changed files with 83 additions and 8 deletions
+24
View File
@@ -47,6 +47,30 @@ def sanitize_name(value: str, field_name: str = "name") -> str:
return value
def sanitize_kg_value(value: str, field_name: str = "value") -> str:
    """Return a cleaned knowledge-graph entity value (subject or object).

    This validator is deliberately looser than sanitize_name: punctuation
    such as commas, colons, and parentheses, which is common in
    natural-language KG values, passes through untouched. The only
    rejections are for missing/blank input, over-length strings, and
    embedded null bytes.

    It is not meant for wing/room names (which have filesystem
    constraints) or for predicates (which should be simple relationship
    identifiers).

    Args:
        value: The raw entity value to validate.
        field_name: Label used at the start of error messages.

    Returns:
        The value with leading and trailing whitespace removed.

    Raises:
        ValueError: If value is not a string, is empty or whitespace-only,
            is longer than MAX_NAME_LENGTH after stripping, or contains a
            null byte.
    """
    # Collapse "not a string" and "blank string" into one falsy sentinel so
    # both cases share a single guard and a single error message.
    cleaned = value.strip() if isinstance(value, str) else ""
    if not cleaned:
        raise ValueError(f"{field_name} must be a non-empty string")

    # The length limit applies to the stripped text, not the raw input.
    if len(cleaned) > MAX_NAME_LENGTH:
        raise ValueError(f"{field_name} exceeds maximum length of {MAX_NAME_LENGTH} characters")

    # str.strip() leaves "\x00" in place, so null bytes must be checked
    # explicitly.
    if "\x00" in cleaned:
        raise ValueError(f"{field_name} contains null bytes")

    return cleaned
def sanitize_content(value: str, max_length: int = 100_000) -> str:
"""Validate drawer/diary content length."""
if not isinstance(value, str) or not value.strip():