fix(kg): validate ISO-8601 date formats at MCP boundary

tool_kg_query (as_of), tool_kg_add (valid_from), and tool_kg_invalidate
(ended) accepted any string and forwarded it to SQLite without format
validation. Parameterized queries prevent SQL injection, but invalid
date strings silently produce empty result sets — callers cannot
distinguish "no fact at this time" from "your date format was
unrecognized." This is especially painful for natural-language LLM
callers that synthesize dates like "March 2026" or "Jan 2025".

Add sanitize_iso_date() in config.py alongside the other input
validators. It accepts YYYY, YYYY-MM, and YYYY-MM-DD forms; passes
through None/empty; and raises ValueError with a field-named message
on anything else. Call it from the three kg MCP tool wrappers before
values reach the storage layer so the caller gets a clear error
instead of a silent miss.

Closes #1164
This commit is contained in:
Arnold Wender
2026-04-24 11:09:16 +02:00
parent fdfaf017ab
commit 4d98b05240
4 changed files with 147 additions and 1 deletions
+28
View File
@@ -81,6 +81,34 @@ def sanitize_kg_value(value: str, field_name: str = "value") -> str:
return value return value
# ISO-8601 date validator for knowledge-graph temporal parameters
# (as_of, valid_from, valid_to, ended). Parameterized queries already
# prevent SQL injection, but a malformed date string (e.g. "March 2026")
# silently matches no rows, so callers cannot distinguish "no fact at
# this time" from "your date format was unrecognized." Accepted forms:
# YYYY, YYYY-MM, YYYY-MM-DD.
# Explicit ``[0-9]`` classes instead of ``\d``: in a str pattern ``\d`` also
# matches non-ASCII Unicode decimal digits (e.g. Arabic-Indic), which would
# pass validation and then never match any ASCII date held in storage.
_ISO_DATE_RE = re.compile(
    r"(?P<year>[0-9]{4})"
    r"(?:-(?P<month>0[1-9]|1[0-2])"
    r"(?:-(?P<day>0[1-9]|[12][0-9]|3[01]))?)?"
)


def sanitize_iso_date(value, field_name: str = "date"):
    """Validate an ISO-8601 calendar date string, accepting None or empty as-is.

    Accepts ``YYYY``, ``YYYY-MM``, or ``YYYY-MM-DD`` written with ASCII
    digits. Surrounding whitespace is stripped before validation. A full
    ``YYYY-MM-DD`` value must also name a day that exists in that month
    (so ``2026-02-30`` and ``2025-02-29`` are rejected).

    Args:
        value: The candidate date. ``None`` and ``""`` are returned unchanged.
        field_name: Parameter name used as the prefix of the error message.

    Returns:
        ``value`` unchanged when it is ``None`` or ``""``; otherwise the
        stripped date string.

    Raises:
        ValueError: If ``value`` is not a string, or is a non-empty string
            that is not one of the accepted date forms.
    """
    if value is None or value == "":
        return value
    if not isinstance(value, str):
        raise ValueError(f"{field_name} must be a string")
    value = value.strip()

    match = _ISO_DATE_RE.fullmatch(value)
    if match is not None and match.group("day") is not None:
        # The pattern only bounds the day to 01-31; check it against the
        # real length of the month, including leap-year February.
        import calendar

        year = int(match.group("year"))
        month = int(match.group("month"))
        day = int(match.group("day"))
        if day > calendar.monthrange(year, month)[1]:
            match = None

    if match is None:
        raise ValueError(
            f"{field_name}={value!r} is not a valid ISO-8601 date "
            f"(expected YYYY, YYYY-MM, or YYYY-MM-DD)"
        )
    return value
def sanitize_content(value: str, max_length: int = 100_000) -> str: def sanitize_content(value: str, max_length: int = 100_000) -> str:
"""Validate drawer/diary content length.""" """Validate drawer/diary content length."""
if not isinstance(value, str) or not value.strip(): if not isinstance(value, str) or not value.strip():
+4
View File
@@ -55,6 +55,7 @@ from .config import ( # noqa: E402
sanitize_kg_value, sanitize_kg_value,
sanitize_name, sanitize_name,
sanitize_content, sanitize_content,
sanitize_iso_date,
) )
from .version import __version__ # noqa: E402 from .version import __version__ # noqa: E402
from .backends.chroma import ( # noqa: E402 from .backends.chroma import ( # noqa: E402
@@ -1021,6 +1022,7 @@ def tool_kg_query(entity: str, as_of: str = None, direction: str = "both"):
"""Query the knowledge graph for an entity's relationships.""" """Query the knowledge graph for an entity's relationships."""
try: try:
entity = sanitize_kg_value(entity, "entity") entity = sanitize_kg_value(entity, "entity")
as_of = sanitize_iso_date(as_of, "as_of")
except ValueError as e: except ValueError as e:
return {"error": str(e)} return {"error": str(e)}
if direction not in ("outgoing", "incoming", "both"): if direction not in ("outgoing", "incoming", "both"):
@@ -1037,6 +1039,7 @@ def tool_kg_add(
subject = sanitize_kg_value(subject, "subject") subject = sanitize_kg_value(subject, "subject")
predicate = sanitize_name(predicate, "predicate") predicate = sanitize_name(predicate, "predicate")
object = sanitize_kg_value(object, "object") object = sanitize_kg_value(object, "object")
valid_from = sanitize_iso_date(valid_from, "valid_from")
except ValueError as e: except ValueError as e:
return {"success": False, "error": str(e)} return {"success": False, "error": str(e)}
@@ -1062,6 +1065,7 @@ def tool_kg_invalidate(subject: str, predicate: str, object: str, ended: str = N
subject = sanitize_kg_value(subject, "subject") subject = sanitize_kg_value(subject, "subject")
predicate = sanitize_name(predicate, "predicate") predicate = sanitize_name(predicate, "predicate")
object = sanitize_kg_value(object, "object") object = sanitize_kg_value(object, "object")
ended = sanitize_iso_date(ended, "ended")
except ValueError as e: except ValueError as e:
return {"success": False, "error": str(e)} return {"success": False, "error": str(e)}
_wal_log( _wal_log(
+69 -1
View File
@@ -3,7 +3,13 @@ import json
import tempfile import tempfile
import pytest import pytest
from mempalace.config import MempalaceConfig, normalize_wing_name, sanitize_kg_value, sanitize_name from mempalace.config import (
MempalaceConfig,
normalize_wing_name,
sanitize_iso_date,
sanitize_kg_value,
sanitize_name,
)
def test_default_config(): def test_default_config():
@@ -212,3 +218,65 @@ def test_kg_value_rejects_null_bytes():
def test_kg_value_rejects_over_length(): def test_kg_value_rejects_over_length():
with pytest.raises(ValueError): with pytest.raises(ValueError):
sanitize_kg_value("a" * 129) sanitize_kg_value("a" * 129)
# --- sanitize_iso_date ---
def test_iso_date_accepts_year_only():
    """A bare four-digit year comes back unchanged."""
    year_only = "2026"
    assert sanitize_iso_date(year_only) == year_only
def test_iso_date_accepts_year_month():
    """A ``YYYY-MM`` value comes back unchanged."""
    year_month = "2026-03"
    assert sanitize_iso_date(year_month) == year_month
def test_iso_date_accepts_full_date():
    """A complete ``YYYY-MM-DD`` value comes back unchanged."""
    full_date = "2026-03-15"
    assert sanitize_iso_date(full_date) == full_date
def test_iso_date_passes_through_none():
    """``None`` means "no date supplied" and is returned as ``None``."""
    outcome = sanitize_iso_date(None)
    assert outcome is None
def test_iso_date_passes_through_empty_string():
    """An empty string is passed through rather than rejected."""
    outcome = sanitize_iso_date("")
    assert outcome == ""
def test_iso_date_strips_whitespace():
    """Leading and trailing spaces are removed from an otherwise valid date."""
    padded = " 2026-03-15 "
    outcome = sanitize_iso_date(padded)
    assert outcome == "2026-03-15"
def test_iso_date_rejects_natural_language():
    """A spelled-out month and year is not an accepted date form."""
    spelled_out = "March 2026"
    with pytest.raises(ValueError):
        sanitize_iso_date(spelled_out)
def test_iso_date_rejects_abbreviated_month():
    """An abbreviated month name followed by a year is rejected."""
    abbreviated = "Jan 2025"
    with pytest.raises(ValueError):
        sanitize_iso_date(abbreviated)
def test_iso_date_rejects_us_format():
    """Slash-separated ``MM/DD/YYYY`` input is rejected."""
    us_style = "03/15/2026"
    with pytest.raises(ValueError):
        sanitize_iso_date(us_style)
def test_iso_date_rejects_invalid_month():
    """Month 13 does not exist, so ``YYYY-13`` is rejected."""
    month_thirteen = "2026-13"
    with pytest.raises(ValueError):
        sanitize_iso_date(month_thirteen)
def test_iso_date_rejects_invalid_day():
    """A day number above 31 is rejected."""
    day_thirty_two = "2026-02-32"
    with pytest.raises(ValueError):
        sanitize_iso_date(day_thirty_two)
def test_iso_date_rejects_non_string():
    """An integer that merely looks like a date is rejected."""
    numeric_date = 20260315
    with pytest.raises(ValueError):
        sanitize_iso_date(numeric_date)
def test_iso_date_error_names_field():
    """The error message mentions the field name passed by the caller."""
    field = "valid_from"
    with pytest.raises(ValueError, match=field):
        sanitize_iso_date("yesterday", field)
+46
View File
@@ -665,6 +665,52 @@ class TestKGTools:
result = tool_kg_stats() result = tool_kg_stats()
assert result["entities"] >= 4 assert result["entities"] >= 4
# --- Date validation at the MCP boundary (issue #1164) ---
def test_kg_add_rejects_invalid_valid_from(self, monkeypatch, config, palace_path, kg):
    """tool_kg_add reports a failure naming ``valid_from`` for a non-ISO date."""
    _patch_mcp_server(monkeypatch, config, kg)
    from mempalace.mcp_server import tool_kg_add

    fact = {
        "subject": "Alice",
        "predicate": "likes",
        "object": "coffee",
        "valid_from": "Jan 2025",
    }
    result = tool_kg_add(**fact)

    assert result["success"] is False
    for fragment in ("valid_from", "ISO-8601"):
        assert fragment in result["error"]
def test_kg_query_rejects_invalid_as_of(self, monkeypatch, config, palace_path, seeded_kg):
    """tool_kg_query returns an error naming ``as_of`` for a non-ISO date."""
    _patch_mcp_server(monkeypatch, config, seeded_kg)
    from mempalace.mcp_server import tool_kg_query

    bad_as_of = "March 2026"
    result = tool_kg_query(entity="Max", as_of=bad_as_of)

    assert "error" in result
    assert "as_of" in result["error"]
def test_kg_invalidate_rejects_invalid_ended(self, monkeypatch, config, palace_path, seeded_kg):
    """tool_kg_invalidate reports a failure naming ``ended`` for a non-ISO date."""
    _patch_mcp_server(monkeypatch, config, seeded_kg)
    from mempalace.mcp_server import tool_kg_invalidate

    request = {
        "subject": "Max",
        "predicate": "does",
        "object": "chess",
        "ended": "yesterday",
    }
    result = tool_kg_invalidate(**request)

    assert result["success"] is False
    assert "ended" in result["error"]
def test_kg_query_accepts_partial_iso_dates(self, monkeypatch, config, palace_path, seeded_kg):
    """tool_kg_query does not return an error for any accepted ISO date form."""
    _patch_mcp_server(monkeypatch, config, seeded_kg)
    from mempalace.mcp_server import tool_kg_query

    # Year-only and year-month are valid ISO-8601 forms alongside the full
    # date, so none of them may produce an error result.
    accepted_forms = ["2026", "2026-03", "2026-03-15"]
    for value in accepted_forms:
        result = tool_kg_query(entity="Max", as_of=value)
        assert "error" not in result, f"rejected valid date {value!r}: {result}"
# ── Diary Tools ───────────────────────────────────────────────────────── # ── Diary Tools ─────────────────────────────────────────────────────────