Merge pull request #1167 from arnoldwender/fix/kg-date-validation
fix(kg): validate ISO-8601 date formats at MCP boundary
This commit is contained in:
@@ -81,6 +81,38 @@ def sanitize_kg_value(value: str, field_name: str = "value") -> str:
|
|||||||
return value
|
return value
|
||||||
|
|
||||||
|
|
||||||
|
# ISO-8601 date validator for knowledge-graph temporal parameters
|
||||||
|
# (as_of, valid_from, valid_to, ended). Parameterized queries already
|
||||||
|
# prevent SQL injection, but unvalidated date strings silently miss
|
||||||
|
# every row — callers cannot distinguish "no fact at this time" from
|
||||||
|
# "your date format was unrecognized." Require full YYYY-MM-DD: KG
|
||||||
|
# queries compare TEXT dates lexicographically, so partials like "2026"
|
||||||
|
# would re-introduce silent empty results (e.g. "2026-01-01" <= "2026"
|
||||||
|
# is False), defeating the purpose of validation.
|
||||||
|
_ISO_DATE_RE = re.compile(r"^\d{4}-(?:0[1-9]|1[0-2])-(?:0[1-9]|[12]\d|3[01])$")
|
||||||
|
|
||||||
|
|
||||||
|
def sanitize_iso_date(value, field_name: str = "date"):
|
||||||
|
"""Validate an ISO-8601 date string, accepting None or empty as-is.
|
||||||
|
|
||||||
|
Accepts only ``YYYY-MM-DD``. Raises ValueError on any other
|
||||||
|
non-empty input so the MCP layer can surface a clear error to the
|
||||||
|
caller instead of silently returning empty results. Partial dates
|
||||||
|
(``YYYY``, ``YYYY-MM``) are rejected because KG queries compare
|
||||||
|
TEXT dates lexicographically and would silently exclude valid facts.
|
||||||
|
"""
|
||||||
|
if value is None or value == "":
|
||||||
|
return value
|
||||||
|
if not isinstance(value, str):
|
||||||
|
raise ValueError(f"{field_name} must be a string")
|
||||||
|
value = value.strip()
|
||||||
|
if not _ISO_DATE_RE.match(value):
|
||||||
|
raise ValueError(
|
||||||
|
f"{field_name}={value!r} is not a valid ISO-8601 date " f"(expected YYYY-MM-DD)"
|
||||||
|
)
|
||||||
|
return value
|
||||||
|
|
||||||
|
|
||||||
def sanitize_content(value: str, max_length: int = 100_000) -> str:
|
def sanitize_content(value: str, max_length: int = 100_000) -> str:
|
||||||
"""Validate drawer/diary content length."""
|
"""Validate drawer/diary content length."""
|
||||||
if not isinstance(value, str) or not value.strip():
|
if not isinstance(value, str) or not value.strip():
|
||||||
|
|||||||
@@ -55,6 +55,7 @@ from .config import ( # noqa: E402
|
|||||||
sanitize_kg_value,
|
sanitize_kg_value,
|
||||||
sanitize_name,
|
sanitize_name,
|
||||||
sanitize_content,
|
sanitize_content,
|
||||||
|
sanitize_iso_date,
|
||||||
)
|
)
|
||||||
from .version import __version__ # noqa: E402
|
from .version import __version__ # noqa: E402
|
||||||
from chromadb.errors import NotFoundError as _ChromaNotFoundError # noqa: E402
|
from chromadb.errors import NotFoundError as _ChromaNotFoundError # noqa: E402
|
||||||
@@ -1059,6 +1060,7 @@ def tool_kg_query(entity: str, as_of: str = None, direction: str = "both"):
|
|||||||
"""Query the knowledge graph for an entity's relationships."""
|
"""Query the knowledge graph for an entity's relationships."""
|
||||||
try:
|
try:
|
||||||
entity = sanitize_kg_value(entity, "entity")
|
entity = sanitize_kg_value(entity, "entity")
|
||||||
|
as_of = sanitize_iso_date(as_of, "as_of")
|
||||||
except ValueError as e:
|
except ValueError as e:
|
||||||
return {"error": str(e)}
|
return {"error": str(e)}
|
||||||
if direction not in ("outgoing", "incoming", "both"):
|
if direction not in ("outgoing", "incoming", "both"):
|
||||||
@@ -1092,6 +1094,7 @@ def tool_kg_add(
|
|||||||
subject = sanitize_kg_value(subject, "subject")
|
subject = sanitize_kg_value(subject, "subject")
|
||||||
predicate = sanitize_name(predicate, "predicate")
|
predicate = sanitize_name(predicate, "predicate")
|
||||||
object = sanitize_kg_value(object, "object")
|
object = sanitize_kg_value(object, "object")
|
||||||
|
valid_from = sanitize_iso_date(valid_from, "valid_from")
|
||||||
except ValueError as e:
|
except ValueError as e:
|
||||||
return {"success": False, "error": str(e)}
|
return {"success": False, "error": str(e)}
|
||||||
|
|
||||||
@@ -1135,6 +1138,7 @@ def tool_kg_invalidate(subject: str, predicate: str, object: str, ended: str = N
|
|||||||
subject = sanitize_kg_value(subject, "subject")
|
subject = sanitize_kg_value(subject, "subject")
|
||||||
predicate = sanitize_name(predicate, "predicate")
|
predicate = sanitize_name(predicate, "predicate")
|
||||||
object = sanitize_kg_value(object, "object")
|
object = sanitize_kg_value(object, "object")
|
||||||
|
ended = sanitize_iso_date(ended, "ended")
|
||||||
except ValueError as e:
|
except ValueError as e:
|
||||||
return {"success": False, "error": str(e)}
|
return {"success": False, "error": str(e)}
|
||||||
resolved_ended = ended or date.today().isoformat()
|
resolved_ended = ended or date.today().isoformat()
|
||||||
|
|||||||
+73
-1
@@ -3,7 +3,13 @@ import json
|
|||||||
import tempfile
|
import tempfile
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
from mempalace.config import MempalaceConfig, normalize_wing_name, sanitize_kg_value, sanitize_name
|
from mempalace.config import (
|
||||||
|
MempalaceConfig,
|
||||||
|
normalize_wing_name,
|
||||||
|
sanitize_iso_date,
|
||||||
|
sanitize_kg_value,
|
||||||
|
sanitize_name,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def test_default_config():
|
def test_default_config():
|
||||||
@@ -212,3 +218,69 @@ def test_kg_value_rejects_null_bytes():
|
|||||||
def test_kg_value_rejects_over_length():
|
def test_kg_value_rejects_over_length():
|
||||||
with pytest.raises(ValueError):
|
with pytest.raises(ValueError):
|
||||||
sanitize_kg_value("a" * 129)
|
sanitize_kg_value("a" * 129)
|
||||||
|
|
||||||
|
|
||||||
|
# --- sanitize_iso_date ---
|
||||||
|
|
||||||
|
|
||||||
|
def test_iso_date_rejects_year_only():
|
||||||
|
# Partial dates re-introduce silent empty result sets via lexicographic
|
||||||
|
# TEXT comparison in KG queries (e.g. "2026-01-01" <= "2026" is False).
|
||||||
|
with pytest.raises(ValueError):
|
||||||
|
sanitize_iso_date("2026")
|
||||||
|
|
||||||
|
|
||||||
|
def test_iso_date_rejects_year_month():
|
||||||
|
with pytest.raises(ValueError):
|
||||||
|
sanitize_iso_date("2026-03")
|
||||||
|
|
||||||
|
|
||||||
|
def test_iso_date_accepts_full_date():
|
||||||
|
assert sanitize_iso_date("2026-03-15") == "2026-03-15"
|
||||||
|
|
||||||
|
|
||||||
|
def test_iso_date_passes_through_none():
|
||||||
|
assert sanitize_iso_date(None) is None
|
||||||
|
|
||||||
|
|
||||||
|
def test_iso_date_passes_through_empty_string():
|
||||||
|
assert sanitize_iso_date("") == ""
|
||||||
|
|
||||||
|
|
||||||
|
def test_iso_date_strips_whitespace():
|
||||||
|
assert sanitize_iso_date(" 2026-03-15 ") == "2026-03-15"
|
||||||
|
|
||||||
|
|
||||||
|
def test_iso_date_rejects_natural_language():
|
||||||
|
with pytest.raises(ValueError):
|
||||||
|
sanitize_iso_date("March 2026")
|
||||||
|
|
||||||
|
|
||||||
|
def test_iso_date_rejects_abbreviated_month():
|
||||||
|
with pytest.raises(ValueError):
|
||||||
|
sanitize_iso_date("Jan 2025")
|
||||||
|
|
||||||
|
|
||||||
|
def test_iso_date_rejects_us_format():
|
||||||
|
with pytest.raises(ValueError):
|
||||||
|
sanitize_iso_date("03/15/2026")
|
||||||
|
|
||||||
|
|
||||||
|
def test_iso_date_rejects_invalid_month():
|
||||||
|
with pytest.raises(ValueError):
|
||||||
|
sanitize_iso_date("2026-13")
|
||||||
|
|
||||||
|
|
||||||
|
def test_iso_date_rejects_invalid_day():
|
||||||
|
with pytest.raises(ValueError):
|
||||||
|
sanitize_iso_date("2026-02-32")
|
||||||
|
|
||||||
|
|
||||||
|
def test_iso_date_rejects_non_string():
|
||||||
|
with pytest.raises(ValueError):
|
||||||
|
sanitize_iso_date(20260315)
|
||||||
|
|
||||||
|
|
||||||
|
def test_iso_date_error_names_field():
|
||||||
|
with pytest.raises(ValueError, match="valid_from"):
|
||||||
|
sanitize_iso_date("yesterday", "valid_from")
|
||||||
|
|||||||
@@ -788,6 +788,59 @@ class TestKGTools:
|
|||||||
result = tool_kg_stats()
|
result = tool_kg_stats()
|
||||||
assert result["entities"] >= 4
|
assert result["entities"] >= 4
|
||||||
|
|
||||||
|
# --- Date validation at the MCP boundary (issue #1164) ---
|
||||||
|
|
||||||
|
def test_kg_add_rejects_invalid_valid_from(self, monkeypatch, config, palace_path, kg):
|
||||||
|
_patch_mcp_server(monkeypatch, config, kg)
|
||||||
|
from mempalace.mcp_server import tool_kg_add
|
||||||
|
|
||||||
|
result = tool_kg_add(
|
||||||
|
subject="Alice",
|
||||||
|
predicate="likes",
|
||||||
|
object="coffee",
|
||||||
|
valid_from="Jan 2025",
|
||||||
|
)
|
||||||
|
assert result["success"] is False
|
||||||
|
assert "valid_from" in result["error"]
|
||||||
|
assert "ISO-8601" in result["error"]
|
||||||
|
|
||||||
|
def test_kg_query_rejects_invalid_as_of(self, monkeypatch, config, palace_path, seeded_kg):
|
||||||
|
_patch_mcp_server(monkeypatch, config, seeded_kg)
|
||||||
|
from mempalace.mcp_server import tool_kg_query
|
||||||
|
|
||||||
|
result = tool_kg_query(entity="Max", as_of="March 2026")
|
||||||
|
assert "error" in result
|
||||||
|
assert "as_of" in result["error"]
|
||||||
|
|
||||||
|
def test_kg_invalidate_rejects_invalid_ended(self, monkeypatch, config, palace_path, seeded_kg):
|
||||||
|
_patch_mcp_server(monkeypatch, config, seeded_kg)
|
||||||
|
from mempalace.mcp_server import tool_kg_invalidate
|
||||||
|
|
||||||
|
result = tool_kg_invalidate(
|
||||||
|
subject="Max",
|
||||||
|
predicate="does",
|
||||||
|
object="chess",
|
||||||
|
ended="yesterday",
|
||||||
|
)
|
||||||
|
assert result["success"] is False
|
||||||
|
assert "ended" in result["error"]
|
||||||
|
|
||||||
|
def test_kg_query_rejects_partial_iso_dates(self, monkeypatch, config, palace_path, seeded_kg):
|
||||||
|
_patch_mcp_server(monkeypatch, config, seeded_kg)
|
||||||
|
from mempalace.mcp_server import tool_kg_query
|
||||||
|
|
||||||
|
# Partial ISO dates are rejected: KG queries compare TEXT dates
|
||||||
|
# lexicographically, so "2026-01-01" <= "2026" is False, which
|
||||||
|
# silently excludes facts. Reject at the boundary — only YYYY-MM-DD
|
||||||
|
# produces correct results.
|
||||||
|
for value in ("2026", "2026-03"):
|
||||||
|
result = tool_kg_query(entity="Max", as_of=value)
|
||||||
|
assert "error" in result, f"accepted partial date {value!r}: {result}"
|
||||||
|
|
||||||
|
# Full ISO-8601 dates still pass.
|
||||||
|
result = tool_kg_query(entity="Max", as_of="2026-03-15")
|
||||||
|
assert "error" not in result, f"rejected valid date: {result}"
|
||||||
|
|
||||||
|
|
||||||
# ── Diary Tools ─────────────────────────────────────────────────────────
|
# ── Diary Tools ─────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user