fix(kg): reject partial ISO dates to avoid silent empty result sets

Per qodo-ai review on PR #1167: sanitize_iso_date() previously accepted
YYYY and YYYY-MM, but KnowledgeGraph.query_entity() compares valid_from/
valid_to TEXT columns lexicographically against as_of. Lexicographic
comparison treats '2026-01-01' as greater than '2026' (because '-' >
end-of-string), so partial as_of values silently excluded valid facts —
re-introducing the silent-empty-results problem this PR was meant to
fix.

Tighten _ISO_DATE_RE to require YYYY-MM-DD only. Update docstring and
error message accordingly. Invert the two test cases that asserted
partials were accepted.
This commit is contained in:
Arnold Wender
2026-04-26 12:50:43 +02:00
parent 4d98b05240
commit abe85763d4
3 changed files with 30 additions and 15 deletions
+11 -7
View File
@@ -85,16 +85,21 @@ def sanitize_kg_value(value: str, field_name: str = "value") -> str:
# (as_of, valid_from, valid_to, ended). Parameterized queries already # (as_of, valid_from, valid_to, ended). Parameterized queries already
# prevent SQL injection, but unvalidated date strings silently miss # prevent SQL injection, but unvalidated date strings silently miss
# every row — callers cannot distinguish "no fact at this time" from # every row — callers cannot distinguish "no fact at this time" from
# "your date format was unrecognized." Accept YYYY, YYYY-MM, YYYY-MM-DD. # "your date format was unrecognized." Require full YYYY-MM-DD: KG
_ISO_DATE_RE = re.compile(r"^\d{4}(?:-(?:0[1-9]|1[0-2])(?:-(?:0[1-9]|[12]\d|3[01]))?)?$") # queries compare TEXT dates lexicographically, so partials like "2026"
# would re-introduce silent empty results (e.g. "2026-01-01" <= "2026"
# is False), defeating the purpose of validation.
_ISO_DATE_RE = re.compile(r"^\d{4}-(?:0[1-9]|1[0-2])-(?:0[1-9]|[12]\d|3[01])$")
def sanitize_iso_date(value, field_name: str = "date"): def sanitize_iso_date(value, field_name: str = "date"):
"""Validate an ISO-8601 date string, accepting None or empty as-is. """Validate an ISO-8601 date string, accepting None or empty as-is.
Accepts ``YYYY``, ``YYYY-MM``, or ``YYYY-MM-DD``. Raises ValueError Accepts only ``YYYY-MM-DD``. Raises ValueError on any other
on any other non-empty input so the MCP layer can surface a clear non-empty input so the MCP layer can surface a clear error to the
error to the caller instead of silently returning empty results. caller instead of silently returning empty results. Partial dates
(``YYYY``, ``YYYY-MM``) are rejected because KG queries compare
TEXT dates lexicographically and would silently exclude valid facts.
""" """
if value is None or value == "": if value is None or value == "":
return value return value
@@ -103,8 +108,7 @@ def sanitize_iso_date(value, field_name: str = "date"):
value = value.strip() value = value.strip()
if not _ISO_DATE_RE.match(value): if not _ISO_DATE_RE.match(value):
raise ValueError( raise ValueError(
f"{field_name}={value!r} is not a valid ISO-8601 date " f"{field_name}={value!r} is not a valid ISO-8601 date " f"(expected YYYY-MM-DD)"
f"(expected YYYY, YYYY-MM, or YYYY-MM-DD)"
) )
return value return value
+8 -4
View File
@@ -223,12 +223,16 @@ def test_kg_value_rejects_over_length():
# --- sanitize_iso_date --- # --- sanitize_iso_date ---
def test_iso_date_accepts_year_only(): def test_iso_date_rejects_year_only():
assert sanitize_iso_date("2026") == "2026" # Partial dates re-introduce silent empty result sets via lexicographic
# TEXT comparison in KG queries (e.g. "2026-01-01" <= "2026" is False).
with pytest.raises(ValueError):
sanitize_iso_date("2026")
def test_iso_date_accepts_year_month(): def test_iso_date_rejects_year_month():
assert sanitize_iso_date("2026-03") == "2026-03" with pytest.raises(ValueError):
sanitize_iso_date("2026-03")
def test_iso_date_accepts_full_date(): def test_iso_date_accepts_full_date():
+11 -4
View File
@@ -702,14 +702,21 @@ class TestKGTools:
assert result["success"] is False assert result["success"] is False
assert "ended" in result["error"] assert "ended" in result["error"]
def test_kg_query_accepts_partial_iso_dates(self, monkeypatch, config, palace_path, seeded_kg): def test_kg_query_rejects_partial_iso_dates(self, monkeypatch, config, palace_path, seeded_kg):
_patch_mcp_server(monkeypatch, config, seeded_kg) _patch_mcp_server(monkeypatch, config, seeded_kg)
from mempalace.mcp_server import tool_kg_query from mempalace.mcp_server import tool_kg_query
# YYYY and YYYY-MM are valid ISO-8601 forms — must not be rejected. # Partial ISO dates are rejected: KG queries compare TEXT dates
for value in ("2026", "2026-03", "2026-03-15"): # lexicographically, so "2026-01-01" <= "2026" is False, which
# silently excludes facts. Reject at the boundary — only YYYY-MM-DD
# produces correct results.
for value in ("2026", "2026-03"):
result = tool_kg_query(entity="Max", as_of=value) result = tool_kg_query(entity="Max", as_of=value)
assert "error" not in result, f"rejected valid date {value!r}: {result}" assert "error" in result, f"accepted partial date {value!r}: {result}"
# Full ISO-8601 dates still pass.
result = tool_kg_query(entity="Max", as_of="2026-03-15")
assert "error" not in result, f"rejected valid date: {result}"
# ── Diary Tools ───────────────────────────────────────────────────────── # ── Diary Tools ─────────────────────────────────────────────────────────