Files
Arnold Wender abe85763d4 fix(kg): reject partial ISO dates to avoid silent empty result sets
Per qodo-ai review on PR #1167: sanitize_iso_date() previously accepted
YYYY and YYYY-MM, but KnowledgeGraph.query_entity() compares valid_from/
valid_to TEXT columns lexicographically against as_of. Lexicographic
comparison treats '2026-01-01' as greater than '2026' (because '-' >
end-of-string), so partial as_of values silently excluded valid facts —
re-introducing the silent-empty-results problem this PR was meant to
fix.

Tighten _ISO_DATE_RE to require YYYY-MM-DD only. Update docstring and
error message accordingly. Invert the two test cases that asserted
partials were accepted.
2026-04-30 15:21:18 +02:00

287 lines
8.2 KiB
Python

import os
import json
import tempfile
import pytest
from mempalace.config import (
MempalaceConfig,
normalize_wing_name,
sanitize_iso_date,
sanitize_kg_value,
sanitize_name,
)
def test_default_config():
cfg = MempalaceConfig(config_dir=tempfile.mkdtemp())
assert "palace" in cfg.palace_path
assert cfg.collection_name == "mempalace_drawers"
def test_config_from_file():
tmpdir = tempfile.mkdtemp()
with open(os.path.join(tmpdir, "config.json"), "w") as f:
json.dump({"palace_path": "/custom/palace"}, f)
cfg = MempalaceConfig(config_dir=tmpdir)
assert cfg.palace_path == "/custom/palace"
def test_embedding_device_defaults_to_auto(monkeypatch):
monkeypatch.delenv("MEMPALACE_EMBEDDING_DEVICE", raising=False)
cfg = MempalaceConfig(config_dir=tempfile.mkdtemp())
assert cfg.embedding_device == "auto"
def test_embedding_device_from_config_is_normalized(tmp_path, monkeypatch):
monkeypatch.delenv("MEMPALACE_EMBEDDING_DEVICE", raising=False)
with open(tmp_path / "config.json", "w") as f:
json.dump({"embedding_device": " CUDA "}, f)
cfg = MempalaceConfig(config_dir=str(tmp_path))
assert cfg.embedding_device == "cuda"
def test_embedding_device_env_overrides_config(tmp_path, monkeypatch):
with open(tmp_path / "config.json", "w") as f:
json.dump({"embedding_device": "cpu"}, f)
monkeypatch.setenv("MEMPALACE_EMBEDDING_DEVICE", " CoreML ")
cfg = MempalaceConfig(config_dir=str(tmp_path))
assert cfg.embedding_device == "coreml"
def test_env_override():
raw = "/env/palace"
os.environ["MEMPALACE_PALACE_PATH"] = raw
try:
cfg = MempalaceConfig(config_dir=tempfile.mkdtemp())
# palace_path normalizes with abspath + expanduser to match the
# --palace CLI code path. On Unix that's a no-op for "/env/palace";
# on Windows abspath prepends the current drive letter.
assert cfg.palace_path == os.path.abspath(os.path.expanduser(raw))
finally:
del os.environ["MEMPALACE_PALACE_PATH"]
def test_env_path_expanduser():
# Tilde must be expanded to match the --palace CLI code path. We don't
# assert "~" is absent from the final string because Windows 8.3 short
# paths (e.g. C:\Users\RUNNER~1\...) legitimately contain tildes — the
# equality check is authoritative.
raw = os.path.join("~", "mempalace-test")
os.environ["MEMPALACE_PALACE_PATH"] = raw
try:
cfg = MempalaceConfig(config_dir=tempfile.mkdtemp())
assert cfg.palace_path == os.path.abspath(os.path.expanduser(raw))
assert cfg.palace_path.endswith("mempalace-test")
finally:
del os.environ["MEMPALACE_PALACE_PATH"]
def test_env_path_abspath_collapses_traversal():
# Build a raw path with a .. segment using the platform separator so
# the assertion is portable (Windows uses \, POSIX uses /).
raw = os.path.join(tempfile.gettempdir(), "palace", "..", "mempalace-test")
expected = os.path.abspath(os.path.expanduser(raw))
os.environ["MEMPALACE_PALACE_PATH"] = raw
try:
cfg = MempalaceConfig(config_dir=tempfile.mkdtemp())
# .. segments must be collapsed, not preserved literally.
assert ".." not in cfg.palace_path
assert cfg.palace_path == expected
finally:
del os.environ["MEMPALACE_PALACE_PATH"]
def test_env_path_legacy_alias_normalized():
# Legacy MEMPAL_PALACE_PATH gets the same normalization treatment as
# MEMPALACE_PALACE_PATH. We don't assert "~" is absent from the final
# string because Windows 8.3 short paths (e.g. C:\Users\RUNNER~1\...)
# legitimately contain tildes — the equality check below is authoritative.
os.environ.pop("MEMPALACE_PALACE_PATH", None)
raw = os.path.join("~", "legacy-alias", "..", "mempalace-test")
os.environ["MEMPAL_PALACE_PATH"] = raw
try:
cfg = MempalaceConfig(config_dir=tempfile.mkdtemp())
assert ".." not in cfg.palace_path
assert cfg.palace_path == os.path.abspath(os.path.expanduser(raw))
finally:
del os.environ["MEMPAL_PALACE_PATH"]
def test_init():
tmpdir = tempfile.mkdtemp()
cfg = MempalaceConfig(config_dir=tmpdir)
cfg.init()
assert os.path.exists(os.path.join(tmpdir, "config.json"))
# --- normalize_wing_name ---
def test_normalize_wing_name_hyphen():
assert normalize_wing_name("mempal-private") == "mempal_private"
def test_normalize_wing_name_space():
assert normalize_wing_name("My Project") == "my_project"
def test_normalize_wing_name_already_clean():
assert normalize_wing_name("memorymark") == "memorymark"
def test_normalize_wing_name_mixed():
assert normalize_wing_name("My-Cool App") == "my_cool_app"
# --- sanitize_name ---
def test_sanitize_name_ascii():
assert sanitize_name("hello") == "hello"
def test_sanitize_name_latvian():
assert sanitize_name("Jānis") == "Jānis"
def test_sanitize_name_cjk():
assert sanitize_name("太郎") == "太郎"
def test_sanitize_name_cyrillic():
assert sanitize_name("Алексей") == "Алексей"
def test_sanitize_name_rejects_leading_underscore():
with pytest.raises(ValueError):
sanitize_name("_foo")
def test_sanitize_name_rejects_path_traversal():
with pytest.raises(ValueError):
sanitize_name("../etc/passwd")
def test_sanitize_name_rejects_empty():
with pytest.raises(ValueError):
sanitize_name("")
# --- sanitize_kg_value ---
def test_kg_value_accepts_commas():
assert sanitize_kg_value("Alice, Bob, and Carol") == "Alice, Bob, and Carol"
def test_kg_value_accepts_colons():
assert sanitize_kg_value("role: engineer") == "role: engineer"
def test_kg_value_accepts_parentheses():
assert sanitize_kg_value("Python (programming)") == "Python (programming)"
def test_kg_value_accepts_slashes():
assert sanitize_kg_value("owner/repo") == "owner/repo"
def test_kg_value_accepts_hash():
assert sanitize_kg_value("issue #123") == "issue #123"
def test_kg_value_accepts_unicode():
assert sanitize_kg_value("Jānis Bērziņš") == "Jānis Bērziņš"
def test_kg_value_strips_whitespace():
assert sanitize_kg_value(" hello ") == "hello"
def test_kg_value_rejects_empty():
with pytest.raises(ValueError):
sanitize_kg_value("")
def test_kg_value_rejects_whitespace_only():
with pytest.raises(ValueError):
sanitize_kg_value(" ")
def test_kg_value_rejects_null_bytes():
with pytest.raises(ValueError):
sanitize_kg_value("hello\x00world")
def test_kg_value_rejects_over_length():
with pytest.raises(ValueError):
sanitize_kg_value("a" * 129)
# --- sanitize_iso_date ---
def test_iso_date_rejects_year_only():
# Partial dates re-introduce silent empty result sets via lexicographic
# TEXT comparison in KG queries (e.g. "2026-01-01" <= "2026" is False).
with pytest.raises(ValueError):
sanitize_iso_date("2026")
def test_iso_date_rejects_year_month():
with pytest.raises(ValueError):
sanitize_iso_date("2026-03")
def test_iso_date_accepts_full_date():
assert sanitize_iso_date("2026-03-15") == "2026-03-15"
def test_iso_date_passes_through_none():
assert sanitize_iso_date(None) is None
def test_iso_date_passes_through_empty_string():
assert sanitize_iso_date("") == ""
def test_iso_date_strips_whitespace():
assert sanitize_iso_date(" 2026-03-15 ") == "2026-03-15"
def test_iso_date_rejects_natural_language():
with pytest.raises(ValueError):
sanitize_iso_date("March 2026")
def test_iso_date_rejects_abbreviated_month():
with pytest.raises(ValueError):
sanitize_iso_date("Jan 2025")
def test_iso_date_rejects_us_format():
with pytest.raises(ValueError):
sanitize_iso_date("03/15/2026")
def test_iso_date_rejects_invalid_month():
with pytest.raises(ValueError):
sanitize_iso_date("2026-13")
def test_iso_date_rejects_invalid_day():
with pytest.raises(ValueError):
sanitize_iso_date("2026-02-32")
def test_iso_date_rejects_non_string():
with pytest.raises(ValueError):
sanitize_iso_date(20260315)
def test_iso_date_error_names_field():
with pytest.raises(ValueError, match="valid_from"):
sanitize_iso_date("yesterday", "valid_from")