4d98b05240
tool_kg_query (as_of), tool_kg_add (valid_from), and tool_kg_invalidate (ended) accepted any string and forwarded it to SQLite without format validation. Parameterized queries prevent SQL injection, but invalid date strings silently produce empty result sets — callers cannot distinguish "no fact at this time" from "your date format was unrecognized." This is especially painful for natural-language LLM callers that synthesize dates like "March 2026" or "Jan 2025". Add sanitize_iso_date() in config.py alongside the other input validators. It accepts YYYY, YYYY-MM, and YYYY-MM-DD forms; passes through None/empty; and raises ValueError with a field-named message on anything else. Call it from the three kg MCP tool wrappers before values reach the storage layer so the caller gets a clear error instead of a silent miss. Closes #1164
283 lines
8.0 KiB
Python
283 lines
8.0 KiB
Python
import os
|
|
import json
|
|
import tempfile
|
|
|
|
import pytest
|
|
from mempalace.config import (
|
|
MempalaceConfig,
|
|
normalize_wing_name,
|
|
sanitize_iso_date,
|
|
sanitize_kg_value,
|
|
sanitize_name,
|
|
)
|
|
|
|
|
|
def test_default_config():
    """A config built from an empty directory exposes the default values.

    Checks that the palace path contains ``"palace"`` and that the
    collection name is ``"mempalace_drawers"``.
    """
    # TemporaryDirectory removes the scratch directory when the block exits;
    # a bare mkdtemp() would leave one behind on every test run.
    with tempfile.TemporaryDirectory() as config_dir:
        cfg = MempalaceConfig(config_dir=config_dir)
        assert "palace" in cfg.palace_path
        assert cfg.collection_name == "mempalace_drawers"
|
|
|
|
|
|
def test_config_from_file():
    """A ``palace_path`` written to ``config.json`` is reported by the config."""
    # The context manager cleans up the directory (and the config file in it)
    # once the assertions are done, instead of leaking it like mkdtemp().
    with tempfile.TemporaryDirectory() as tmpdir:
        with open(os.path.join(tmpdir, "config.json"), "w") as f:
            json.dump({"palace_path": "/custom/palace"}, f)
        cfg = MempalaceConfig(config_dir=tmpdir)
        assert cfg.palace_path == "/custom/palace"
|
|
|
|
|
|
def test_embedding_device_defaults_to_auto(monkeypatch):
    """With no env var and no config file, the embedding device is ``"auto"``."""
    # Make sure an ambient MEMPALACE_EMBEDDING_DEVICE cannot influence the result.
    monkeypatch.delenv("MEMPALACE_EMBEDDING_DEVICE", raising=False)
    # TemporaryDirectory cleans up after itself; mkdtemp() would leak the dir.
    with tempfile.TemporaryDirectory() as config_dir:
        cfg = MempalaceConfig(config_dir=config_dir)
        assert cfg.embedding_device == "auto"
|
|
|
|
|
|
def test_embedding_device_from_config_is_normalized(tmp_path, monkeypatch):
    """The device read from ``config.json`` comes back trimmed and lower-cased."""
    monkeypatch.delenv("MEMPALACE_EMBEDDING_DEVICE", raising=False)
    config_file = tmp_path / "config.json"
    config_file.write_text(json.dumps({"embedding_device": " CUDA "}))

    loaded = MempalaceConfig(config_dir=str(tmp_path))
    assert loaded.embedding_device == "cuda"
|
|
|
|
|
|
def test_embedding_device_env_overrides_config(tmp_path, monkeypatch):
    """``MEMPALACE_EMBEDDING_DEVICE`` wins over the value stored in ``config.json``."""
    config_file = tmp_path / "config.json"
    config_file.write_text(json.dumps({"embedding_device": "cpu"}))
    monkeypatch.setenv("MEMPALACE_EMBEDDING_DEVICE", " CoreML ")

    loaded = MempalaceConfig(config_dir=str(tmp_path))
    # The env value is also expected back trimmed and lower-cased.
    assert loaded.embedding_device == "coreml"
|
|
|
|
|
|
def test_env_override():
    """``MEMPALACE_PALACE_PATH`` in the environment determines ``palace_path``."""
    from unittest import mock

    raw = "/env/palace"
    # patch.dict puts os.environ back exactly as it was on exit, so a value
    # that was already set before the test is restored rather than deleted.
    with mock.patch.dict(os.environ, {"MEMPALACE_PALACE_PATH": raw}):
        # TemporaryDirectory removes the scratch config dir afterwards.
        with tempfile.TemporaryDirectory() as config_dir:
            cfg = MempalaceConfig(config_dir=config_dir)
            # palace_path normalizes with abspath + expanduser to match the
            # --palace CLI code path. On Unix that's a no-op for "/env/palace";
            # on Windows abspath prepends the current drive letter.
            assert cfg.palace_path == os.path.abspath(os.path.expanduser(raw))
|
|
|
|
|
|
def test_env_path_expanduser():
    """A leading ``~`` in ``MEMPALACE_PALACE_PATH`` is expanded in ``palace_path``."""
    from unittest import mock

    # Tilde must be expanded to match the --palace CLI code path. We don't
    # assert "~" is absent from the final string because Windows 8.3 short
    # paths (e.g. C:\Users\RUNNER~1\...) legitimately contain tildes — the
    # equality check is authoritative.
    raw = os.path.join("~", "mempalace-test")
    # patch.dict restores any pre-existing value of the variable on exit
    # instead of unconditionally deleting it.
    with mock.patch.dict(os.environ, {"MEMPALACE_PALACE_PATH": raw}):
        # TemporaryDirectory removes the scratch config dir afterwards.
        with tempfile.TemporaryDirectory() as config_dir:
            cfg = MempalaceConfig(config_dir=config_dir)
            assert cfg.palace_path == os.path.abspath(os.path.expanduser(raw))
            assert cfg.palace_path.endswith("mempalace-test")
|
|
|
|
|
|
def test_env_path_abspath_collapses_traversal():
    """``..`` segments in ``MEMPALACE_PALACE_PATH`` are collapsed in ``palace_path``."""
    from unittest import mock

    # Build a raw path with a .. segment using the platform separator so
    # the assertion is portable (Windows uses \, POSIX uses /).
    raw = os.path.join(tempfile.gettempdir(), "palace", "..", "mempalace-test")
    expected = os.path.abspath(os.path.expanduser(raw))
    # patch.dict restores any pre-existing value of the variable on exit
    # instead of unconditionally deleting it.
    with mock.patch.dict(os.environ, {"MEMPALACE_PALACE_PATH": raw}):
        # TemporaryDirectory removes the scratch config dir afterwards.
        with tempfile.TemporaryDirectory() as config_dir:
            cfg = MempalaceConfig(config_dir=config_dir)
            # .. segments must be collapsed, not preserved literally.
            assert ".." not in cfg.palace_path
            assert cfg.palace_path == expected
|
|
|
|
|
|
def test_env_path_legacy_alias_normalized():
    """The legacy ``MEMPAL_PALACE_PATH`` variable is normalized like the current one."""
    from unittest import mock

    # Legacy MEMPAL_PALACE_PATH gets the same normalization treatment as
    # MEMPALACE_PALACE_PATH. We don't assert "~" is absent from the final
    # string because Windows 8.3 short paths (e.g. C:\Users\RUNNER~1\...)
    # legitimately contain tildes — the equality check below is authoritative.
    raw = os.path.join("~", "legacy-alias", "..", "mempalace-test")
    # patch.dict snapshots os.environ and restores it on exit, which undoes
    # both the legacy variable set here and the pop() below. Previously the
    # popped MEMPALACE_PALACE_PATH was never put back for later tests.
    with mock.patch.dict(os.environ, {"MEMPAL_PALACE_PATH": raw}):
        # Only the legacy alias should be set while the config is read.
        os.environ.pop("MEMPALACE_PALACE_PATH", None)
        # TemporaryDirectory removes the scratch config dir afterwards.
        with tempfile.TemporaryDirectory() as config_dir:
            cfg = MempalaceConfig(config_dir=config_dir)
            assert ".." not in cfg.palace_path
            assert cfg.palace_path == os.path.abspath(os.path.expanduser(raw))
|
|
|
|
|
|
def test_init():
    """``init()`` creates a ``config.json`` inside the config directory."""
    # TemporaryDirectory deletes the directory (including the file init()
    # writes) when the block exits; mkdtemp() would leave it on disk.
    with tempfile.TemporaryDirectory() as tmpdir:
        cfg = MempalaceConfig(config_dir=tmpdir)
        cfg.init()
        assert os.path.exists(os.path.join(tmpdir, "config.json"))
|
|
|
|
|
|
# --- normalize_wing_name ---
|
|
|
|
|
|
def test_normalize_wing_name_hyphen():
    """A hyphen in the wing name is turned into an underscore."""
    normalized = normalize_wing_name("mempal-private")
    assert normalized == "mempal_private"
|
|
|
|
|
|
def test_normalize_wing_name_space():
    """A space becomes an underscore and the name is lower-cased."""
    normalized = normalize_wing_name("My Project")
    assert normalized == "my_project"
|
|
|
|
|
|
def test_normalize_wing_name_already_clean():
    """A lower-case name without separators is returned unchanged."""
    clean = "memorymark"
    assert normalize_wing_name(clean) == clean
|
|
|
|
|
|
def test_normalize_wing_name_mixed():
    """Hyphens, spaces and upper case are all normalized in one call."""
    normalized = normalize_wing_name("My-Cool App")
    assert normalized == "my_cool_app"
|
|
|
|
|
|
# --- sanitize_name ---
|
|
|
|
|
|
def test_sanitize_name_ascii():
    """A plain ASCII name is returned unchanged."""
    name = "hello"
    assert sanitize_name(name) == name
|
|
|
|
|
|
def test_sanitize_name_latvian():
    """A name with a Latvian diacritic is returned unchanged."""
    name = "Jānis"
    assert sanitize_name(name) == name
|
|
|
|
|
|
def test_sanitize_name_cjk():
    """A name written in CJK characters is returned unchanged."""
    name = "太郎"
    assert sanitize_name(name) == name
|
|
|
|
|
|
def test_sanitize_name_cyrillic():
    """A name written in Cyrillic is returned unchanged."""
    name = "Алексей"
    assert sanitize_name(name) == name
|
|
|
|
|
|
def test_sanitize_name_rejects_leading_underscore():
    """A name that starts with an underscore raises ``ValueError``."""
    bad_name = "_foo"
    with pytest.raises(ValueError):
        sanitize_name(bad_name)
|
|
|
|
|
|
def test_sanitize_name_rejects_path_traversal():
    """A path-traversal style name raises ``ValueError``."""
    bad_name = "../etc/passwd"
    with pytest.raises(ValueError):
        sanitize_name(bad_name)
|
|
|
|
|
|
def test_sanitize_name_rejects_empty():
    """An empty name raises ``ValueError``."""
    bad_name = ""
    with pytest.raises(ValueError):
        sanitize_name(bad_name)
|
|
|
|
|
|
# --- sanitize_kg_value ---
|
|
|
|
|
|
def test_kg_value_accepts_commas():
    """A value containing commas is returned unchanged."""
    value = "Alice, Bob, and Carol"
    assert sanitize_kg_value(value) == value
|
|
|
|
|
|
def test_kg_value_accepts_colons():
    """A value containing a colon is returned unchanged."""
    value = "role: engineer"
    assert sanitize_kg_value(value) == value
|
|
|
|
|
|
def test_kg_value_accepts_parentheses():
    """A value containing parentheses is returned unchanged."""
    value = "Python (programming)"
    assert sanitize_kg_value(value) == value
|
|
|
|
|
|
def test_kg_value_accepts_slashes():
    """A value containing a forward slash is returned unchanged."""
    value = "owner/repo"
    assert sanitize_kg_value(value) == value
|
|
|
|
|
|
def test_kg_value_accepts_hash():
    """A value containing a ``#`` is returned unchanged."""
    value = "issue #123"
    assert sanitize_kg_value(value) == value
|
|
|
|
|
|
def test_kg_value_accepts_unicode():
    """A value with non-ASCII letters is returned unchanged."""
    value = "Jānis Bērziņš"
    assert sanitize_kg_value(value) == value
|
|
|
|
|
|
def test_kg_value_strips_whitespace():
    """Leading and trailing whitespace is removed from the value."""
    cleaned = sanitize_kg_value(" hello ")
    assert cleaned == "hello"
|
|
|
|
|
|
def test_kg_value_rejects_empty():
    """An empty value raises ``ValueError``."""
    bad_value = ""
    with pytest.raises(ValueError):
        sanitize_kg_value(bad_value)
|
|
|
|
|
|
def test_kg_value_rejects_whitespace_only():
    """A value consisting only of whitespace raises ``ValueError``."""
    bad_value = " "
    with pytest.raises(ValueError):
        sanitize_kg_value(bad_value)
|
|
|
|
|
|
def test_kg_value_rejects_null_bytes():
    """A value with an embedded NUL byte raises ``ValueError``."""
    bad_value = "hello\x00world"
    with pytest.raises(ValueError):
        sanitize_kg_value(bad_value)
|
|
|
|
|
|
def test_kg_value_rejects_over_length():
    """A 129-character value raises ``ValueError``."""
    # NOTE(review): presumably one past the maximum length — confirm in config.py.
    too_long = "a" * 129
    with pytest.raises(ValueError):
        sanitize_kg_value(too_long)
|
|
|
|
|
|
# --- sanitize_iso_date ---
|
|
|
|
|
|
def test_iso_date_accepts_year_only():
    """A bare ``YYYY`` value is returned unchanged."""
    value = "2026"
    assert sanitize_iso_date(value) == value
|
|
|
|
|
|
def test_iso_date_accepts_year_month():
    """A ``YYYY-MM`` value is returned unchanged."""
    value = "2026-03"
    assert sanitize_iso_date(value) == value
|
|
|
|
|
|
def test_iso_date_accepts_full_date():
    """A ``YYYY-MM-DD`` value is returned unchanged."""
    value = "2026-03-15"
    assert sanitize_iso_date(value) == value
|
|
|
|
|
|
def test_iso_date_passes_through_none():
    """``None`` is passed through as ``None``."""
    result = sanitize_iso_date(None)
    assert result is None
|
|
|
|
|
|
def test_iso_date_passes_through_empty_string():
    """An empty string is passed through as an empty string."""
    result = sanitize_iso_date("")
    assert result == ""
|
|
|
|
|
|
def test_iso_date_strips_whitespace():
    """Surrounding whitespace is removed from an otherwise valid date."""
    cleaned = sanitize_iso_date(" 2026-03-15 ")
    assert cleaned == "2026-03-15"
|
|
|
|
|
|
def test_iso_date_rejects_natural_language():
    """A spelled-out month and year raises ``ValueError``."""
    bad_date = "March 2026"
    with pytest.raises(ValueError):
        sanitize_iso_date(bad_date)
|
|
|
|
|
|
def test_iso_date_rejects_abbreviated_month():
    """An abbreviated month name with a year raises ``ValueError``."""
    bad_date = "Jan 2025"
    with pytest.raises(ValueError):
        sanitize_iso_date(bad_date)
|
|
|
|
|
|
def test_iso_date_rejects_us_format():
    """A slash-separated ``MM/DD/YYYY`` date raises ``ValueError``."""
    bad_date = "03/15/2026"
    with pytest.raises(ValueError):
        sanitize_iso_date(bad_date)
|
|
|
|
|
|
def test_iso_date_rejects_invalid_month():
    """A ``YYYY-MM`` value with month 13 raises ``ValueError``."""
    bad_date = "2026-13"
    with pytest.raises(ValueError):
        sanitize_iso_date(bad_date)
|
|
|
|
|
|
def test_iso_date_rejects_invalid_day():
    """A ``YYYY-MM-DD`` value with day 32 raises ``ValueError``."""
    bad_date = "2026-02-32"
    with pytest.raises(ValueError):
        sanitize_iso_date(bad_date)
|
|
|
|
|
|
def test_iso_date_rejects_non_string():
    """An integer argument raises ``ValueError``."""
    not_a_string = 20260315
    with pytest.raises(ValueError):
        sanitize_iso_date(not_a_string)
|
|
|
|
|
|
def test_iso_date_error_names_field():
    """The ``ValueError`` message mentions the field name passed as second argument."""
    field_name = "valid_from"
    with pytest.raises(ValueError, match=field_name):
        sanitize_iso_date("yesterday", field_name)
|