79c9c0e517
sanitize_name rejects commas, colons, parentheses, and slashes — characters that commonly appear in knowledge graph subject/object values. Adds sanitize_kg_value for KG entity fields (subject, object, entity) while keeping sanitize_name for predicates and wing/room names.
120 lines
2.9 KiB
Python
120 lines
2.9 KiB
Python
import os
|
|
import json
|
|
import tempfile
|
|
|
|
import pytest
|
|
from mempalace.config import MempalaceConfig, sanitize_kg_value, sanitize_name
|
|
|
|
|
|
def test_default_config():
    """Check the values reported when the config directory holds no config file."""
    # TemporaryDirectory removes the scratch directory when the block exits;
    # a bare mkdtemp() would leave one behind on every test run.
    with tempfile.TemporaryDirectory() as config_dir:
        cfg = MempalaceConfig(config_dir=config_dir)
        assert "palace" in cfg.palace_path
        assert cfg.collection_name == "mempalace_drawers"
|
|
|
|
|
|
def test_config_from_file():
    """A ``palace_path`` written to ``config.json`` is what the config reports."""
    # The context manager cleans the scratch directory up afterwards, so the
    # test no longer leaks a temp directory per run.
    with tempfile.TemporaryDirectory() as tmpdir:
        config_file = os.path.join(tmpdir, "config.json")
        # Explicit encoding keeps the fixture independent of the platform default.
        with open(config_file, "w", encoding="utf-8") as f:
            json.dump({"palace_path": "/custom/palace"}, f)

        cfg = MempalaceConfig(config_dir=tmpdir)
        assert cfg.palace_path == "/custom/palace"
|
|
|
|
|
|
def test_env_override():
    """``MEMPALACE_PALACE_PATH`` in the environment sets ``palace_path``."""
    env_key = "MEMPALACE_PALACE_PATH"
    # Remember any value the developer/CI already had so it can be put back.
    previous = os.environ.get(env_key)
    os.environ[env_key] = "/env/palace"
    try:
        with tempfile.TemporaryDirectory() as config_dir:
            cfg = MempalaceConfig(config_dir=config_dir)
            assert cfg.palace_path == "/env/palace"
    finally:
        # Restore the environment even when the assertion fails; otherwise the
        # override would leak into every test that runs afterwards.
        if previous is None:
            os.environ.pop(env_key, None)
        else:
            os.environ[env_key] = previous
|
|
|
|
|
|
def test_init():
    """``init()`` leaves a ``config.json`` inside the config directory."""
    # TemporaryDirectory deletes the directory (and the file init() wrote)
    # once the check is done, instead of leaking it like mkdtemp() did.
    with tempfile.TemporaryDirectory() as tmpdir:
        cfg = MempalaceConfig(config_dir=tmpdir)
        cfg.init()
        assert os.path.exists(os.path.join(tmpdir, "config.json"))
|
|
|
|
|
|
# --- sanitize_name ---
|
|
|
|
|
|
def test_sanitize_name_ascii():
    """A plain ASCII name comes back from ``sanitize_name`` unchanged."""
    name = "hello"
    assert sanitize_name(name) == name
|
|
|
|
|
|
def test_sanitize_name_latvian():
    """A name with a Latvian macron letter comes back unchanged."""
    name = "Jānis"
    assert sanitize_name(name) == name
|
|
|
|
|
|
def test_sanitize_name_cjk():
    """A name written in CJK characters comes back unchanged."""
    name = "太郎"
    assert sanitize_name(name) == name
|
|
|
|
|
|
def test_sanitize_name_cyrillic():
    """A name written in Cyrillic comes back unchanged."""
    name = "Алексей"
    assert sanitize_name(name) == name
|
|
|
|
|
|
def test_sanitize_name_rejects_leading_underscore():
    """A name that starts with an underscore raises ``ValueError``."""
    bad_name = "_foo"
    with pytest.raises(ValueError):
        sanitize_name(bad_name)
|
|
|
|
|
|
def test_sanitize_name_rejects_path_traversal():
    """A ``../``-style relative path raises ``ValueError``."""
    bad_name = "../etc/passwd"
    with pytest.raises(ValueError):
        sanitize_name(bad_name)
|
|
|
|
|
|
def test_sanitize_name_rejects_empty():
    """The empty string raises ``ValueError``."""
    bad_name = ""
    with pytest.raises(ValueError):
        sanitize_name(bad_name)
|
|
|
|
|
|
# --- sanitize_kg_value ---
|
|
|
|
|
|
def test_kg_value_accepts_commas():
    """A comma-separated value comes back from ``sanitize_kg_value`` unchanged."""
    value = "Alice, Bob, and Carol"
    assert sanitize_kg_value(value) == value
|
|
|
|
|
|
def test_kg_value_accepts_colons():
    """A value containing a colon comes back unchanged."""
    value = "role: engineer"
    assert sanitize_kg_value(value) == value
|
|
|
|
|
|
def test_kg_value_accepts_parentheses():
    """A value containing parentheses comes back unchanged."""
    value = "Python (programming)"
    assert sanitize_kg_value(value) == value
|
|
|
|
|
|
def test_kg_value_accepts_slashes():
    """A value containing a forward slash comes back unchanged."""
    value = "owner/repo"
    assert sanitize_kg_value(value) == value
|
|
|
|
|
|
def test_kg_value_accepts_hash():
    """A value containing a ``#`` character comes back unchanged."""
    value = "issue #123"
    assert sanitize_kg_value(value) == value
|
|
|
|
|
|
def test_kg_value_accepts_unicode():
    """A value with non-ASCII letters and a space comes back unchanged."""
    value = "Jānis Bērziņš"
    assert sanitize_kg_value(value) == value
|
|
|
|
|
|
def test_kg_value_strips_whitespace():
    """Surrounding spaces are dropped from the sanitized value."""
    padded = " hello "
    cleaned = sanitize_kg_value(padded)
    assert cleaned == "hello"
|
|
|
|
|
|
def test_kg_value_rejects_empty():
    """The empty string raises ``ValueError``."""
    bad_value = ""
    with pytest.raises(ValueError):
        sanitize_kg_value(bad_value)
|
|
|
|
|
|
def test_kg_value_rejects_whitespace_only():
    """A value made up solely of whitespace raises ``ValueError``."""
    bad_value = " "
    with pytest.raises(ValueError):
        sanitize_kg_value(bad_value)
|
|
|
|
|
|
def test_kg_value_rejects_null_bytes():
    """A value with an embedded NUL character raises ``ValueError``."""
    bad_value = "hello\x00world"
    with pytest.raises(ValueError):
        sanitize_kg_value(bad_value)
|
|
|
|
|
|
def test_kg_value_rejects_over_length():
    """A 129-character value raises ``ValueError``."""
    # NOTE(review): presumably one past the maximum allowed length — confirm
    # against the limit defined in sanitize_kg_value.
    too_long = "a" * 129
    with pytest.raises(ValueError):
        sanitize_kg_value(too_long)
|