2026-04-04 18:33:42 -07:00
|
|
|
import os
|
|
|
|
|
import json
|
|
|
|
|
import tempfile
|
2026-04-12 14:23:34 -07:00
|
|
|
|
|
|
|
|
import pytest
|
2026-04-14 09:26:47 -04:00
|
|
|
from mempalace.config import MempalaceConfig, sanitize_kg_value, sanitize_name
|
2026-04-04 18:33:42 -07:00
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_default_config():
    """A config built from an empty directory exposes the built-in defaults.

    The palace-path environment variables are removed for the duration of the
    test so a value exported in the developer's shell cannot mask the default,
    and the scratch config directory is deleted afterwards.
    """
    from unittest import mock

    # patch.dict snapshots os.environ and restores it on exit, so the pops
    # below never leak outside this test.
    with mock.patch.dict(os.environ):
        os.environ.pop("MEMPALACE_PALACE_PATH", None)
        os.environ.pop("MEMPAL_PALACE_PATH", None)
        with tempfile.TemporaryDirectory() as config_dir:
            cfg = MempalaceConfig(config_dir=config_dir)
            assert "palace" in cfg.palace_path
            assert cfg.collection_name == "mempalace_drawers"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_config_from_file():
    """A palace_path stored in config.json is returned by the config object.

    The scratch directory is created with a context manager so it is removed
    once the test finishes instead of accumulating in the system temp dir.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        config_file = os.path.join(tmpdir, "config.json")
        with open(config_file, "w", encoding="utf-8") as f:
            json.dump({"palace_path": "/custom/palace"}, f)
        cfg = MempalaceConfig(config_dir=tmpdir)
        assert cfg.palace_path == "/custom/palace"
|
|
|
|
|
|
|
|
|
|
|
2026-04-24 23:09:23 +00:00
|
|
|
def test_embedding_device_defaults_to_auto(monkeypatch):
    """With no env var and no config file, embedding_device is "auto".

    The scratch config directory is removed when the test ends.
    """
    # Make sure an exported override cannot influence the default.
    monkeypatch.delenv("MEMPALACE_EMBEDDING_DEVICE", raising=False)
    with tempfile.TemporaryDirectory() as config_dir:
        cfg = MempalaceConfig(config_dir=config_dir)
        assert cfg.embedding_device == "auto"
|
|
|
|
|
|
|
|
|
|
|
2026-04-24 23:09:23 +00:00
|
|
|
def test_embedding_device_from_config_is_normalized(tmp_path, monkeypatch):
    """An embedding_device read from config.json is trimmed and lower-cased."""
    # Keep the environment out of the picture so only the file is consulted.
    monkeypatch.delenv("MEMPALACE_EMBEDDING_DEVICE", raising=False)

    settings = {"embedding_device": " CUDA "}
    (tmp_path / "config.json").write_text(json.dumps(settings))

    config = MempalaceConfig(config_dir=str(tmp_path))
    assert config.embedding_device == "cuda"
|
|
|
|
|
|
|
|
|
|
|
2026-04-24 23:09:23 +00:00
|
|
|
def test_embedding_device_env_overrides_config(tmp_path, monkeypatch):
    """MEMPALACE_EMBEDDING_DEVICE wins over config.json and is normalized too."""
    settings = {"embedding_device": "cpu"}
    (tmp_path / "config.json").write_text(json.dumps(settings))

    # The env value is deliberately padded and mixed-case.
    monkeypatch.setenv("MEMPALACE_EMBEDDING_DEVICE", " CoreML ")

    config = MempalaceConfig(config_dir=str(tmp_path))
    assert config.embedding_device == "coreml"
|
2026-04-24 23:06:50 +00:00
|
|
|
|
|
|
|
|
|
2026-04-04 18:33:42 -07:00
|
|
|
def test_env_override():
    """MEMPALACE_PALACE_PATH overrides the default palace path.

    The variable is set through ``mock.patch.dict`` so that whatever value
    (or absence) existed before the test is restored afterwards, rather than
    being unconditionally deleted. The scratch config directory is removed
    when the test ends.
    """
    from unittest import mock

    raw = "/env/palace"
    with mock.patch.dict(os.environ, {"MEMPALACE_PALACE_PATH": raw}):
        with tempfile.TemporaryDirectory() as config_dir:
            cfg = MempalaceConfig(config_dir=config_dir)
            # palace_path normalizes with abspath + expanduser to match the
            # --palace CLI code path. On Unix that's a no-op for "/env/palace";
            # on Windows abspath prepends the current drive letter.
            assert cfg.palace_path == os.path.abspath(os.path.expanduser(raw))
|
2026-04-04 18:33:42 -07:00
|
|
|
|
|
|
|
|
|
2026-04-24 11:06:30 +02:00
|
|
|
def test_env_path_expanduser():
    """A leading tilde in MEMPALACE_PALACE_PATH is expanded to the home dir.

    The environment is patched with ``mock.patch.dict`` so any pre-existing
    value is restored after the test, and the scratch config directory is
    cleaned up.
    """
    from unittest import mock

    # Tilde must be expanded to match the --palace CLI code path. We don't
    # assert "~" is absent from the final string because Windows 8.3 short
    # paths (e.g. C:\Users\RUNNER~1\...) legitimately contain tildes — the
    # equality check is authoritative.
    raw = os.path.join("~", "mempalace-test")
    with mock.patch.dict(os.environ, {"MEMPALACE_PALACE_PATH": raw}):
        with tempfile.TemporaryDirectory() as config_dir:
            cfg = MempalaceConfig(config_dir=config_dir)
            assert cfg.palace_path == os.path.abspath(os.path.expanduser(raw))
            assert cfg.palace_path.endswith("mempalace-test")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_env_path_abspath_collapses_traversal():
    """A ``..`` segment in MEMPALACE_PALACE_PATH is collapsed, not kept.

    The environment is patched with ``mock.patch.dict`` so any pre-existing
    value is restored after the test, and the scratch config directory is
    cleaned up.
    """
    from unittest import mock

    # Build a raw path with a .. segment using the platform separator so
    # the assertion is portable (Windows uses \, POSIX uses /).
    raw = os.path.join(tempfile.gettempdir(), "palace", "..", "mempalace-test")
    expected = os.path.abspath(os.path.expanduser(raw))
    with mock.patch.dict(os.environ, {"MEMPALACE_PALACE_PATH": raw}):
        with tempfile.TemporaryDirectory() as config_dir:
            cfg = MempalaceConfig(config_dir=config_dir)
            # .. segments must be collapsed, not preserved literally.
            assert ".." not in cfg.palace_path
            assert cfg.palace_path == expected
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_env_path_legacy_alias_normalized():
    """The legacy MEMPAL_PALACE_PATH alias is normalized like the new name.

    Both variables are manipulated inside ``mock.patch.dict`` so the
    environment is put back exactly as it was: a pre-existing
    MEMPALACE_PALACE_PATH is no longer lost, and a pre-existing
    MEMPAL_PALACE_PATH is restored rather than deleted. The scratch config
    directory is cleaned up as well.
    """
    from unittest import mock

    # Legacy MEMPAL_PALACE_PATH gets the same normalization treatment as
    # MEMPALACE_PALACE_PATH. We don't assert "~" is absent from the final
    # string because Windows 8.3 short paths (e.g. C:\Users\RUNNER~1\...)
    # legitimately contain tildes — the equality check below is authoritative.
    raw = os.path.join("~", "legacy-alias", "..", "mempalace-test")
    with mock.patch.dict(os.environ, {"MEMPAL_PALACE_PATH": raw}):
        # Remove the primary variable so only the legacy alias is visible;
        # patch.dict restores it on exit.
        os.environ.pop("MEMPALACE_PALACE_PATH", None)
        with tempfile.TemporaryDirectory() as config_dir:
            cfg = MempalaceConfig(config_dir=config_dir)
            assert ".." not in cfg.palace_path
            assert cfg.palace_path == os.path.abspath(os.path.expanduser(raw))
|
|
|
|
|
|
|
|
|
|
|
2026-04-04 18:33:42 -07:00
|
|
|
def test_init():
    """init() creates config.json inside the config directory.

    The scratch directory is created with a context manager so it (and the
    file written by init()) is removed once the test finishes.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        cfg = MempalaceConfig(config_dir=tmpdir)
        cfg.init()
        assert os.path.exists(os.path.join(tmpdir, "config.json"))
|
2026-04-12 14:23:34 -07:00
|
|
|
|
|
|
|
|
|
|
|
|
|
# --- sanitize_name ---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_sanitize_name_ascii():
    """A plain ASCII name comes back from sanitize_name unchanged."""
    name = "hello"
    assert sanitize_name(name) == name
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_sanitize_name_latvian():
    """A name with a Latvian diacritic comes back unchanged."""
    name = "Jānis"
    assert sanitize_name(name) == name
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_sanitize_name_cjk():
    """A CJK name comes back unchanged."""
    name = "太郎"
    assert sanitize_name(name) == name
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_sanitize_name_cyrillic():
    """A Cyrillic name comes back unchanged."""
    name = "Алексей"
    assert sanitize_name(name) == name
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_sanitize_name_rejects_leading_underscore():
    """A name that starts with an underscore raises ValueError."""
    bad_name = "_foo"
    with pytest.raises(ValueError):
        sanitize_name(bad_name)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_sanitize_name_rejects_path_traversal():
    """A path-traversal style name raises ValueError."""
    bad_name = "../etc/passwd"
    with pytest.raises(ValueError):
        sanitize_name(bad_name)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_sanitize_name_rejects_empty():
    """The empty string is not a valid name and raises ValueError."""
    empty_name = ""
    with pytest.raises(ValueError):
        sanitize_name(empty_name)
|
2026-04-14 09:26:47 -04:00
|
|
|
|
|
|
|
|
|
|
|
|
|
# --- sanitize_kg_value ---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_kg_value_accepts_commas():
    """A value containing commas comes back from sanitize_kg_value unchanged."""
    value = "Alice, Bob, and Carol"
    assert sanitize_kg_value(value) == value
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_kg_value_accepts_colons():
    """A value containing a colon comes back unchanged."""
    value = "role: engineer"
    assert sanitize_kg_value(value) == value
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_kg_value_accepts_parentheses():
    """A value containing parentheses comes back unchanged."""
    value = "Python (programming)"
    assert sanitize_kg_value(value) == value
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_kg_value_accepts_slashes():
    """A value containing a forward slash comes back unchanged."""
    value = "owner/repo"
    assert sanitize_kg_value(value) == value
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_kg_value_accepts_hash():
    """A value containing a hash sign comes back unchanged."""
    value = "issue #123"
    assert sanitize_kg_value(value) == value
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_kg_value_accepts_unicode():
    """A value with non-ASCII letters comes back unchanged."""
    value = "Jānis Bērziņš"
    assert sanitize_kg_value(value) == value
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_kg_value_strips_whitespace():
    """Leading and trailing whitespace is removed from the value."""
    padded = " hello "
    assert sanitize_kg_value(padded) == "hello"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_kg_value_rejects_empty():
    """The empty string raises ValueError."""
    empty_value = ""
    with pytest.raises(ValueError):
        sanitize_kg_value(empty_value)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_kg_value_rejects_whitespace_only():
    """A value consisting solely of whitespace raises ValueError."""
    blank_value = " "
    with pytest.raises(ValueError):
        sanitize_kg_value(blank_value)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_kg_value_rejects_null_bytes():
    """A value with an embedded NUL byte raises ValueError."""
    tainted_value = "hello\x00world"
    with pytest.raises(ValueError):
        sanitize_kg_value(tainted_value)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_kg_value_rejects_over_length():
    """A 129-character value raises ValueError."""
    oversized_value = "a" * 129
    with pytest.raises(ValueError):
        sanitize_kg_value(oversized_value)
|