From 6e2ced3287d28b883bcd49d667e2892ef12e238e Mon Sep 17 00:00:00 2001
From: Jeffrey Hein
Date: Sun, 12 Apr 2026 14:23:34 -0700
Subject: [PATCH] =?UTF-8?q?fix:=20allow=20Unicode=20in=20sanitize=5Fname()?=
 =?UTF-8?q?=20=E2=80=94=20Latvian,=20CJK,=20Cyrillic=20(#637)=20(#683)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* fix: allow Unicode in sanitize_name() — Latvian, CJK, Cyrillic names (#637)

_SAFE_NAME_RE was ASCII-only ([a-zA-Z0-9]), rejecting valid Unicode names
like "Jānis" or "太郎". Changed to \w which matches Unicode word characters
(letters, digits, underscore) in Python 3.

Co-Authored-By: Claude Opus 4.6

* fix: tighten Unicode regex, add sanitize_name tests

Use [^\W_] for first/last char to allow Unicode letters/digits but reject
leading/trailing underscores (Copilot feedback). Add 7 tests covering
Latvian, CJK, Cyrillic, path traversal, and edge cases.

Co-Authored-By: Claude Opus 4.6

---------

Co-authored-by: Claude Opus 4.6
---
 mempalace/config.py  |  2 +-
 tests/test_config.py | 38 +++++++++++++++++++++++++++++++++++++-
 2 files changed, 38 insertions(+), 2 deletions(-)

diff --git a/mempalace/config.py b/mempalace/config.py
index fd82be7..6e84de1 100644
--- a/mempalace/config.py
+++ b/mempalace/config.py
@@ -16,7 +16,7 @@ from pathlib import Path
 # in file paths, SQLite, or ChromaDB metadata.
 
 MAX_NAME_LENGTH = 128
-_SAFE_NAME_RE = re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_ .'-]{0,126}[a-zA-Z0-9]?$")
+_SAFE_NAME_RE = re.compile(r"^(?:[^\W_]|[^\W_][\w .'-]{0,126}[^\W_])$")
 
 
 def sanitize_name(value: str, field_name: str = "name") -> str:
diff --git a/tests/test_config.py b/tests/test_config.py
index a36b74d..761efe9 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -1,7 +1,9 @@
 import os
 import json
 import tempfile
-from mempalace.config import MempalaceConfig
+
+import pytest
+from mempalace.config import MempalaceConfig, sanitize_name
 
 
 def test_default_config():
@@ -30,3 +32,37 @@ def test_init():
         cfg = MempalaceConfig(config_dir=tmpdir)
         cfg.init()
         assert os.path.exists(os.path.join(tmpdir, "config.json"))
+
+
+# --- sanitize_name ---
+
+
+def test_sanitize_name_ascii():
+    assert sanitize_name("hello") == "hello"
+
+
+def test_sanitize_name_latvian():
+    assert sanitize_name("Jānis") == "Jānis"
+
+
+def test_sanitize_name_cjk():
+    assert sanitize_name("太郎") == "太郎"
+
+
+def test_sanitize_name_cyrillic():
+    assert sanitize_name("Алексей") == "Алексей"
+
+
+def test_sanitize_name_rejects_leading_underscore():
+    with pytest.raises(ValueError):
+        sanitize_name("_foo")
+
+
+def test_sanitize_name_rejects_path_traversal():
+    with pytest.raises(ValueError):
+        sanitize_name("../etc/passwd")
+
+
+def test_sanitize_name_rejects_empty():
+    with pytest.raises(ValueError):
+        sanitize_name("")