fix: allow Unicode in sanitize_name() — Latvian, CJK, Cyrillic (#637) (#683)

* fix: allow Unicode in sanitize_name() — Latvian, CJK, Cyrillic names (#637)

_SAFE_NAME_RE was ASCII-only ([a-zA-Z0-9]), rejecting valid Unicode
names like "Jānis" or "太郎". Changed to \w which matches Unicode
word characters (letters, digits, underscore) in Python 3.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* fix: tighten Unicode regex, add sanitize_name tests

Use [^\W_] for first/last char to allow Unicode letters/digits but
reject leading/trailing underscores (Copilot feedback). Add 7 tests
covering Latvian, CJK, Cyrillic, path traversal, and edge cases.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Jeffrey Hein
2026-04-12 14:23:34 -07:00
committed by GitHub
parent 915b8b2c75
commit 6e2ced3287
2 changed files with 38 additions and 2 deletions
+1 -1
View File
@@ -16,7 +16,7 @@ from pathlib import Path
# in file paths, SQLite, or ChromaDB metadata.
# Longest name the patterns below can match: 1 leading + up to 126 middle
# + 1 trailing character = 128.
MAX_NAME_LENGTH = 128
# Earlier ASCII-only pattern: an ASCII letter/digit, then up to 126 characters
# from [a-zA-Z0-9_ .'-], then an OPTIONAL ASCII letter/digit. Because the last
# group is optional, a name could end in any middle-class character.
# The assignment on the next statement rebinds this name.
_SAFE_NAME_RE = re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_ .'-]{0,126}[a-zA-Z0-9]?$")
# Unicode-aware pattern. `[^\W_]` is "a word character that is not an
# underscore", i.e. a letter or digit in any script (for str patterns `\w` is
# Unicode-aware by default). Two alternatives:
#   * a single letter/digit, or
#   * letter/digit + up to 126 of [\w .'-] + letter/digit,
# so the first and last characters can never be '_', ' ', '.', "'" or '-'.
# NOTE(review): `$` also matches just before a trailing "\n"; confirm callers
# strip the value first, or consider `\Z` / `fullmatch`.
_SAFE_NAME_RE = re.compile(r"^(?:[^\W_]|[^\W_][\w .'-]{0,126}[^\W_])$")
def sanitize_name(value: str, field_name: str = "name") -> str:
+37 -1
View File
@@ -1,7 +1,9 @@
import os
import json
import tempfile
from mempalace.config import MempalaceConfig
import pytest
from mempalace.config import MempalaceConfig, sanitize_name
def test_default_config():
@@ -30,3 +32,37 @@ def test_init():
cfg = MempalaceConfig(config_dir=tmpdir)
cfg.init()
assert os.path.exists(os.path.join(tmpdir, "config.json"))
# --- sanitize_name ---
def test_sanitize_name_ascii():
    """A plain ASCII name is returned unchanged."""
    name = "hello"
    assert sanitize_name(name) == name
def test_sanitize_name_latvian():
    """A Latvian name containing a macron letter is returned unchanged."""
    name = "Jānis"
    assert sanitize_name(name) == name
def test_sanitize_name_cjk():
    """A name written in CJK ideographs is returned unchanged."""
    name = "太郎"
    assert sanitize_name(name) == name
def test_sanitize_name_cyrillic():
    """A name written in Cyrillic is returned unchanged."""
    name = "Алексей"
    assert sanitize_name(name) == name
def test_sanitize_name_rejects_leading_underscore():
    """A name that starts with an underscore raises ValueError."""
    bad_name = "_foo"
    with pytest.raises(ValueError):
        sanitize_name(bad_name)
def test_sanitize_name_rejects_path_traversal():
    """A path-traversal style value raises ValueError."""
    bad_name = "../etc/passwd"
    with pytest.raises(ValueError):
        sanitize_name(bad_name)
def test_sanitize_name_rejects_empty():
    """The empty string raises ValueError."""
    bad_name = ""
    with pytest.raises(ValueError):
        sanitize_name(bad_name)