* fix: allow Unicode in sanitize_name() — Latvian, CJK, Cyrillic names (#637)

  _SAFE_NAME_RE was ASCII-only ([a-zA-Z0-9]), rejecting valid Unicode names like "Jānis" or "太郎". Changed to \w, which matches Unicode word characters (letters, digits, underscore) in Python 3.

  Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* fix: tighten Unicode regex, add sanitize_name tests

  Use [^\W_] for the first/last character to allow Unicode letters/digits but reject leading/trailing underscores (Copilot feedback). Add 7 tests covering Latvian, CJK, Cyrillic, path traversal, and edge cases.

  Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
+1
-1
@@ -16,7 +16,7 @@ from pathlib import Path
|
|||||||
# in file paths, SQLite, or ChromaDB metadata.
|
# in file paths, SQLite, or ChromaDB metadata.
|
||||||
|
|
||||||
MAX_NAME_LENGTH = 128
|
MAX_NAME_LENGTH = 128
|
||||||
_SAFE_NAME_RE = re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_ .'-]{0,126}[a-zA-Z0-9]?$")
|
# A valid name starts and ends with a Unicode letter or digit ([^\W_] is \w
# minus the underscore). Interior characters may additionally be underscore,
# space, dot, apostrophe, or hyphen. The pattern allows at most 128 characters
# (1 + up to 126 + 1); a single letter/digit is also accepted.
# \Z is used rather than $ because $ also matches just before a trailing
# newline, which would let a value such as "name\n" through the pattern.
_SAFE_NAME_RE = re.compile(r"^(?:[^\W_]|[^\W_][\w .'-]{0,126}[^\W_])\Z")
|
||||||
|
|
||||||
|
|
||||||
def sanitize_name(value: str, field_name: str = "name") -> str:
|
def sanitize_name(value: str, field_name: str = "name") -> str:
|
||||||
|
|||||||
+37
-1
@@ -1,7 +1,9 @@
|
|||||||
import os
|
import os
|
||||||
import json
|
import json
|
||||||
import tempfile
|
import tempfile
|
||||||
from mempalace.config import MempalaceConfig
|
|
||||||
|
import pytest
|
||||||
|
from mempalace.config import MempalaceConfig, sanitize_name
|
||||||
|
|
||||||
|
|
||||||
def test_default_config():
|
def test_default_config():
|
||||||
@@ -30,3 +32,37 @@ def test_init():
|
|||||||
cfg = MempalaceConfig(config_dir=tmpdir)
|
cfg = MempalaceConfig(config_dir=tmpdir)
|
||||||
cfg.init()
|
cfg.init()
|
||||||
assert os.path.exists(os.path.join(tmpdir, "config.json"))
|
assert os.path.exists(os.path.join(tmpdir, "config.json"))
|
||||||
|
|
||||||
|
|
||||||
|
# --- sanitize_name ---
|
||||||
|
|
||||||
|
|
||||||
|
def test_sanitize_name_ascii():
    """A plain ASCII name comes back unchanged."""
    expected = "hello"
    assert sanitize_name("hello") == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_sanitize_name_latvian():
    """A Latvian name containing a macron letter comes back unchanged."""
    expected = "Jānis"
    assert sanitize_name("Jānis") == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_sanitize_name_cjk():
    """A CJK name comes back unchanged."""
    expected = "太郎"
    assert sanitize_name("太郎") == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_sanitize_name_cyrillic():
    """A Cyrillic name comes back unchanged."""
    expected = "Алексей"
    assert sanitize_name("Алексей") == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_sanitize_name_rejects_leading_underscore():
    """A name that begins with an underscore raises ValueError."""
    bad_name = "_foo"
    with pytest.raises(ValueError):
        sanitize_name(bad_name)
|
||||||
|
|
||||||
|
|
||||||
|
def test_sanitize_name_rejects_path_traversal():
    """A path-traversal style value raises ValueError."""
    bad_name = "../etc/passwd"
    with pytest.raises(ValueError):
        sanitize_name(bad_name)
|
||||||
|
|
||||||
|
|
||||||
|
def test_sanitize_name_rejects_empty():
    """The empty string raises ValueError."""
    bad_name = ""
    with pytest.raises(ValueError):
        sanitize_name(bad_name)
|
||||||
|
|||||||
Reference in New Issue
Block a user