Merge pull request #946 from mvalentsev/fix/utf8-read-text
fix: add explicit UTF-8 encoding to read_text() calls (#776)
This commit is contained in:
@@ -308,7 +308,7 @@ class EntityRegistry:
|
|||||||
path = (Path(config_dir) / "entity_registry.json") if config_dir else cls.DEFAULT_PATH
|
path = (Path(config_dir) / "entity_registry.json") if config_dir else cls.DEFAULT_PATH
|
||||||
if path.exists():
|
if path.exists():
|
||||||
try:
|
try:
|
||||||
data = json.loads(path.read_text())
|
data = json.loads(path.read_text(encoding="utf-8"))
|
||||||
return cls(data, path)
|
return cls(data, path)
|
||||||
except (json.JSONDecodeError, OSError):
|
except (json.JSONDecodeError, OSError):
|
||||||
pass
|
pass
|
||||||
|
|||||||
@@ -25,4 +25,4 @@ def run_instructions(name: str):
|
|||||||
print(f"Instructions file not found: {md_path}", file=sys.stderr)
|
print(f"Instructions file not found: {md_path}", file=sys.stderr)
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
print(md_path.read_text())
|
print(md_path.read_text(encoding="utf-8"))
|
||||||
|
|||||||
@@ -50,7 +50,7 @@ def _load_known_names_config(force_reload: bool = False):
|
|||||||
|
|
||||||
if _KNOWN_NAMES_PATH.exists():
|
if _KNOWN_NAMES_PATH.exists():
|
||||||
try:
|
try:
|
||||||
_KNOWN_NAMES_CACHE = json.loads(_KNOWN_NAMES_PATH.read_text())
|
_KNOWN_NAMES_CACHE = json.loads(_KNOWN_NAMES_PATH.read_text(encoding="utf-8"))
|
||||||
return _KNOWN_NAMES_CACHE
|
return _KNOWN_NAMES_CACHE
|
||||||
except (json.JSONDecodeError, OSError):
|
except (json.JSONDecodeError, OSError):
|
||||||
pass
|
pass
|
||||||
@@ -184,7 +184,7 @@ def split_file(filepath, output_dir, dry_run=False):
|
|||||||
path = Path(filepath)
|
path = Path(filepath)
|
||||||
max_size = 500 * 1024 * 1024 # 500 MB safety limit
|
max_size = 500 * 1024 * 1024 # 500 MB safety limit
|
||||||
if path.stat().st_size > max_size:
|
if path.stat().st_size > max_size:
|
||||||
print(f" SKIP: {path.name} exceeds {max_size // (1024*1024)} MB limit")
|
print(f" SKIP: {path.name} exceeds {max_size // (1024 * 1024)} MB limit")
|
||||||
return []
|
return []
|
||||||
lines = path.read_text(errors="replace").splitlines(keepends=True)
|
lines = path.read_text(errors="replace").splitlines(keepends=True)
|
||||||
|
|
||||||
@@ -273,7 +273,7 @@ def main():
|
|||||||
max_scan_size = 500 * 1024 * 1024 # 500 MB
|
max_scan_size = 500 * 1024 * 1024 # 500 MB
|
||||||
for f in files:
|
for f in files:
|
||||||
if f.stat().st_size > max_scan_size:
|
if f.stat().st_size > max_scan_size:
|
||||||
print(f" SKIP: {f.name} exceeds {max_scan_size // (1024*1024)} MB limit")
|
print(f" SKIP: {f.name} exceeds {max_scan_size // (1024 * 1024)} MB limit")
|
||||||
continue
|
continue
|
||||||
lines = f.read_text(errors="replace").splitlines(keepends=True)
|
lines = f.read_text(errors="replace").splitlines(keepends=True)
|
||||||
boundaries = find_session_boundaries(lines)
|
boundaries = find_session_boundaries(lines)
|
||||||
|
|||||||
@@ -10,7 +10,7 @@ from mempalace.instructions_cli import AVAILABLE, INSTRUCTIONS_DIR, run_instruct
|
|||||||
def test_run_instructions_valid_name(capsys):
|
def test_run_instructions_valid_name(capsys):
|
||||||
"""Valid name prints the .md file content."""
|
"""Valid name prints the .md file content."""
|
||||||
name = "init"
|
name = "init"
|
||||||
expected = (INSTRUCTIONS_DIR / f"{name}.md").read_text()
|
expected = (INSTRUCTIONS_DIR / f"{name}.md").read_text(encoding="utf-8")
|
||||||
run_instructions(name)
|
run_instructions(name)
|
||||||
captured = capsys.readouterr()
|
captured = capsys.readouterr()
|
||||||
assert captured.out.strip() == expected.strip()
|
assert captured.out.strip() == expected.strip()
|
||||||
|
|||||||
@@ -157,7 +157,7 @@ def test_generate_aaak_bootstrap_entities_content(tmp_path):
|
|||||||
wings = ["family"]
|
wings = ["family"]
|
||||||
_generate_aaak_bootstrap(people, projects, wings, "personal", config_dir=tmp_path)
|
_generate_aaak_bootstrap(people, projects, wings, "personal", config_dir=tmp_path)
|
||||||
|
|
||||||
content = (tmp_path / "aaak_entities.md").read_text()
|
content = (tmp_path / "aaak_entities.md").read_text(encoding="utf-8")
|
||||||
assert "Riley" in content
|
assert "Riley" in content
|
||||||
assert "RIL" in content # entity code
|
assert "RIL" in content # entity code
|
||||||
assert "MemPalace" in content
|
assert "MemPalace" in content
|
||||||
@@ -171,7 +171,7 @@ def test_generate_aaak_bootstrap_facts_content(tmp_path):
|
|||||||
wings = ["projects"]
|
wings = ["projects"]
|
||||||
_generate_aaak_bootstrap(people, projects, wings, "work", config_dir=tmp_path)
|
_generate_aaak_bootstrap(people, projects, wings, "work", config_dir=tmp_path)
|
||||||
|
|
||||||
content = (tmp_path / "critical_facts.md").read_text()
|
content = (tmp_path / "critical_facts.md").read_text(encoding="utf-8")
|
||||||
assert "Alice" in content
|
assert "Alice" in content
|
||||||
assert "Acme" in content
|
assert "Acme" in content
|
||||||
assert "work" in content.lower()
|
assert "work" in content.lower()
|
||||||
@@ -190,7 +190,7 @@ def test_generate_aaak_bootstrap_collision(tmp_path):
|
|||||||
{"name": "Alison", "relationship": "coworker", "context": "work"},
|
{"name": "Alison", "relationship": "coworker", "context": "work"},
|
||||||
]
|
]
|
||||||
_generate_aaak_bootstrap(people, [], ["work"], "work", config_dir=tmp_path)
|
_generate_aaak_bootstrap(people, [], ["work"], "work", config_dir=tmp_path)
|
||||||
content = (tmp_path / "aaak_entities.md").read_text()
|
content = (tmp_path / "aaak_entities.md").read_text(encoding="utf-8")
|
||||||
assert "ALI" in content
|
assert "ALI" in content
|
||||||
assert "ALIS" in content
|
assert "ALIS" in content
|
||||||
|
|
||||||
@@ -199,7 +199,7 @@ def test_generate_aaak_bootstrap_no_relationship(tmp_path):
|
|||||||
"""Person without relationship string still generates entry."""
|
"""Person without relationship string still generates entry."""
|
||||||
people = [{"name": "Bob", "context": "work"}]
|
people = [{"name": "Bob", "context": "work"}]
|
||||||
_generate_aaak_bootstrap(people, [], ["work"], "work", config_dir=tmp_path)
|
_generate_aaak_bootstrap(people, [], ["work"], "work", config_dir=tmp_path)
|
||||||
content = (tmp_path / "aaak_entities.md").read_text()
|
content = (tmp_path / "aaak_entities.md").read_text(encoding="utf-8")
|
||||||
assert "BOB=Bob" in content
|
assert "BOB=Bob" in content
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user