fix(cli): reconfigure stdio to UTF-8 on Windows

The primary `mempalace` console_script (`cli.py:main()`) reads non-ASCII
arguments via piped stdin and writes verbatim drawer text / wing names
through `print()`. On Windows, Python defaults stdio to the system ANSI
codepage (cp1252/cp1251/cp950), so:

- `mempalace search "..." > out.txt` mojibakes any drawer text containing
  non-Latin characters
- `mempalace ... < input.txt` mojibakes piped non-ASCII input

Reconfigure stdin/stdout/stderr to UTF-8 (`errors="strict"`) at the top
of `main()`, mirroring the helper added in this PR for fact_checker's
`__main__` block. Wrapped in try/except so a replaced stream (Jupyter,
test harness) logs a warning and continues rather than crashing the CLI.

The reconfigure cascades through every `mempalace` subcommand
(`init`/`mine`/`search`/`status`/`hook`/etc.) and through the interactive
flows that read non-ASCII names via `input()` (onboarding, entity
detector, room detector). With this commit the package's three
user-facing entry points (`mempalace`, `mempalace-mcp`, and
`python -m mempalace.fact_checker`) all reconfigure stdio identically on
Windows.
This commit is contained in:
mvalentsev
2026-04-30 15:00:37 +05:00
parent 7cee74c8c8
commit 32f4dfa26d
2 changed files with 79 additions and 0 deletions
+52
View File
@@ -1042,3 +1042,55 @@ def test_cmd_repair_trailing_slash_does_not_recurse():
palace_path = os.path.expanduser(args.palace).rstrip(os.sep)
backup_path = palace_path + ".backup"
assert not backup_path.startswith(palace_path + os.sep)
# ── stdio reconfigure on Windows ─────────────────────────────────────
class _ReconfigurableStringIO:
def __init__(self):
self.reconfigure_calls = []
def reconfigure(self, **kwargs):
self.reconfigure_calls.append(kwargs)
def test_reconfigures_stdio_to_utf8_on_windows():
"""Windows `mempalace` CLI must decode/encode stdio as UTF-8.
Without this, piped non-ASCII input (`mempalace search ... < q.txt`)
or piped non-ASCII output (`mempalace search "..." > out.txt`) is
mojibaked through the system ANSI codepage on non-Latin Windows
locales (cp1252/cp1251/cp950).
"""
from mempalace.cli import _reconfigure_stdio_utf8_on_windows
stdin = _ReconfigurableStringIO()
stdout = _ReconfigurableStringIO()
stderr = _ReconfigurableStringIO()
with (
patch.object(sys, "platform", "win32"),
patch.object(sys, "stdin", stdin),
patch.object(sys, "stdout", stdout),
patch.object(sys, "stderr", stderr),
):
_reconfigure_stdio_utf8_on_windows()
expected = {"encoding": "utf-8", "errors": "strict"}
assert stdin.reconfigure_calls == [expected]
assert stdout.reconfigure_calls == [expected]
assert stderr.reconfigure_calls == [expected]
def test_reconfigure_stdio_is_noop_off_windows():
"""Linux/macOS already default to UTF-8 stdio -- helper must not touch streams."""
from mempalace.cli import _reconfigure_stdio_utf8_on_windows
stdin = _ReconfigurableStringIO()
with (
patch.object(sys, "platform", "linux"),
patch.object(sys, "stdin", stdin),
):
_reconfigure_stdio_utf8_on_windows()
assert stdin.reconfigure_calls == []