From 32f4dfa26d25b8ff243bfd2e636f5e96d8947a83 Mon Sep 17 00:00:00 2001
From: mvalentsev
Date: Thu, 30 Apr 2026 15:00:37 +0500
Subject: [PATCH] fix(cli): reconfigure stdio to UTF-8 on Windows

The primary `mempalace` console_script (`cli.py:main()`) reads non-ASCII
arguments via piped stdin and writes verbatim drawer text / wing names
through `print()`. On Windows, Python defaults stdio to the system ANSI
codepage (cp1252/cp1251/cp950), so:

- `mempalace search "..." > out.txt` mojibakes any drawer text
  containing non-Latin characters
- `mempalace ... < input.txt` mojibakes piped non-ASCII input

Reconfigure stdin/stdout/stderr to UTF-8 (`errors="strict"`) at the top
of `main()`, mirroring the helper added in this PR for fact_checker's
`__main__` block. Wrapped in try/except so a replaced stream (Jupyter,
test harness) logs a warning and continues rather than crashing the CLI.

The reconfigure cascades through every `mempalace` subcommand
(`init`/`mine`/`search`/`status`/`hook`/etc.) and through the
interactive flows that read non-ASCII names via `input()` (onboarding,
entity detector, room detector).

With this commit the package's three user-facing entry points
(`mempalace`, `mempalace-mcp`, and `python -m mempalace.fact_checker`)
all reconfigure stdio identically on Windows.
---
 mempalace/cli.py  | 27 ++++++++++++++++++++++++
 tests/test_cli.py | 52 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 79 insertions(+)

diff --git a/mempalace/cli.py b/mempalace/cli.py
index f2606a4..7372cd7 100644
--- a/mempalace/cli.py
+++ b/mempalace/cli.py
@@ -935,7 +935,34 @@ def cmd_compress(args):
         print(" (dry run -- nothing stored)")
 
 
+def _reconfigure_stdio_utf8_on_windows():
+    """Decode stdio as UTF-8 on Windows for the primary `mempalace` CLI.
+
+    Without this, Python defaults stdio to the system ANSI codepage
+    (cp1252/cp1251/cp950 depending on locale). That mojibakes non-ASCII
+    content piped in (`mempalace search ... < query.txt`) or piped out
+    (`mempalace search "..." > out.txt`) when verbatim drawer text or
+    wing/room names contain non-Latin characters.
+    """
+    if sys.platform != "win32":
+        return
+    for name in ("stdin", "stdout", "stderr"):
+        stream = getattr(sys, name, None)
+        reconfigure = getattr(stream, "reconfigure", None)
+        if reconfigure is None:
+            continue
+        try:
+            reconfigure(encoding="utf-8", errors="strict")
+        except Exception as exc:
+            print(
+                f"WARNING: Could not reconfigure {name} to UTF-8: {exc}",
+                file=sys.stderr,
+            )
+
+
 def main():
+    _reconfigure_stdio_utf8_on_windows()
+
     version_label = f"MemPalace {__version__}"
     parser = argparse.ArgumentParser(
         description="MemPalace — Give your AI a memory. No API key required.",
diff --git a/tests/test_cli.py b/tests/test_cli.py
index 328b90c..4836d69 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -1042,3 +1042,55 @@ def test_cmd_repair_trailing_slash_does_not_recurse():
     palace_path = os.path.expanduser(args.palace).rstrip(os.sep)
     backup_path = palace_path + ".backup"
     assert not backup_path.startswith(palace_path + os.sep)
+
+
+# ── stdio reconfigure on Windows ─────────────────────────────────────
+
+
+class _ReconfigurableStringIO:
+    def __init__(self):
+        self.reconfigure_calls = []
+
+    def reconfigure(self, **kwargs):
+        self.reconfigure_calls.append(kwargs)
+
+
+def test_reconfigures_stdio_to_utf8_on_windows():
+    """Windows `mempalace` CLI must decode/encode stdio as UTF-8.
+
+    Without this, piped non-ASCII input (`mempalace search ... < q.txt`)
+    or piped non-ASCII output (`mempalace search "..." > out.txt`) is
+    mojibaked through the system ANSI codepage on non-Latin Windows
+    locales (cp1252/cp1251/cp950).
+    """
+    from mempalace.cli import _reconfigure_stdio_utf8_on_windows
+
+    stdin = _ReconfigurableStringIO()
+    stdout = _ReconfigurableStringIO()
+    stderr = _ReconfigurableStringIO()
+    with (
+        patch.object(sys, "platform", "win32"),
+        patch.object(sys, "stdin", stdin),
+        patch.object(sys, "stdout", stdout),
+        patch.object(sys, "stderr", stderr),
+    ):
+        _reconfigure_stdio_utf8_on_windows()
+
+    expected = {"encoding": "utf-8", "errors": "strict"}
+    assert stdin.reconfigure_calls == [expected]
+    assert stdout.reconfigure_calls == [expected]
+    assert stderr.reconfigure_calls == [expected]
+
+
+def test_reconfigure_stdio_is_noop_off_windows():
+    """Linux/macOS already default to UTF-8 stdio -- helper must not touch streams."""
+    from mempalace.cli import _reconfigure_stdio_utf8_on_windows
+
+    stdin = _ReconfigurableStringIO()
+    with (
+        patch.object(sys, "platform", "linux"),
+        patch.object(sys, "stdin", stdin),
+    ):
+        _reconfigure_stdio_utf8_on_windows()
+
+    assert stdin.reconfigure_calls == []