fix(cli): reconfigure stdio to UTF-8 on Windows

The primary `mempalace` console_script (`cli.py:main()`) reads non-ASCII
arguments via piped stdin and writes verbatim drawer text / wing names
through `print()`. On Windows, Python defaults stdio to the system ANSI
codepage (cp1252/cp1251/cp950), so:

- `mempalace search "..." > out.txt` turns any drawer text containing
  non-Latin characters into mojibake
- `mempalace ... < input.txt` likewise garbles piped non-ASCII input

Reconfigure stdin/stdout/stderr to UTF-8 (`errors="strict"`) at the top
of `main()`, mirroring the helper added in this PR for fact_checker's
`__main__` block. Wrapped in try/except so a replaced stream (Jupyter,
test harness) logs a warning and continues rather than crashing the CLI.

The reconfigure cascades through every `mempalace` subcommand
(`init`/`mine`/`search`/`status`/`hook`/etc.) and through the interactive
flows that read non-ASCII names via `input()` (onboarding, entity
detector, room detector). With this commit the package's three
user-facing entry points (`mempalace`, `mempalace-mcp`, and
`python -m mempalace.fact_checker`) all reconfigure stdio identically on
Windows.
This commit is contained in:
mvalentsev
2026-04-30 15:00:37 +05:00
parent 7cee74c8c8
commit 32f4dfa26d
2 changed files with 79 additions and 0 deletions
+27
View File
@@ -935,7 +935,34 @@ def cmd_compress(args):
print(" (dry run -- nothing stored)") print(" (dry run -- nothing stored)")
def _reconfigure_stdio_utf8_on_windows():
"""Decode stdio as UTF-8 on Windows for the primary `mempalace` CLI.
Without this, Python defaults stdio to the system ANSI codepage
(cp1252/cp1251/cp950 depending on locale). That mojibakes non-ASCII
content piped in (`mempalace search ... < query.txt`) or piped out
(`mempalace search "..." > out.txt`) when verbatim drawer text or
wing/room names contain non-Latin characters.
"""
if sys.platform != "win32":
return
for name in ("stdin", "stdout", "stderr"):
stream = getattr(sys, name, None)
reconfigure = getattr(stream, "reconfigure", None)
if reconfigure is None:
continue
try:
reconfigure(encoding="utf-8", errors="strict")
except Exception as exc:
print(
f"WARNING: Could not reconfigure {name} to UTF-8: {exc}",
file=sys.stderr,
)
def main(): def main():
_reconfigure_stdio_utf8_on_windows()
version_label = f"MemPalace {__version__}" version_label = f"MemPalace {__version__}"
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
description="MemPalace — Give your AI a memory. No API key required.", description="MemPalace — Give your AI a memory. No API key required.",
+52
View File
@@ -1042,3 +1042,55 @@ def test_cmd_repair_trailing_slash_does_not_recurse():
palace_path = os.path.expanduser(args.palace).rstrip(os.sep) palace_path = os.path.expanduser(args.palace).rstrip(os.sep)
backup_path = palace_path + ".backup" backup_path = palace_path + ".backup"
assert not backup_path.startswith(palace_path + os.sep) assert not backup_path.startswith(palace_path + os.sep)
# ── stdio reconfigure on Windows ─────────────────────────────────────
class _ReconfigurableStringIO:
def __init__(self):
self.reconfigure_calls = []
def reconfigure(self, **kwargs):
self.reconfigure_calls.append(kwargs)
def test_reconfigures_stdio_to_utf8_on_windows():
    """On Windows the helper must reconfigure all three stdio streams.

    With ``sys.platform`` patched to ``"win32"`` and stdin/stdout/stderr
    replaced by recording fakes, each fake must see exactly one
    ``reconfigure(encoding="utf-8", errors="strict")`` call. This is what
    keeps piped non-ASCII input (`mempalace search ... < q.txt`) and
    redirected non-ASCII output (`mempalace search "..." > out.txt`) from
    being garbled by the system ANSI codepage.
    """
    from mempalace.cli import _reconfigure_stdio_utf8_on_windows

    fakes = {
        stream_name: _ReconfigurableStringIO()
        for stream_name in ("stdin", "stdout", "stderr")
    }

    # Patch the platform and all three streams on `sys` in one go.
    with patch.multiple(sys, platform="win32", **fakes):
        _reconfigure_stdio_utf8_on_windows()

    wanted_call = {"encoding": "utf-8", "errors": "strict"}
    for stream_name, fake in fakes.items():
        assert fake.reconfigure_calls == [wanted_call], stream_name
def test_reconfigure_stdio_is_noop_off_windows():
    """Off Windows the helper must leave the streams untouched.

    With ``sys.platform`` patched to ``"linux"``, the recording fake
    installed as ``sys.stdin`` must receive no ``reconfigure`` call.
    """
    from mempalace.cli import _reconfigure_stdio_utf8_on_windows

    fake_stdin = _ReconfigurableStringIO()

    with patch.multiple(sys, platform="linux", stdin=fake_stdin):
        _reconfigure_stdio_utf8_on_windows()

    assert fake_stdin.reconfigure_calls == []