fix(cli): reconfigure stdio to UTF-8 on Windows
The primary `mempalace` console_script (`cli.py:main()`) reads non-ASCII input via piped stdin and writes verbatim drawer text / wing names through `print()`. On Windows, Python defaults stdio to the system ANSI codepage (cp1252/cp1251/cp950), so:

- `mempalace search "..." > out.txt` mojibakes any drawer text containing non-Latin characters
- `mempalace ... < input.txt` mojibakes piped non-ASCII input

Reconfigure stdin/stdout/stderr to UTF-8 (`errors="strict"`) at the top of `main()`, mirroring the helper added in this PR for fact_checker's `__main__` block. The call is wrapped in try/except so that a replaced stream (Jupyter, test harness) logs a warning and continues rather than crashing the CLI.

The reconfigure cascades through every `mempalace` subcommand (`init`/`mine`/`search`/`status`/`hook`/etc.) and through the interactive flows that read non-ASCII names via `input()` (onboarding, entity detector, room detector).

With this commit, the package's three user-facing entry points (`mempalace`, `mempalace-mcp`, and `python -m mempalace.fact_checker`) all reconfigure stdio identically on Windows.
This commit is contained in:
@@ -935,7 +935,34 @@ def cmd_compress(args):
|
||||
print(" (dry run -- nothing stored)")
|
||||
|
||||
|
||||
def _reconfigure_stdio_utf8_on_windows():
    """Switch stdin, stdout and stderr to UTF-8 when running on Windows.

    On Windows, stdio that is redirected to a pipe or file is handled in
    the system ANSI code page (e.g. cp1252, cp1251, cp950) rather than
    UTF-8. That garbles non-ASCII text read from a pipe
    (`mempalace search ... < query.txt`) or written to one
    (`mempalace search "..." > out.txt`), such as verbatim drawer text
    or wing/room names.

    The call is best-effort: on any other platform it returns at once,
    a stream without a ``reconfigure`` method is skipped, and a stream
    whose ``reconfigure`` raises is reported with a warning on stderr
    while the remaining streams are still processed.
    """
    if sys.platform != "win32":
        return

    for stream_name in ("stdin", "stdout", "stderr"):
        # A stream may be missing, None, or replaced by an object that has
        # no `reconfigure` (both getattr calls default to None for that).
        method = getattr(getattr(sys, stream_name, None), "reconfigure", None)
        if method is not None:
            try:
                method(encoding="utf-8", errors="strict")
            except Exception as err:
                # Never let an encoding tweak take the whole CLI down.
                warning = f"WARNING: Could not reconfigure {stream_name} to UTF-8: {err}"
                print(warning, file=sys.stderr)
|
||||
|
||||
|
||||
def main():
|
||||
_reconfigure_stdio_utf8_on_windows()
|
||||
|
||||
version_label = f"MemPalace {__version__}"
|
||||
parser = argparse.ArgumentParser(
|
||||
description="MemPalace — Give your AI a memory. No API key required.",
|
||||
|
||||
@@ -1042,3 +1042,55 @@ def test_cmd_repair_trailing_slash_does_not_recurse():
|
||||
palace_path = os.path.expanduser(args.palace).rstrip(os.sep)
|
||||
backup_path = palace_path + ".backup"
|
||||
assert not backup_path.startswith(palace_path + os.sep)
|
||||
|
||||
|
||||
# ── stdio reconfigure on Windows ─────────────────────────────────────
|
||||
|
||||
|
||||
class _ReconfigurableStringIO:
    """Stand-in for a text stream that only records `reconfigure()` calls."""

    def __init__(self):
        # One dict of keyword arguments per `reconfigure()` call, in call order.
        self.reconfigure_calls = []

    def reconfigure(self, **kwargs):
        """Remember the keyword arguments of this call; change nothing else."""
        self.reconfigure_calls += [kwargs]
|
||||
|
||||
|
||||
def test_reconfigures_stdio_to_utf8_on_windows():
    """On Windows the helper reconfigures all three standard streams.

    With `sys.platform` patched to "win32" and each of stdin, stdout and
    stderr replaced by a recording double, every stream must receive
    exactly one `reconfigure(encoding="utf-8", errors="strict")` call.
    This guards against piped non-ASCII input
    (`mempalace search ... < q.txt`) or output
    (`mempalace search "..." > out.txt`) being garbled by the system
    ANSI code page.
    """
    from mempalace.cli import _reconfigure_stdio_utf8_on_windows

    fake_in, fake_out, fake_err = (_ReconfigurableStringIO() for _ in range(3))
    with (
        patch.object(sys, "platform", "win32"),
        patch.object(sys, "stdin", fake_in),
        patch.object(sys, "stdout", fake_out),
        patch.object(sys, "stderr", fake_err),
    ):
        _reconfigure_stdio_utf8_on_windows()

    # Each stream sees one call, and only with these keyword arguments.
    utf8_strict_once = [{"encoding": "utf-8", "errors": "strict"}]
    assert fake_in.reconfigure_calls == utf8_strict_once
    assert fake_out.reconfigure_calls == utf8_strict_once
    assert fake_err.reconfigure_calls == utf8_strict_once
|
||||
|
||||
|
||||
def test_reconfigure_stdio_is_noop_off_windows():
    """Off Windows the helper must not touch any of the standard streams.

    The helper is Windows-only, so with `sys.platform` patched to "linux"
    none of stdin, stdout or stderr may receive a `reconfigure()` call.
    All three streams are replaced by recording doubles so that a
    regression touching stdout or stderr is caught, not just stdin.
    """
    from mempalace.cli import _reconfigure_stdio_utf8_on_windows

    stdin = _ReconfigurableStringIO()
    stdout = _ReconfigurableStringIO()
    stderr = _ReconfigurableStringIO()
    with (
        patch.object(sys, "platform", "linux"),
        patch.object(sys, "stdin", stdin),
        patch.object(sys, "stdout", stdout),
        patch.object(sys, "stderr", stderr),
    ):
        _reconfigure_stdio_utf8_on_windows()

    assert stdin.reconfigure_calls == []
    assert stdout.reconfigure_calls == []
    assert stderr.reconfigure_calls == []
|
||||
|
||||
Reference in New Issue
Block a user