fix(cli): reconfigure stdio to UTF-8 on Windows
The primary `mempalace` console_script (`cli.py:main()`) reads non-ASCII arguments via piped stdin and writes verbatim drawer text / wing names through `print()`. On Windows, Python defaults stdio to the system ANSI codepage (cp1252/cp1251/cp950), so:

- `mempalace search "..." > out.txt` mojibakes any drawer text containing non-Latin characters
- `mempalace ... < input.txt` mojibakes piped non-ASCII input

Reconfigure stdin/stdout/stderr to UTF-8 (`errors="strict"`) at the top of `main()`, mirroring the helper added in this PR for fact_checker's `__main__` block. Wrapped in try/except so a replaced stream (Jupyter, test harness) logs a warning and continues rather than crashing the CLI.

The reconfigure cascades through every `mempalace` subcommand (`init`/`mine`/`search`/`status`/`hook`/etc.) and through the interactive flows that read non-ASCII names via `input()` (onboarding, entity detector, room detector).

With this commit the package's three user-facing entry points (`mempalace`, `mempalace-mcp`, and `python -m mempalace.fact_checker`) all reconfigure stdio identically on Windows.
This commit is contained in:
@@ -935,7 +935,34 @@ def cmd_compress(args):
|
|||||||
print(" (dry run -- nothing stored)")
|
print(" (dry run -- nothing stored)")
|
||||||
|
|
||||||
|
|
||||||
|
def _reconfigure_stdio_utf8_on_windows():
    """Switch stdin/stdout/stderr to UTF-8 when running on Windows.

    Without this, Python defaults stdio to the system ANSI codepage
    (cp1252/cp1251/cp950 depending on locale), which mojibakes non-ASCII
    content piped in (`mempalace search ... < query.txt`) or piped out
    (`mempalace search "..." > out.txt`) when verbatim drawer text or
    wing/room names contain non-Latin characters.

    Does nothing on any other platform. A stream that is missing or has no
    ``reconfigure`` method is skipped; a stream whose ``reconfigure`` raises
    produces a warning on ``sys.stderr`` and the remaining streams are still
    processed.
    """
    if sys.platform != "win32":
        return

    utf8_settings = {"encoding": "utf-8", "errors": "strict"}
    for stream_name in ("stdin", "stdout", "stderr"):
        # A replaced stream may be None or lack `reconfigure`; both cases
        # collapse to `method is None` and are skipped.
        method = getattr(getattr(sys, stream_name, None), "reconfigure", None)
        if method is None:
            continue
        try:
            method(**utf8_settings)
        except Exception as err:
            # Best effort: report the failure and carry on with the next stream.
            warning = f"WARNING: Could not reconfigure {stream_name} to UTF-8: {err}"
            print(warning, file=sys.stderr)
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
_reconfigure_stdio_utf8_on_windows()
|
||||||
|
|
||||||
version_label = f"MemPalace {__version__}"
|
version_label = f"MemPalace {__version__}"
|
||||||
parser = argparse.ArgumentParser(
|
parser = argparse.ArgumentParser(
|
||||||
description="MemPalace — Give your AI a memory. No API key required.",
|
description="MemPalace — Give your AI a memory. No API key required.",
|
||||||
|
|||||||
@@ -1042,3 +1042,55 @@ def test_cmd_repair_trailing_slash_does_not_recurse():
|
|||||||
palace_path = os.path.expanduser(args.palace).rstrip(os.sep)
|
palace_path = os.path.expanduser(args.palace).rstrip(os.sep)
|
||||||
backup_path = palace_path + ".backup"
|
backup_path = palace_path + ".backup"
|
||||||
assert not backup_path.startswith(palace_path + os.sep)
|
assert not backup_path.startswith(palace_path + os.sep)
|
||||||
|
|
||||||
|
|
||||||
|
# ── stdio reconfigure on Windows ─────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
class _ReconfigurableStringIO:
    """Stand-in stream that records every ``reconfigure`` call it receives.

    Despite the name it does not subclass ``io.StringIO``; it exposes only
    ``reconfigure`` and the ``reconfigure_calls`` log.
    """

    def __init__(self):
        # One dict of keyword arguments per reconfigure() call, in call order.
        self.reconfigure_calls = []

    def reconfigure(self, **options):
        """Log the keyword arguments of this call; change nothing else."""
        self.reconfigure_calls.append(options)
|
||||||
|
|
||||||
|
|
||||||
|
def test_reconfigures_stdio_to_utf8_on_windows():
    """On Windows the helper must switch all three stdio streams to UTF-8.

    Otherwise non-ASCII text piped in (`mempalace search ... < q.txt`) or
    piped out (`mempalace search "..." > out.txt`) is mojibaked through the
    system ANSI codepage on non-Latin Windows locales
    (cp1252/cp1251/cp950).
    """
    from mempalace.cli import _reconfigure_stdio_utf8_on_windows

    fakes = {
        stream_name: _ReconfigurableStringIO()
        for stream_name in ("stdin", "stdout", "stderr")
    }
    with (
        patch.object(sys, "platform", "win32"),
        patch.object(sys, "stdin", fakes["stdin"]),
        patch.object(sys, "stdout", fakes["stdout"]),
        patch.object(sys, "stderr", fakes["stderr"]),
    ):
        _reconfigure_stdio_utf8_on_windows()

    # Each stream must have been reconfigured exactly once, with these kwargs.
    utf8_strict = {"encoding": "utf-8", "errors": "strict"}
    for stream_name, fake in fakes.items():
        assert fake.reconfigure_calls == [utf8_strict], stream_name
|
||||||
|
|
||||||
|
|
||||||
|
def test_reconfigure_stdio_is_noop_off_windows():
    """Off Windows the helper must leave streams alone (Linux/macOS already default to UTF-8 stdio)."""
    from mempalace.cli import _reconfigure_stdio_utf8_on_windows

    fake_stdin = _ReconfigurableStringIO()
    with patch.object(sys, "platform", "linux"), patch.object(sys, "stdin", fake_stdin):
        _reconfigure_stdio_utf8_on_windows()

    # No reconfigure() call may have reached the stream.
    assert not fake_stdin.reconfigure_calls
|
||||||
|
|||||||
Reference in New Issue
Block a user