fix(mcp): redirect stdout to stderr during import to protect JSON-RPC channel (#225) (#864)

* fix(mcp): redirect stdout to stderr during import to protect JSON-RPC channel (#225)

Fixes #225.

Several transitive dependencies (chromadb, onnxruntime, posthog) print
banners and warnings to stdout — sometimes at the C level — during the
mcp_server import chain. Because the MCP protocol multiplexes JSON-RPC
over stdio, any non-JSON output on stdout corrupted the message stream
and broke Claude Desktop's parser with errors like:

  MCP mempalace: Unexpected token '*', "**********"... is not valid JSON
  MCP mempalace: Unexpected token 'E', "EP Error D"... is not valid JSON
  MCP mempalace: Unexpected token 'F', "Falling ba"... is not valid JSON

Reproduced on Windows 11 with mempalace 3.0.0 / Python 3.10 / Claude
Desktop 1.1062.0.

Fix: at module load, redirect stdout to stderr at both the Python level
(sys.stdout = sys.stderr) and the file-descriptor level (os.dup2(2, 1))
to catch C-level prints, while preserving the real stdout for later
restore. main() calls _restore_stdout() right before entering the
protocol loop so JSON-RPC responses still go to the real stdout.

Adds tests/test_mcp_stdio_protection.py with three regression tests:
- module-level redirect is in place after import
- _restore_stdout() restores the original stdout (idempotent)
- 'python -m mempalace.mcp_server' with empty stdin emits no stdout

* style: reformat with ruff 0.4 (CI version) for #225
This commit is contained in:
Arnold Wender
2026-04-15 09:26:51 +02:00
committed by GitHub
parent 0aee6f3ed9
commit b226251ddf
2 changed files with 136 additions and 14 deletions
+53 -14
View File
@@ -20,22 +20,47 @@ Tools (maintenance):
mempalace_reconnect — force cache invalidation and reconnect after external writes
"""
import argparse
import os
import sys
import json
import logging
import hashlib
import time
from datetime import datetime
from pathlib import Path
from .config import MempalaceConfig, sanitize_kg_value, sanitize_name, sanitize_content
from .version import __version__
from .backends.chroma import ChromaBackend, ChromaCollection
from .query_sanitizer import sanitize_query
from .searcher import search_memories
from .palace_graph import (
# --- MCP stdio protection (issue #225) -----------------------------------
# The MCP protocol multiplexes JSON-RPC over stdio: stdout MUST carry only
# valid JSON-RPC messages, stderr is for human-readable logs. Some
# transitive dependencies (chromadb → onnxruntime, posthog telemetry) print
# banners and error messages directly to stdout — sometimes at C level —
# which breaks Claude Desktop's JSON parser. Redirect stdout → stderr at
# both the Python and file-descriptor level before heavy imports, then
# restore the real stdout in main() before entering the protocol loop.
_REAL_STDOUT = sys.stdout
_REAL_STDOUT_FD = None
try:
_REAL_STDOUT_FD = os.dup(1)
os.dup2(2, 1)
except (OSError, AttributeError):
# Environments without fd-level stdio (embedded interpreters, some test
# harnesses). The Python-level redirect below still applies.
pass
sys.stdout = sys.stderr
import argparse # noqa: E402 (deferred until after stdio protection above)
import json # noqa: E402
import logging # noqa: E402
import hashlib # noqa: E402
import time # noqa: E402
from datetime import datetime # noqa: E402
from pathlib import Path # noqa: E402
from .config import ( # noqa: E402
MempalaceConfig,
sanitize_kg_value,
sanitize_name,
sanitize_content,
)
from .version import __version__ # noqa: E402
from .backends.chroma import ChromaBackend, ChromaCollection # noqa: E402
from .query_sanitizer import sanitize_query # noqa: E402
from .searcher import search_memories # noqa: E402
from .palace_graph import ( # noqa: E402
traverse,
find_tunnels,
graph_stats,
@@ -45,7 +70,7 @@ from .palace_graph import (
follow_tunnels,
)
from .knowledge_graph import KnowledgeGraph
from .knowledge_graph import KnowledgeGraph # noqa: E402
logging.basicConfig(level=logging.INFO, format="%(message)s", stream=sys.stderr)
logger = logging.getLogger("mempalace_mcp")
@@ -1645,7 +1670,21 @@ def handle_request(request):
}
def _restore_stdout():
"""Restore real stdout for MCP JSON-RPC output (see issue #225)."""
global _REAL_STDOUT, _REAL_STDOUT_FD
if _REAL_STDOUT_FD is not None:
try:
os.dup2(_REAL_STDOUT_FD, 1)
os.close(_REAL_STDOUT_FD)
except OSError:
pass
_REAL_STDOUT_FD = None
sys.stdout = _REAL_STDOUT
def main():
_restore_stdout()
logger.info("MemPalace MCP Server starting...")
while True:
try:
+83
View File
@@ -0,0 +1,83 @@
"""Regression tests for issue #225 — MCP stdio protection.
The MCP protocol multiplexes JSON-RPC over stdio. Stdout MUST carry only
valid JSON-RPC messages. Several transitive deps (chromadb → onnxruntime,
posthog telemetry) print banners and warnings to stdout — sometimes at
the C level — which broke Claude Desktop's JSON parser on Windows.
The fix in mcp_server.py redirects stdout → stderr at both the Python
and file-descriptor level during module import, then restores the real
stdout in main() before entering the protocol loop.
"""
import subprocess
import sys
import textwrap
def test_module_import_redirects_stdout_to_stderr():
"""At import time, sys.stdout must point at sys.stderr so any stray
print() from a transitive dependency is sent to stderr."""
code = textwrap.dedent(
"""
import sys
original_stdout = sys.stdout
from mempalace import mcp_server
assert sys.stdout is sys.stderr, (
f"Expected sys.stdout to be redirected to sys.stderr, "
f"got: {sys.stdout!r}"
)
assert mcp_server._REAL_STDOUT is original_stdout, (
"mcp_server._REAL_STDOUT must hold the original stdout"
)
print("OK", file=sys.stderr)
"""
)
result = subprocess.run(
[sys.executable, "-c", code],
capture_output=True,
timeout=60,
)
assert result.returncode == 0, f"stdout: {result.stdout!r}\nstderr: {result.stderr!r}"
def test_restore_stdout_returns_real_stdout():
"""_restore_stdout() must reassign sys.stdout to the original handle
so main() can write JSON-RPC responses to the real stdout."""
code = textwrap.dedent(
"""
import sys
original_stdout = sys.stdout
from mempalace import mcp_server
assert sys.stdout is sys.stderr
mcp_server._restore_stdout()
assert sys.stdout is original_stdout, (
f"After _restore_stdout(), sys.stdout must be the original; "
f"got: {sys.stdout!r}"
)
mcp_server._restore_stdout() # idempotent
print("OK", file=sys.stderr)
"""
)
result = subprocess.run(
[sys.executable, "-c", code],
capture_output=True,
timeout=60,
)
assert result.returncode == 0, f"stdout: {result.stdout!r}\nstderr: {result.stderr!r}"
def test_mcp_server_no_stdout_noise_on_clean_exit():
"""`python -m mempalace.mcp_server` with empty stdin must produce
nothing on stdout. Empty input → readline() returns '' → main()
breaks out cleanly. Any stdout content here would corrupt the
JSON-RPC stream in real use."""
proc = subprocess.run(
[sys.executable, "-m", "mempalace.mcp_server"],
input=b"",
capture_output=True,
timeout=60,
)
assert (
proc.stdout == b""
), f"stdout must be empty before the first JSON-RPC response, but got: {proc.stdout!r}"