fix: MCP null args hang, repair infinite recursion, OOM on large files

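Three critical bugfixes:

1. MCP server hangs on null arguments (#394) — `params.get("arguments", {})` returns None when the JSON contains `"arguments": null`, because the default is only used when the key is absent. Changed to `params.get("arguments") or {}`.
2. cmd_repair infinite recursion (#395) — a trailing slash on palace_path caused backup_path to be created inside the source directory. The trailing path separator is now stripped before `.backup` is appended.
3. OOM on large transcript files (#396) — split_mega_files.py and normalize.py load entire files into memory. Added a 500 MB safety limit: split_mega_files.py skips oversized files with a clear SKIP message, and normalize.py raises an error that names the file and its size.

Closes #394, #395, #396.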
2026-04-09 09:49:58 -07:00
parent 322727030f
commit 0720fb84f8
4 changed files with 13 additions and 1 deletions
@@ -202,6 +202,7 @@ def cmd_repair(args):
    print(f"  Extracted {len(all_ids)} drawers")

    # Backup and rebuild
+    palace_path = palace_path.rstrip(os.sep)
    backup_path = palace_path + ".backup"
    if os.path.exists(backup_path):
        shutil.rmtree(backup_path)
@@ -881,7 +881,7 @@ def handle_request(request):
        }
    elif method == "tools/call":
        tool_name = params.get("name")
-        tool_args = params.get("arguments", {})
+        tool_args = params.get("arguments") or {}
        if tool_name not in TOOLS:
            return {
                "jsonrpc": "2.0",
@@ -26,6 +26,9 @@ def normalize(filepath: str) -> str:
    Plain text files pass through unchanged.
    """
    try:
+        file_size = os.path.getsize(filepath)
+        if file_size > 500 * 1024 * 1024:  # 500 MB safety limit
+            raise IOError(f"File too large ({file_size // (1024*1024)} MB): {filepath}")
        with open(filepath, "r", encoding="utf-8", errors="replace") as f:
            content = f.read()
    except OSError as e:
@@ -182,6 +182,10 @@ def split_file(filepath, output_dir, dry_run=False):
    Returns list of output paths written (or would be written if dry_run).
    """
    path = Path(filepath)
+    max_size = 500 * 1024 * 1024  # 500 MB safety limit
+    if path.stat().st_size > max_size:
+        print(f"  SKIP: {path.name} exceeds {max_size // (1024*1024)} MB limit")
+        return []
    lines = path.read_text(errors="replace").splitlines(keepends=True)

    boundaries = find_session_boundaries(lines)
@@ -266,7 +270,11 @@ def main():
        files = sorted(src_dir.glob("*.txt"))

    mega_files = []
+    max_scan_size = 500 * 1024 * 1024  # 500 MB
    for f in files:
+        if f.stat().st_size > max_scan_size:
+            print(f"  SKIP: {f.name} exceeds {max_scan_size // (1024*1024)} MB limit")
+            continue
        lines = f.read_text(errors="replace").splitlines(keepends=True)
        boundaries = find_session_boundaries(lines)
        if len(boundaries) >= args.min_sessions: