Merge pull request #1402 from MemPalace/fix/1296-windows-mine-resilience
fix(miner): harden Windows mine against ONNX bad_alloc + silent partial exits (#1296)
This commit is contained in:
@@ -66,6 +66,8 @@ SKIP_FILENAMES = {
|
|||||||
"mempal.yml",
|
"mempal.yml",
|
||||||
".gitignore",
|
".gitignore",
|
||||||
"package-lock.json",
|
"package-lock.json",
|
||||||
|
"pnpm-lock.yaml",
|
||||||
|
"yarn.lock",
|
||||||
}
|
}
|
||||||
|
|
||||||
CHUNK_SIZE = 800 # chars per drawer
|
CHUNK_SIZE = 800 # chars per drawer
|
||||||
@@ -73,6 +75,13 @@ CHUNK_OVERLAP = 100 # overlap between chunks
|
|||||||
MIN_CHUNK_SIZE = 50 # skip tiny chunks
|
MIN_CHUNK_SIZE = 50 # skip tiny chunks
|
||||||
DRAWER_UPSERT_BATCH_SIZE = 1000
|
DRAWER_UPSERT_BATCH_SIZE = 1000
|
||||||
MAX_FILE_SIZE = 500 * 1024 * 1024 # 500 MB — skip files larger than this.
|
MAX_FILE_SIZE = 500 * 1024 * 1024 # 500 MB — skip files larger than this.
|
||||||
|
# A single file producing more chunks than this is almost always a generated
|
||||||
|
# artifact (CSV/JSON dump, lockfile not in SKIP_FILENAMES, etc.). Embedding
|
||||||
|
# thousands of chunks from one file in one batch has triggered ONNX runtime
|
||||||
|
# `bad allocation` errors on Windows (#1296). The cap is conservative: a
|
||||||
|
# 500-chunk file at CHUNK_SIZE=800 is ~400 KB of source, which covers most
|
||||||
|
# legitimate hand-written content while bounding the worst-case batch.
|
||||||
|
MAX_CHUNKS_PER_FILE = 500
|
||||||
# Long Claude Code sessions and large transcript exports routinely exceed
|
# Long Claude Code sessions and large transcript exports routinely exceed
|
||||||
# 10 MB. The cap exists as a defensive rail against pathological binary
|
# 10 MB. The cap exists as a defensive rail against pathological binary
|
||||||
# files, not as a limit on legitimate text. Per-drawer size is bounded
|
# files, not as a limit on legitimate text. Per-drawer size is bounded
|
||||||
@@ -825,6 +834,13 @@ def process_file(
|
|||||||
room = detect_room(filepath, content, rooms, project_path)
|
room = detect_room(filepath, content, rooms, project_path)
|
||||||
chunks = chunk_text(content, source_file)
|
chunks = chunk_text(content, source_file)
|
||||||
|
|
||||||
|
if len(chunks) > MAX_CHUNKS_PER_FILE:
|
||||||
|
print(
|
||||||
|
f" ! [skip] {filepath.name[:50]:50} produced {len(chunks)} chunks "
|
||||||
|
f"(> {MAX_CHUNKS_PER_FILE}); add to SKIP_FILENAMES or .gitignore"
|
||||||
|
)
|
||||||
|
return 0, room
|
||||||
|
|
||||||
if dry_run:
|
if dry_run:
|
||||||
print(f" [DRY RUN] {filepath.name} -> room:{room} ({len(chunks)} drawers)")
|
print(f" [DRY RUN] {filepath.name} -> room:{room} ({len(chunks)} drawers)")
|
||||||
return len(chunks), room
|
return len(chunks), room
|
||||||
@@ -1167,6 +1183,24 @@ def _mine_impl(
|
|||||||
"already-filed drawers are\n upserted idempotently and will not duplicate.\n"
|
"already-filed drawers are\n upserted idempotently and will not duplicate.\n"
|
||||||
)
|
)
|
||||||
sys.exit(130)
|
sys.exit(130)
|
||||||
|
except Exception as exc:
|
||||||
|
# Without this, an arbitrary exception (ONNX bad_alloc, chromadb HNSW
|
||||||
|
# error, OS fault) propagates and the process exits with no completion
|
||||||
|
# banner — the operator sees only the final progress line and assumes
|
||||||
|
# the mine succeeded (#1296). Print the partial-progress summary the
|
||||||
|
# way we do for KeyboardInterrupt, then re-raise so the original
|
||||||
|
# traceback still surfaces and the exit code is non-zero.
|
||||||
|
print("\n\n Mine aborted by exception.")
|
||||||
|
print(f" files_processed: {files_processed}/{len(files)}")
|
||||||
|
print(f" drawers_filed: {total_drawers}")
|
||||||
|
print(f" last_file: {last_file or '<none>'}")
|
||||||
|
print(f" error: {type(exc).__name__}: {exc}")
|
||||||
|
print(
|
||||||
|
f"\n Re-run `mempalace mine {shlex.quote(project_dir)}` after addressing "
|
||||||
|
"the cause — already-filed\n drawers are upserted idempotently and will "
|
||||||
|
"not duplicate.\n"
|
||||||
|
)
|
||||||
|
raise
|
||||||
finally:
|
finally:
|
||||||
# Clean up the hooks-side PID lock if it points at us. Stale
|
# Clean up the hooks-side PID lock if it points at us. Stale
|
||||||
# entries already pass _pid_alive() == False on POSIX, but
|
# entries already pass _pid_alive() == False on POSIX, but
|
||||||
|
|||||||
@@ -699,6 +699,83 @@ def test_mine_keyboard_interrupt_quotes_path_with_spaces_in_resume_hint(tmp_path
|
|||||||
assert f"mempalace mine {shlex.quote(str(project_root))}" in out
|
assert f"mempalace mine {shlex.quote(str(project_root))}" in out
|
||||||
|
|
||||||
|
|
||||||
|
def test_skip_filenames_includes_lockfiles():
|
||||||
|
"""pnpm-lock.yaml and yarn.lock must be skipped alongside package-lock.json
|
||||||
|
so a Windows mine over a typical JS monorepo doesn't OOM the ONNX embedder
|
||||||
|
on a 24K-line lockfile (#1296)."""
|
||||||
|
from mempalace import miner
|
||||||
|
|
||||||
|
assert "package-lock.json" in miner.SKIP_FILENAMES
|
||||||
|
assert "pnpm-lock.yaml" in miner.SKIP_FILENAMES
|
||||||
|
assert "yarn.lock" in miner.SKIP_FILENAMES
|
||||||
|
|
||||||
|
|
||||||
|
def test_process_file_skips_when_chunks_exceed_max(tmp_path, monkeypatch):
|
||||||
|
"""A file producing more than MAX_CHUNKS_PER_FILE chunks must be skipped
|
||||||
|
with a clear message and zero upserts. Generated artifacts (CSVs, lock
|
||||||
|
files not in SKIP_FILENAMES) hit this — the cap is what prevents ONNX
|
||||||
|
bad_alloc on Windows when the embedder is asked to swallow thousands of
|
||||||
|
chunks in one batch (#1296)."""
|
||||||
|
from unittest.mock import MagicMock
|
||||||
|
|
||||||
|
from mempalace import miner
|
||||||
|
|
||||||
|
monkeypatch.setattr(miner, "MAX_CHUNKS_PER_FILE", 5)
|
||||||
|
over_cap = [{"content": f"chunk {i}", "chunk_index": i} for i in range(7)]
|
||||||
|
monkeypatch.setattr(miner, "chunk_text", lambda content, source_file: over_cap)
|
||||||
|
|
||||||
|
source = tmp_path / "huge.csv"
|
||||||
|
source.write_text("col1,col2\n" + "x,y\n" * 500, encoding="utf-8")
|
||||||
|
col = MagicMock()
|
||||||
|
col.get.return_value = {"ids": []}
|
||||||
|
|
||||||
|
drawers, room = miner.process_file(
|
||||||
|
source,
|
||||||
|
tmp_path,
|
||||||
|
col,
|
||||||
|
"wing",
|
||||||
|
[{"name": "general", "description": "General"}],
|
||||||
|
"agent",
|
||||||
|
False,
|
||||||
|
)
|
||||||
|
|
||||||
|
assert drawers == 0
|
||||||
|
col.upsert.assert_not_called()
|
||||||
|
|
||||||
|
|
||||||
|
def test_mine_arbitrary_exception_prints_summary_and_reraises(tmp_path, capsys):
|
||||||
|
"""A non-KeyboardInterrupt exception mid-mine must surface a summary
|
||||||
|
banner before propagating, so users don't see a silent exit-0 with no
|
||||||
|
completion message (#1296 Failure 2). Re-raise preserves the traceback
|
||||||
|
and yields a non-zero exit code."""
|
||||||
|
import pytest
|
||||||
|
from unittest.mock import patch
|
||||||
|
|
||||||
|
project_root = tmp_path / "proj"
|
||||||
|
project_root.mkdir()
|
||||||
|
_make_minable_project(project_root, n_files=4)
|
||||||
|
palace_path = project_root / "palace"
|
||||||
|
|
||||||
|
call_count = {"n": 0}
|
||||||
|
|
||||||
|
def fake_process_file(*args, **kwargs):
|
||||||
|
call_count["n"] += 1
|
||||||
|
if call_count["n"] == 2:
|
||||||
|
raise RuntimeError("simulated ONNX bad_alloc")
|
||||||
|
return (1, "general")
|
||||||
|
|
||||||
|
with patch("mempalace.miner.process_file", side_effect=fake_process_file):
|
||||||
|
with pytest.raises(RuntimeError, match="simulated ONNX bad_alloc"):
|
||||||
|
mine(str(project_root), str(palace_path))
|
||||||
|
|
||||||
|
out = capsys.readouterr().out
|
||||||
|
assert "Mine aborted by exception." in out
|
||||||
|
assert "files_processed: 1/" in out
|
||||||
|
assert "drawers_filed:" in out
|
||||||
|
assert "RuntimeError: simulated ONNX bad_alloc" in out
|
||||||
|
assert "upserted idempotently" in out
|
||||||
|
|
||||||
|
|
||||||
def test_mine_cleans_up_pid_file_on_interrupt(tmp_path):
|
def test_mine_cleans_up_pid_file_on_interrupt(tmp_path):
|
||||||
"""Our own PID entry in mine.pid is removed in the finally clause."""
|
"""Our own PID entry in mine.pid is removed in the finally clause."""
|
||||||
import pytest
|
import pytest
|
||||||
|
|||||||
Reference in New Issue
Block a user