fix(entity_registry): atomic write to prevent partial corruption on crash

EntityRegistry.save() called Path.write_text() directly, which truncates
the target file and then writes — so a crash mid-write (power loss, OOM,
filesystem-full mid-flush) leaves an empty or half-written
entity_registry.json. The whole people/projects map is lost; the system
falls back to an empty registry on next load.

Switch to the standard atomic-write pattern: serialize to a sibling
.tmp file in the same directory (so os.replace stays on one filesystem),
fsync, chmod 0o600, then os.replace over the target. The replace is
atomic on POSIX and Windows, so any crash leaves the previous registry
intact instead of a truncated file.

Tests cover: no leftover .tmp on success, and previous content preserved
when os.replace itself raises mid-save.
This commit is contained in:
Arnold Wender
2026-04-26 13:01:55 +02:00
parent 1888b671e2
commit 4f36145c2e
2 changed files with 59 additions and 2 deletions
+46
View File
@@ -2,6 +2,8 @@
from unittest.mock import patch
import pytest
from mempalace.entity_registry import (
COMMON_ENGLISH_WORDS,
PERSON_CONTEXT_PATTERNS,
@@ -71,6 +73,50 @@ def test_save_creates_file(tmp_path):
assert (tmp_path / "entity_registry.json").exists()
def test_save_is_atomic_does_not_leave_tmp(tmp_path):
# Atomic write must not leave the .tmp sidecar file after a successful save.
registry = EntityRegistry.load(config_dir=tmp_path)
registry.save()
leftover = list(tmp_path.glob("entity_registry.json.tmp*"))
assert leftover == [], f"atomic write leaked tmp file(s): {leftover}"
def test_save_preserves_previous_on_serialization_failure(tmp_path, monkeypatch):
# If serialization fails mid-write, the previous registry must remain
# intact — this is the whole point of atomic write vs truncating in place.
registry = EntityRegistry.load(config_dir=tmp_path)
registry.seed(
mode="personal",
people=[{"name": "Alice", "relationship": "friend", "context": "personal"}],
projects=[],
)
registry.save()
target = tmp_path / "entity_registry.json"
original = target.read_text(encoding="utf-8")
# Force os.replace to raise — simulates filesystem full / permission flip
# AFTER the temp file is written but BEFORE the rename completes.
import os as _os
real_replace = _os.replace
def boom(src, dst):
raise OSError("simulated rename failure")
monkeypatch.setattr(_os, "replace", boom)
with pytest.raises(OSError):
registry.seed(
mode="personal",
people=[{"name": "Bob", "relationship": "friend", "context": "personal"}],
projects=[],
)
registry.save()
# Restore os.replace before reading so the assertion can rely on it.
monkeypatch.setattr(_os, "replace", real_replace)
assert target.read_text(encoding="utf-8") == original
# ── seed ────────────────────────────────────────────────────────────────