diff --git a/mempalace/entity_registry.py b/mempalace/entity_registry.py index 78d8a8b..c8ac517 100644 --- a/mempalace/entity_registry.py +++ b/mempalace/entity_registry.py @@ -16,6 +16,7 @@ Usage: """ import json +import os import re import urllib.request import urllib.parse @@ -320,11 +321,35 @@ class EntityRegistry: self._path.parent.chmod(0o700) except (OSError, NotImplementedError): pass - self._path.write_text(json.dumps(self._data, indent=2), encoding="utf-8") + # Atomic write: serialize to a sibling temp file in the same dir + # (so os.replace stays on one filesystem), fsync, then rename over + # the target. A crash mid-write leaves the previous registry intact + # instead of a half-written file or an empty file from the truncate. + payload = json.dumps(self._data, indent=2) + tmp_path = self._path.with_name(self._path.name + ".tmp") + with open(tmp_path, "w", encoding="utf-8") as f: + f.write(payload) + f.flush() + os.fsync(f.fileno()) try: - self._path.chmod(0o600) + tmp_path.chmod(0o600) except (OSError, NotImplementedError): pass + os.replace(tmp_path, self._path) + # On ext4 (and similar) the rename's durability across power loss + # requires an additional fsync on the parent directory. Without it, + # the kernel can ack the rename and a crash reverts to the state + # where the temp file is present and the target is at the old version. + try: + dir_fd = os.open(str(self._path.parent), os.O_RDONLY) + try: + os.fsync(dir_fd) + finally: + os.close(dir_fd) + except OSError: + # Windows and some special filesystems reject directory fds — they + # have different durability semantics on rename anyway. + pass @staticmethod def _empty() -> dict: diff --git a/tests/test_entity_registry.py b/tests/test_entity_registry.py index c857a07..a5f237c 100644 --- a/tests/test_entity_registry.py +++ b/tests/test_entity_registry.py @@ -2,6 +2,8 @@ from unittest.mock import patch +import pytest + from mempalace.entity_registry import ( COMMON_ENGLISH_WORDS, PERSON_CONTEXT_PATTERNS, @@ -71,6 +73,50 @@ def test_save_creates_file(tmp_path): assert (tmp_path / "entity_registry.json").exists() +def test_save_is_atomic_does_not_leave_tmp(tmp_path): + # Atomic write must not leave the .tmp sidecar file after a successful save. + registry = EntityRegistry.load(config_dir=tmp_path) + registry.save() + leftover = list(tmp_path.glob("entity_registry.json.tmp*")) + assert leftover == [], f"atomic write leaked tmp file(s): {leftover}" + + +def test_save_preserves_previous_on_serialization_failure(tmp_path, monkeypatch): + # If serialization fails mid-write, the previous registry must remain + # intact — this is the whole point of atomic write vs truncating in place. + registry = EntityRegistry.load(config_dir=tmp_path) + registry.seed( + mode="personal", + people=[{"name": "Alice", "relationship": "friend", "context": "personal"}], + projects=[], + ) + registry.save() + target = tmp_path / "entity_registry.json" + original = target.read_text(encoding="utf-8") + + # Force os.replace to raise — simulates filesystem full / permission flip + # AFTER the temp file is written but BEFORE the rename completes. + import os as _os + + real_replace = _os.replace + + def boom(src, dst): + raise OSError("simulated rename failure") + + monkeypatch.setattr(_os, "replace", boom) + with pytest.raises(OSError): + registry.seed( + mode="personal", + people=[{"name": "Bob", "relationship": "friend", "context": "personal"}], + projects=[], + ) + registry.save() + + # Restore os.replace before reading so the assertion can rely on it. + monkeypatch.setattr(_os, "replace", real_replace) + assert target.read_text(encoding="utf-8") == original + + # ── seed ────────────────────────────────────────────────────────────────