Merge pull request #1215 from arnoldwender/fix/entity-registry-atomic-write

fix(entity_registry): atomic write to prevent partial corruption on crash
This commit is contained in:
Igor Lins e Silva
2026-05-06 01:32:46 -03:00
committed by GitHub
2 changed files with 73 additions and 2 deletions
+27 -2
View File
@@ -16,6 +16,7 @@ Usage:
"""
import json
import os
import re
import urllib.request
import urllib.parse
@@ -320,11 +321,35 @@ class EntityRegistry:
self._path.parent.chmod(0o700)
except (OSError, NotImplementedError):
pass
self._path.write_text(json.dumps(self._data, indent=2), encoding="utf-8")
# Atomic write: serialize to a sibling temp file in the same dir
# (so os.replace stays on one filesystem), fsync, then rename over
# the target. A crash mid-write leaves the previous registry intact
# instead of a half-written file or an empty file from the truncate.
payload = json.dumps(self._data, indent=2)
tmp_path = self._path.with_name(self._path.name + ".tmp")
with open(tmp_path, "w", encoding="utf-8") as f:
f.write(payload)
f.flush()
os.fsync(f.fileno())
try:
self._path.chmod(0o600)
tmp_path.chmod(0o600)
except (OSError, NotImplementedError):
pass
os.replace(tmp_path, self._path)
# On ext4 (and similar) the rename's durability across power loss
# requires an additional fsync on the parent directory. Without it,
# the kernel can ack the rename and a crash reverts to the state
# where the temp file is present and the target is at the old version.
try:
dir_fd = os.open(str(self._path.parent), os.O_RDONLY)
try:
os.fsync(dir_fd)
finally:
os.close(dir_fd)
except OSError:
# Windows and some special filesystems reject directory fds — they
# have different durability semantics on rename anyway.
pass
@staticmethod
def _empty() -> dict:
+46
View File
@@ -2,6 +2,8 @@
from unittest.mock import patch
import pytest
from mempalace.entity_registry import (
COMMON_ENGLISH_WORDS,
PERSON_CONTEXT_PATTERNS,
@@ -71,6 +73,50 @@ def test_save_creates_file(tmp_path):
assert (tmp_path / "entity_registry.json").exists()
def test_save_is_atomic_does_not_leave_tmp(tmp_path):
# Atomic write must not leave the .tmp sidecar file after a successful save.
registry = EntityRegistry.load(config_dir=tmp_path)
registry.save()
leftover = list(tmp_path.glob("entity_registry.json.tmp*"))
assert leftover == [], f"atomic write leaked tmp file(s): {leftover}"
def test_save_preserves_previous_on_serialization_failure(tmp_path, monkeypatch):
# If serialization fails mid-write, the previous registry must remain
# intact — this is the whole point of atomic write vs truncating in place.
registry = EntityRegistry.load(config_dir=tmp_path)
registry.seed(
mode="personal",
people=[{"name": "Alice", "relationship": "friend", "context": "personal"}],
projects=[],
)
registry.save()
target = tmp_path / "entity_registry.json"
original = target.read_text(encoding="utf-8")
# Force os.replace to raise — simulates filesystem full / permission flip
# AFTER the temp file is written but BEFORE the rename completes.
import os as _os
real_replace = _os.replace
def boom(src, dst):
raise OSError("simulated rename failure")
monkeypatch.setattr(_os, "replace", boom)
with pytest.raises(OSError):
registry.seed(
mode="personal",
people=[{"name": "Bob", "relationship": "friend", "context": "personal"}],
projects=[],
)
registry.save()
# Restore os.replace before reading so the assertion can rely on it.
monkeypatch.setattr(_os, "replace", real_replace)
assert target.read_text(encoding="utf-8") == original
# ── seed ────────────────────────────────────────────────────────────────