Merge pull request #1215 from arnoldwender/fix/entity-registry-atomic-write
fix(entity_registry): atomic write to prevent partial corruption on crash
This commit is contained in:
@@ -16,6 +16,7 @@ Usage:
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import urllib.request
|
||||
import urllib.parse
|
||||
@@ -320,11 +321,35 @@ class EntityRegistry:
|
||||
self._path.parent.chmod(0o700)
|
||||
except (OSError, NotImplementedError):
|
||||
pass
|
||||
self._path.write_text(json.dumps(self._data, indent=2), encoding="utf-8")
|
||||
# Atomic write: serialize to a sibling temp file in the same dir
|
||||
# (so os.replace stays on one filesystem), fsync, then rename over
|
||||
# the target. A crash mid-write leaves the previous registry intact
|
||||
# instead of a half-written file or an empty file from the truncate.
|
||||
payload = json.dumps(self._data, indent=2)
|
||||
tmp_path = self._path.with_name(self._path.name + ".tmp")
|
||||
with open(tmp_path, "w", encoding="utf-8") as f:
|
||||
f.write(payload)
|
||||
f.flush()
|
||||
os.fsync(f.fileno())
|
||||
try:
|
||||
self._path.chmod(0o600)
|
||||
tmp_path.chmod(0o600)
|
||||
except (OSError, NotImplementedError):
|
||||
pass
|
||||
os.replace(tmp_path, self._path)
|
||||
# On ext4 (and similar) the rename's durability across power loss
|
||||
# requires an additional fsync on the parent directory. Without it,
|
||||
# the kernel can ack the rename and a crash reverts to the state
|
||||
# where the temp file is present and the target is at the old version.
|
||||
try:
|
||||
dir_fd = os.open(str(self._path.parent), os.O_RDONLY)
|
||||
try:
|
||||
os.fsync(dir_fd)
|
||||
finally:
|
||||
os.close(dir_fd)
|
||||
except OSError:
|
||||
# Windows and some special filesystems reject directory fds — they
|
||||
# have different durability semantics on rename anyway.
|
||||
pass
|
||||
|
||||
@staticmethod
|
||||
def _empty() -> dict:
|
||||
|
||||
@@ -2,6 +2,8 @@
|
||||
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
from mempalace.entity_registry import (
|
||||
COMMON_ENGLISH_WORDS,
|
||||
PERSON_CONTEXT_PATTERNS,
|
||||
@@ -71,6 +73,50 @@ def test_save_creates_file(tmp_path):
|
||||
assert (tmp_path / "entity_registry.json").exists()
|
||||
|
||||
|
||||
def test_save_is_atomic_does_not_leave_tmp(tmp_path):
|
||||
# Atomic write must not leave the .tmp sidecar file after a successful save.
|
||||
registry = EntityRegistry.load(config_dir=tmp_path)
|
||||
registry.save()
|
||||
leftover = list(tmp_path.glob("entity_registry.json.tmp*"))
|
||||
assert leftover == [], f"atomic write leaked tmp file(s): {leftover}"
|
||||
|
||||
|
||||
def test_save_preserves_previous_on_serialization_failure(tmp_path, monkeypatch):
|
||||
# If serialization fails mid-write, the previous registry must remain
|
||||
# intact — this is the whole point of atomic write vs truncating in place.
|
||||
registry = EntityRegistry.load(config_dir=tmp_path)
|
||||
registry.seed(
|
||||
mode="personal",
|
||||
people=[{"name": "Alice", "relationship": "friend", "context": "personal"}],
|
||||
projects=[],
|
||||
)
|
||||
registry.save()
|
||||
target = tmp_path / "entity_registry.json"
|
||||
original = target.read_text(encoding="utf-8")
|
||||
|
||||
# Force os.replace to raise — simulates filesystem full / permission flip
|
||||
# AFTER the temp file is written but BEFORE the rename completes.
|
||||
import os as _os
|
||||
|
||||
real_replace = _os.replace
|
||||
|
||||
def boom(src, dst):
|
||||
raise OSError("simulated rename failure")
|
||||
|
||||
monkeypatch.setattr(_os, "replace", boom)
|
||||
with pytest.raises(OSError):
|
||||
registry.seed(
|
||||
mode="personal",
|
||||
people=[{"name": "Bob", "relationship": "friend", "context": "personal"}],
|
||||
projects=[],
|
||||
)
|
||||
registry.save()
|
||||
|
||||
# Restore os.replace before reading so the assertion can rely on it.
|
||||
monkeypatch.setattr(_os, "replace", real_replace)
|
||||
assert target.read_text(encoding="utf-8") == original
|
||||
|
||||
|
||||
# ── seed ────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user