fix(entity_registry): atomic write to prevent partial corruption on crash
EntityRegistry.save() called Path.write_text() directly, which truncates the target file and then writes — so a crash mid-write (power loss, OOM, filesystem-full mid-flush) leaves an empty or half-written entity_registry.json. The whole people/projects map is lost; the system falls back to an empty registry on next load. Switch to the standard atomic-write pattern: serialize to a sibling .tmp file in the same directory (so os.replace stays on one filesystem), fsync, chmod 0o600, then os.replace over the target. The replace is atomic on POSIX and Windows, so any crash leaves the previous registry intact instead of a truncated file. Tests cover: no leftover .tmp on success, and previous content preserved when os.replace itself raises mid-save.
This commit is contained in:
@@ -16,6 +16,7 @@ Usage:
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
import json
|
import json
|
||||||
|
import os
|
||||||
import re
|
import re
|
||||||
import urllib.request
|
import urllib.request
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
@@ -320,11 +321,21 @@ class EntityRegistry:
|
|||||||
self._path.parent.chmod(0o700)
|
self._path.parent.chmod(0o700)
|
||||||
except (OSError, NotImplementedError):
|
except (OSError, NotImplementedError):
|
||||||
pass
|
pass
|
||||||
self._path.write_text(json.dumps(self._data, indent=2), encoding="utf-8")
|
# Atomic write: serialize to a sibling temp file in the same dir
|
||||||
|
# (so os.replace stays on one filesystem), fsync, then rename over
|
||||||
|
# the target. A crash mid-write leaves the previous registry intact
|
||||||
|
# instead of a half-written file or an empty file from the truncate.
|
||||||
|
payload = json.dumps(self._data, indent=2)
|
||||||
|
tmp_path = self._path.with_name(self._path.name + ".tmp")
|
||||||
|
with open(tmp_path, "w", encoding="utf-8") as f:
|
||||||
|
f.write(payload)
|
||||||
|
f.flush()
|
||||||
|
os.fsync(f.fileno())
|
||||||
try:
|
try:
|
||||||
self._path.chmod(0o600)
|
tmp_path.chmod(0o600)
|
||||||
except (OSError, NotImplementedError):
|
except (OSError, NotImplementedError):
|
||||||
pass
|
pass
|
||||||
|
os.replace(tmp_path, self._path)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _empty() -> dict:
|
def _empty() -> dict:
|
||||||
|
|||||||
@@ -2,6 +2,8 @@
|
|||||||
|
|
||||||
from unittest.mock import patch
|
from unittest.mock import patch
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
from mempalace.entity_registry import (
|
from mempalace.entity_registry import (
|
||||||
COMMON_ENGLISH_WORDS,
|
COMMON_ENGLISH_WORDS,
|
||||||
PERSON_CONTEXT_PATTERNS,
|
PERSON_CONTEXT_PATTERNS,
|
||||||
@@ -71,6 +73,50 @@ def test_save_creates_file(tmp_path):
|
|||||||
assert (tmp_path / "entity_registry.json").exists()
|
assert (tmp_path / "entity_registry.json").exists()
|
||||||
|
|
||||||
|
|
||||||
|
def test_save_is_atomic_does_not_leave_tmp(tmp_path):
|
||||||
|
# Atomic write must not leave the .tmp sidecar file after a successful save.
|
||||||
|
registry = EntityRegistry.load(config_dir=tmp_path)
|
||||||
|
registry.save()
|
||||||
|
leftover = list(tmp_path.glob("entity_registry.json.tmp*"))
|
||||||
|
assert leftover == [], f"atomic write leaked tmp file(s): {leftover}"
|
||||||
|
|
||||||
|
|
||||||
|
def test_save_preserves_previous_on_serialization_failure(tmp_path, monkeypatch):
|
||||||
|
# If serialization fails mid-write, the previous registry must remain
|
||||||
|
# intact — this is the whole point of atomic write vs truncating in place.
|
||||||
|
registry = EntityRegistry.load(config_dir=tmp_path)
|
||||||
|
registry.seed(
|
||||||
|
mode="personal",
|
||||||
|
people=[{"name": "Alice", "relationship": "friend", "context": "personal"}],
|
||||||
|
projects=[],
|
||||||
|
)
|
||||||
|
registry.save()
|
||||||
|
target = tmp_path / "entity_registry.json"
|
||||||
|
original = target.read_text(encoding="utf-8")
|
||||||
|
|
||||||
|
# Force os.replace to raise — simulates filesystem full / permission flip
|
||||||
|
# AFTER the temp file is written but BEFORE the rename completes.
|
||||||
|
import os as _os
|
||||||
|
|
||||||
|
real_replace = _os.replace
|
||||||
|
|
||||||
|
def boom(src, dst):
|
||||||
|
raise OSError("simulated rename failure")
|
||||||
|
|
||||||
|
monkeypatch.setattr(_os, "replace", boom)
|
||||||
|
with pytest.raises(OSError):
|
||||||
|
registry.seed(
|
||||||
|
mode="personal",
|
||||||
|
people=[{"name": "Bob", "relationship": "friend", "context": "personal"}],
|
||||||
|
projects=[],
|
||||||
|
)
|
||||||
|
registry.save()
|
||||||
|
|
||||||
|
# Restore os.replace before reading so the assertion can rely on it.
|
||||||
|
monkeypatch.setattr(_os, "replace", real_replace)
|
||||||
|
assert target.read_text(encoding="utf-8") == original
|
||||||
|
|
||||||
|
|
||||||
# ── seed ────────────────────────────────────────────────────────────────
|
# ── seed ────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user