fix(palace): reserve byte 0 as lock sentinel for Windows portability

Windows CI surfaced two bugs introduced by the holder-identity write:

1. msvcrt.locking(LK_NBLCK, 1) locks 1 byte at the *current* file
   position. Switching to "a+" mode put the position at end-of-file,
   so two contenders locked different bytes and silently both
   acquired (the test asserts saw [(ok, 1), (ok, 2)] instead of
   ok+busy).

2. With the byte-range lock active on Windows, the locked byte is
   read-blocked for other processes. A contender trying to read the
   holder identity from byte 0 would hit PermissionError.

Switch to "r+" mode (after touch-create) and explicitly seek(0) before
both lock and unlock. Then reserve byte 0 as a pure lock sentinel and
write the holder identity from byte 1 onward. _read_lock_holder reads
from byte 1+, so it never touches the locked byte.

Also bound file growth across re-acquires: truncate to
sentinel + len(ident) before writing so the file body stays the size
of the current holder, never accumulating across runs.

Linux fcntl.flock locks the whole file independent of byte position,
so the seek(0) is harmless on POSIX. The shape works on both.
This commit is contained in:
Igor Lins e Silva
2026-05-08 01:28:42 -03:00
parent ef8d83cc8a
commit d5ce97c7af
+37 -8
View File
@@ -376,10 +376,17 @@ def _format_lock_holder(content: str) -> str:
return f"PID {pid}" return f"PID {pid}"
# Byte 0 of the lock file is reserved as the OS lock sentinel.
# Holder identity is written from byte 1 onward so contenders can read
# the identity without colliding with byte 0 (Windows msvcrt.locking
# blocks both reads and writes on the locked byte).
_LOCK_SENTINEL_BYTES = 1
def _read_lock_holder(lock_file) -> str: def _read_lock_holder(lock_file) -> str:
"""Read the prior holder's identity from the lock-file body, best-effort.""" """Read the prior holder's identity from the lock-file body, best-effort."""
try: try:
lock_file.seek(0) lock_file.seek(_LOCK_SENTINEL_BYTES)
content = lock_file.read().strip() content = lock_file.read().strip()
except OSError: except OSError:
return "another writer (identity not recorded)" return "another writer (identity not recorded)"
@@ -389,11 +396,16 @@ def _read_lock_holder(lock_file) -> str:
def _write_lock_holder(lock_file) -> None: def _write_lock_holder(lock_file) -> None:
"""Record this process's identity in the lock-file body. Best-effort.""" """Record this process's identity in the lock-file body. Best-effort.
Writes from byte 1 onward; byte 0 is the lock sentinel and must not
be touched after acquire (truncating it on Windows can interact
badly with the active byte-range lock).
"""
try: try:
ident = f"{os.getpid()} {' '.join(sys.argv[:3])}".strip() ident = f"{os.getpid()} {' '.join(sys.argv[:3])}".strip()
lock_file.seek(0) lock_file.seek(_LOCK_SENTINEL_BYTES)
lock_file.truncate() lock_file.truncate(_LOCK_SENTINEL_BYTES + len(ident.encode("utf-8")))
lock_file.write(ident) lock_file.write(ident)
lock_file.flush() lock_file.flush()
except OSError: except OSError:
@@ -443,12 +455,27 @@ def mine_palace_lock(palace_path: str):
yield yield
return return
# "a+" preserves the prior holder's identity recorded inside the file so # Ensure the file exists, then open r+ so we can both read the prior
# a failed acquire can name who is holding the lock (#1264). "w" mode # holder's identity (for failure diagnostics) and write our own. "w"
# would have truncated the file before we could read it. # truncates and erases the prior holder. "a+" puts the position at EOF,
lf = open(lock_path, "a+") # which on Windows breaks ``msvcrt.locking`` (it locks 1 byte at the
# *current* position, so two contenders end up locking different bytes
# and silently both acquire — observed as Windows-CI lock test
# failures during #1264 development).
if not os.path.exists(lock_path):
# Touch atomically: O_CREAT|O_EXCL would fail if a concurrent
# contender just created it, which is fine — we proceed to open.
try:
fd = os.open(lock_path, os.O_CREAT | os.O_WRONLY, 0o600)
os.close(fd)
except FileExistsError:
pass
lf = open(lock_path, "r+")
acquired = False acquired = False
try: try:
# Lock byte 0 explicitly. msvcrt.locking is byte-position dependent;
# fcntl.flock is whole-file but the seek is harmless there.
lf.seek(0)
if os.name == "nt": if os.name == "nt":
import msvcrt import msvcrt
@@ -486,6 +513,8 @@ def mine_palace_lock(palace_path: str):
if os.name == "nt": if os.name == "nt":
import msvcrt import msvcrt
# Match the lock region: byte 0.
lf.seek(0)
msvcrt.locking(lf.fileno(), msvcrt.LK_UNLCK, 1) msvcrt.locking(lf.fileno(), msvcrt.LK_UNLCK, 1)
else: else:
import fcntl import fcntl