fix(diary): rebuild closets on hash change + backfill legacy state
Address Copilot review on #925:

- Rebuild closets in full whenever the content hash differs from the prior state, not only when the entry count grows. Without this, an in-place edit (same entry count, different body) updated the drawer but left the closet/search index stale, which defeats the verbatim guarantee at the search layer even when the drawer is correct.
- The legacy size-only skip path now records the computed content_hash back into state, so subsequent runs use the strict hash check instead of remaining on the size-only path indefinitely.
- Test updates: the typo direction in the regression test now matches the comment (typo "Teh" → fix "The"), the assertion now also checks that the closet collection reflects the edit, and a new test exercises the legacy-state backfill path.
This commit is contained in:
@@ -133,10 +133,16 @@ def ingest_diaries(
|
||||
if curr_hash == prev_hash:
|
||||
continue
|
||||
elif curr_size == prev_size and prev_size > 0:
|
||||
# Legacy state without content_hash: keep size-based skip so a
|
||||
# post-upgrade run doesn't re-ingest every untouched diary.
|
||||
# Legacy state without content_hash: keep size-based skip but
|
||||
# backfill the hash so future runs use the strict check.
|
||||
state[state_key] = {**prev_entry, "content_hash": curr_hash}
|
||||
continue
|
||||
|
||||
# An in-place edit (same entry count, different content) means existing
|
||||
# closets are stale. Force a full rebuild whenever the hash changes,
|
||||
# not only on entry-count growth.
|
||||
content_changed = prev_hash is not None and curr_hash != prev_hash
|
||||
|
||||
now_iso = datetime.now(timezone.utc).isoformat()
|
||||
drawer_id = _diary_drawer_id(wing, date_str)
|
||||
entities = _extract_entities_for_metadata(text)
|
||||
@@ -163,7 +169,8 @@ def ingest_diaries(
|
||||
|
||||
entries = _split_entries(text)
|
||||
prev_entry_count = state.get(state_key, {}).get("entry_count", 0)
|
||||
new_entries = entries if force else entries[prev_entry_count:]
|
||||
full_rebuild = force or content_changed
|
||||
new_entries = entries if full_rebuild else entries[prev_entry_count:]
|
||||
|
||||
if new_entries:
|
||||
all_lines = []
|
||||
@@ -185,9 +192,9 @@ def ingest_diaries(
|
||||
}
|
||||
if entities:
|
||||
closet_meta["entities"] = entities
|
||||
# On a force rebuild, wipe any leftover numbered closets
|
||||
# from a longer prior run before re-writing.
|
||||
if force:
|
||||
# On any full rebuild (force or detected content edit),
|
||||
# wipe leftover closets from a prior run before re-writing.
|
||||
if full_rebuild:
|
||||
purge_file_closets(closets_col, source_file)
|
||||
n = upsert_closet_lines(closets_col, closet_id_base, all_lines, closet_meta)
|
||||
closets_created += n
|
||||
|
||||
Reference in New Issue
Block a user