fix: address review — re-mine modified files, idempotent add_drawer, cleanup ChromaDB handles

This commit is contained in:
Igor Lins e Silva
2026-04-07 17:44:19 -03:00
parent a4149ab248
commit bf88daa649
4 changed files with 79 additions and 63 deletions
+8 -9
View File
@@ -283,17 +283,16 @@ def tool_add_drawer(
if not col:
return _no_palace()
# Duplicate check
dup = tool_check_duplicate(content, threshold=0.9)
if dup.get("is_duplicate"):
return {
"success": False,
"reason": "duplicate",
"matches": dup["matches"],
}
drawer_id = f"drawer_{wing}_{room}_{hashlib.md5(content.encode()).hexdigest()[:16]}"
# Idempotency: if the deterministic ID already exists, return success as a no-op.
try:
existing = col.get(ids=[drawer_id])
if existing and existing["ids"]:
return {"success": True, "reason": "already_exists", "drawer_id": drawer_id}
except Exception:
pass
try:
col.upsert(
ids=[drawer_id],
+28 -12
View File
@@ -403,10 +403,22 @@ def get_collection(palace_path: str):
def file_already_mined(collection, source_file: str) -> bool:
"""Fast check: has this file been filed before?"""
"""Fast check: has this file been filed before and is unchanged?
Compares the stored mtime in drawer metadata against the file's current
mtime. Returns False (needs re-mining) when the file has been modified
since it was last mined, or when no mtime was stored.
"""
try:
results = collection.get(where={"source_file": source_file}, limit=1)
return len(results.get("ids", [])) > 0
if not results.get("ids"):
return False
stored_meta = results["metadatas"][0] if results.get("metadatas") else {}
stored_mtime = stored_meta.get("source_mtime")
if stored_mtime is None:
return False
current_mtime = os.path.getmtime(source_file)
return float(stored_mtime) == current_mtime
except Exception:
return False
@@ -417,19 +429,23 @@ def add_drawer(
"""Add one drawer to the palace."""
drawer_id = f"drawer_{wing}_{room}_{hashlib.md5((source_file + str(chunk_index)).encode(), usedforsecurity=False).hexdigest()[:16]}"
try:
metadata = {
"wing": wing,
"room": room,
"source_file": source_file,
"chunk_index": chunk_index,
"added_by": agent,
"filed_at": datetime.now().isoformat(),
}
# Store file mtime so we can detect modifications later.
try:
metadata["source_mtime"] = os.path.getmtime(source_file)
except OSError:
pass
collection.upsert(
documents=[content],
ids=[drawer_id],
metadatas=[
{
"wing": wing,
"room": room,
"source_file": source_file,
"chunk_index": chunk_index,
"added_by": agent,
"filed_at": datetime.now().isoformat(),
}
],
metadatas=[metadata],
)
return True
except Exception: