fix: use upsert and deterministic IDs to prevent data stagnation

MCP tool_add_drawer:
- Make drawer_id content-based: hash full content instead of
  content[:100] + timestamp. Same content → same ID, eliminating
  TOCTOU race conditions
- Switch from col.add() to col.upsert() so re-filing with updated
  content updates the existing drawer

miner.add_drawer:
- Switch from collection.add() to collection.upsert() so re-mining
  a modified file updates instead of silently failing
- Remove the try/except catching 'already exists' — upsert handles
  this naturally

Findings: #11 (HIGH — add ignores updates), #6 (MEDIUM — TOCTOU),
          #13 (MEDIUM — non-deterministic IDs)

Includes test infrastructure from PR #131.
92 tests pass.
This commit is contained in:
Igor Lins e Silva
2026-04-07 17:27:41 -03:00
parent fcc9ce84f2
commit a4149ab248
3 changed files with 5 additions and 7 deletions
+2 -2
View File
@@ -292,10 +292,10 @@ def tool_add_drawer(
"matches": dup["matches"],
}
drawer_id = f"drawer_{wing}_{room}_{hashlib.md5((content[:100] + datetime.now().isoformat()).encode()).hexdigest()[:16]}"
drawer_id = f"drawer_{wing}_{room}_{hashlib.md5(content.encode()).hexdigest()[:16]}"
try:
col.add(
col.upsert(
ids=[drawer_id],
documents=[content],
metadatas=[
+2 -4
View File
@@ -417,7 +417,7 @@ def add_drawer(
"""Add one drawer to the palace."""
drawer_id = f"drawer_{wing}_{room}_{hashlib.md5((source_file + str(chunk_index)).encode(), usedforsecurity=False).hexdigest()[:16]}"
try:
collection.add(
collection.upsert(
documents=[content],
ids=[drawer_id],
metadatas=[
@@ -432,9 +432,7 @@ def add_drawer(
],
)
return True
except Exception as e:
if "already exists" in str(e).lower() or "duplicate" in str(e).lower():
return False
except Exception:
raise
+1 -1
View File
@@ -6,6 +6,7 @@ timeline, stats, and edge cases (duplicate triples, ID collisions).
"""
class TestEntityOperations:
def test_add_entity(self, kg):
eid = kg.add_entity("Alice", entity_type="person")
@@ -124,7 +125,6 @@ class TestWALMode:
conn.close()
assert mode == "wal"
class TestStats:
def test_stats_empty(self, kg):
stats = kg.stats()