fix: use upsert and deterministic IDs to prevent data stagnation

MCP tool_add_drawer:
- Make drawer_id content-based: hash full content instead of
  content[:100] + timestamp. Same content → same ID, eliminating
  TOCTOU race conditions
- Switch from col.add() to col.upsert() so re-filing with updated
  content updates the existing drawer

miner.add_drawer:
- Switch from collection.add() to collection.upsert() so re-mining
  a modified file updates instead of silently failing
- Remove the try/except catching 'already exists' — upsert handles
  this naturally

Findings: #11 (HIGH — add ignores updates), #6 (MEDIUM — TOCTOU),
          #13 (MEDIUM — non-deterministic IDs)

Includes test infrastructure from PR #131.
92 tests pass.
This commit is contained in:
Igor Lins e Silva
2026-04-07 17:27:41 -03:00
parent fcc9ce84f2
commit a4149ab248
3 changed files with 5 additions and 7 deletions
+2 -2
View File
@@ -292,10 +292,10 @@ def tool_add_drawer(
"matches": dup["matches"], "matches": dup["matches"],
} }
drawer_id = f"drawer_{wing}_{room}_{hashlib.md5((content[:100] + datetime.now().isoformat()).encode()).hexdigest()[:16]}" drawer_id = f"drawer_{wing}_{room}_{hashlib.md5(content.encode()).hexdigest()[:16]}"
try: try:
col.add( col.upsert(
ids=[drawer_id], ids=[drawer_id],
documents=[content], documents=[content],
metadatas=[ metadatas=[
+2 -4
View File
@@ -417,7 +417,7 @@ def add_drawer(
"""Add one drawer to the palace.""" """Add one drawer to the palace."""
drawer_id = f"drawer_{wing}_{room}_{hashlib.md5((source_file + str(chunk_index)).encode(), usedforsecurity=False).hexdigest()[:16]}" drawer_id = f"drawer_{wing}_{room}_{hashlib.md5((source_file + str(chunk_index)).encode(), usedforsecurity=False).hexdigest()[:16]}"
try: try:
collection.add( collection.upsert(
documents=[content], documents=[content],
ids=[drawer_id], ids=[drawer_id],
metadatas=[ metadatas=[
@@ -432,9 +432,7 @@ def add_drawer(
], ],
) )
return True return True
except Exception as e: except Exception:
if "already exists" in str(e).lower() or "duplicate" in str(e).lower():
return False
raise raise
+1 -1
View File
@@ -6,6 +6,7 @@ timeline, stats, and edge cases (duplicate triples, ID collisions).
""" """
class TestEntityOperations: class TestEntityOperations:
def test_add_entity(self, kg): def test_add_entity(self, kg):
eid = kg.add_entity("Alice", entity_type="person") eid = kg.add_entity("Alice", entity_type="person")
@@ -124,7 +125,6 @@ class TestWALMode:
conn.close() conn.close()
assert mode == "wal" assert mode == "wal"
class TestStats: class TestStats:
def test_stats_empty(self, kg): def test_stats_empty(self, kg):
stats = kg.stats() stats = kg.stats()