fix: use upsert and deterministic IDs to prevent data stagnation
MCP tool_add_drawer: - Make drawer_id content-based: hash full content instead of content[:100] + timestamp. Same content → same ID, eliminating TOCTOU race conditions - Switch from col.add() to col.upsert() so re-filing with updated content updates the existing drawer miner.add_drawer: - Switch from collection.add() to collection.upsert() so re-mining a modified file updates instead of silently failing - Remove the try/except catching 'already exists' — upsert handles this naturally Findings: #11 (HIGH — add ignores updates), #6 (MEDIUM — TOCTOU), #13 (MEDIUM — non-deterministic IDs) Includes test infrastructure from PR #131. 92 tests pass.
This commit is contained in:
@@ -292,10 +292,10 @@ def tool_add_drawer(
|
||||
"matches": dup["matches"],
|
||||
}
|
||||
|
||||
drawer_id = f"drawer_{wing}_{room}_{hashlib.md5((content[:100] + datetime.now().isoformat()).encode()).hexdigest()[:16]}"
|
||||
drawer_id = f"drawer_{wing}_{room}_{hashlib.md5(content.encode()).hexdigest()[:16]}"
|
||||
|
||||
try:
|
||||
col.add(
|
||||
col.upsert(
|
||||
ids=[drawer_id],
|
||||
documents=[content],
|
||||
metadatas=[
|
||||
|
||||
+2
-4
@@ -417,7 +417,7 @@ def add_drawer(
|
||||
"""Add one drawer to the palace."""
|
||||
drawer_id = f"drawer_{wing}_{room}_{hashlib.md5((source_file + str(chunk_index)).encode(), usedforsecurity=False).hexdigest()[:16]}"
|
||||
try:
|
||||
collection.add(
|
||||
collection.upsert(
|
||||
documents=[content],
|
||||
ids=[drawer_id],
|
||||
metadatas=[
|
||||
@@ -432,9 +432,7 @@ def add_drawer(
|
||||
],
|
||||
)
|
||||
return True
|
||||
except Exception as e:
|
||||
if "already exists" in str(e).lower() or "duplicate" in str(e).lower():
|
||||
return False
|
||||
except Exception:
|
||||
raise
|
||||
|
||||
|
||||
|
||||
@@ -6,6 +6,7 @@ timeline, stats, and edge cases (duplicate triples, ID collisions).
|
||||
"""
|
||||
|
||||
|
||||
|
||||
class TestEntityOperations:
|
||||
def test_add_entity(self, kg):
|
||||
eid = kg.add_entity("Alice", entity_type="person")
|
||||
@@ -124,7 +125,6 @@ class TestWALMode:
|
||||
conn.close()
|
||||
assert mode == "wal"
|
||||
|
||||
|
||||
class TestStats:
|
||||
def test_stats_empty(self, kg):
|
||||
stats = kg.stats()
|
||||
|
||||
Reference in New Issue
Block a user