From a4149ab248c5aa6c314eec5e4ebaa9eaef55c29c Mon Sep 17 00:00:00 2001 From: Igor Lins e Silva <4753812+igorls@users.noreply.github.com> Date: Tue, 7 Apr 2026 17:27:41 -0300 Subject: [PATCH] fix: use upsert and deterministic IDs to prevent data stagnation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit MCP tool_add_drawer: - Make drawer_id content-based: hash full content instead of content[:100] + timestamp. Same content → same ID, eliminating TOCTOU race conditions - Switch from col.add() to col.upsert() so re-filing identical content rewrites the same drawer idempotently (note: changed content hashes to a different ID, so it is stored as a new drawer rather than updating the old one) miner.add_drawer: - Switch from collection.add() to collection.upsert() so re-mining a modified file updates instead of silently failing - Remove the 'already exists'/'duplicate' special-casing from the except clause — upsert handles this naturally Findings: #11 (HIGH — add ignores updates), #6 (MEDIUM — TOCTOU), #13 (MEDIUM — non-deterministic IDs) Includes test infrastructure from PR #131. 92 tests pass. --- mempalace/mcp_server.py | 4 ++-- mempalace/miner.py | 6 ++---- tests/test_knowledge_graph.py | 2 +- 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/mempalace/mcp_server.py b/mempalace/mcp_server.py index b447249..bda4c1a 100644 --- a/mempalace/mcp_server.py +++ b/mempalace/mcp_server.py @@ -292,10 +292,10 @@ def tool_add_drawer( "matches": dup["matches"], } - drawer_id = f"drawer_{wing}_{room}_{hashlib.md5((content[:100] + datetime.now().isoformat()).encode()).hexdigest()[:16]}" + drawer_id = f"drawer_{wing}_{room}_{hashlib.md5(content.encode()).hexdigest()[:16]}" try: - col.add( + col.upsert( ids=[drawer_id], documents=[content], metadatas=[ diff --git a/mempalace/miner.py b/mempalace/miner.py index 7b4e949..a53cf76 100644 --- a/mempalace/miner.py +++ b/mempalace/miner.py @@ -417,7 +417,7 @@ def add_drawer( """Add one drawer to the palace.""" drawer_id = f"drawer_{wing}_{room}_{hashlib.md5((source_file + str(chunk_index)).encode(), 
usedforsecurity=False).hexdigest()[:16]}" try: - collection.add( + collection.upsert( documents=[content], ids=[drawer_id], metadatas=[ @@ -432,9 +432,7 @@ def add_drawer( ], ) return True - except Exception as e: - if "already exists" in str(e).lower() or "duplicate" in str(e).lower(): - return False + except Exception: raise diff --git a/tests/test_knowledge_graph.py b/tests/test_knowledge_graph.py index d7d9838..535eace 100644 --- a/tests/test_knowledge_graph.py +++ b/tests/test_knowledge_graph.py @@ -6,6 +6,7 @@ timeline, stats, and edge cases (duplicate triples, ID collisions). """ + class TestEntityOperations: def test_add_entity(self, kg): eid = kg.add_entity("Alice", entity_type="person") @@ -124,7 +125,6 @@ class TestWALMode: conn.close() assert mode == "wal" - class TestStats: def test_stats_empty(self, kg): stats = kg.stats()