fix: address review — re-mine modified files, idempotent add_drawer, cleanup ChromaDB handles

This commit is contained in:
Igor Lins e Silva
2026-04-07 17:44:19 -03:00
parent a4149ab248
commit bf88daa649
4 changed files with 79 additions and 63 deletions
+8 -9
View File
@@ -283,17 +283,16 @@ def tool_add_drawer(
if not col: if not col:
return _no_palace() return _no_palace()
# Duplicate check
dup = tool_check_duplicate(content, threshold=0.9)
if dup.get("is_duplicate"):
return {
"success": False,
"reason": "duplicate",
"matches": dup["matches"],
}
drawer_id = f"drawer_{wing}_{room}_{hashlib.md5(content.encode()).hexdigest()[:16]}" drawer_id = f"drawer_{wing}_{room}_{hashlib.md5(content.encode()).hexdigest()[:16]}"
# Idempotency: if the deterministic ID already exists, return success as a no-op.
try:
existing = col.get(ids=[drawer_id])
if existing and existing["ids"]:
return {"success": True, "reason": "already_exists", "drawer_id": drawer_id}
except Exception:
pass
try: try:
col.upsert( col.upsert(
ids=[drawer_id], ids=[drawer_id],
+24 -8
View File
@@ -403,10 +403,22 @@ def get_collection(palace_path: str):
def file_already_mined(collection, source_file: str) -> bool: def file_already_mined(collection, source_file: str) -> bool:
"""Fast check: has this file been filed before?""" """Fast check: has this file been filed before and is unchanged?
Compares the stored mtime in drawer metadata against the file's current
mtime. Returns False (needs re-mining) when the file has been modified
since it was last mined, or when no mtime was stored.
"""
try: try:
results = collection.get(where={"source_file": source_file}, limit=1) results = collection.get(where={"source_file": source_file}, limit=1)
return len(results.get("ids", [])) > 0 if not results.get("ids"):
return False
stored_meta = results["metadatas"][0] if results.get("metadatas") else {}
stored_mtime = stored_meta.get("source_mtime")
if stored_mtime is None:
return False
current_mtime = os.path.getmtime(source_file)
return float(stored_mtime) == current_mtime
except Exception: except Exception:
return False return False
@@ -417,11 +429,7 @@ def add_drawer(
"""Add one drawer to the palace.""" """Add one drawer to the palace."""
drawer_id = f"drawer_{wing}_{room}_{hashlib.md5((source_file + str(chunk_index)).encode(), usedforsecurity=False).hexdigest()[:16]}" drawer_id = f"drawer_{wing}_{room}_{hashlib.md5((source_file + str(chunk_index)).encode(), usedforsecurity=False).hexdigest()[:16]}"
try: try:
collection.upsert( metadata = {
documents=[content],
ids=[drawer_id],
metadatas=[
{
"wing": wing, "wing": wing,
"room": room, "room": room,
"source_file": source_file, "source_file": source_file,
@@ -429,7 +437,15 @@ def add_drawer(
"added_by": agent, "added_by": agent,
"filed_at": datetime.now().isoformat(), "filed_at": datetime.now().isoformat(),
} }
], # Store file mtime so we can detect modifications later.
try:
metadata["source_mtime"] = os.path.getmtime(source_file)
except OSError:
pass
collection.upsert(
documents=[content],
ids=[drawer_id],
metadatas=[metadata],
) )
return True return True
except Exception: except Exception:
+3 -1
View File
@@ -102,7 +102,9 @@ def collection(palace_path):
"""A ChromaDB collection pre-seeded in the temp palace.""" """A ChromaDB collection pre-seeded in the temp palace."""
client = chromadb.PersistentClient(path=palace_path) client = chromadb.PersistentClient(path=palace_path)
col = client.get_or_create_collection("mempalace_drawers") col = client.get_or_create_collection("mempalace_drawers")
return col yield col
client.delete_collection("mempalace_drawers")
del client
@pytest.fixture @pytest.fixture
+40 -41
View File
@@ -9,25 +9,26 @@ via monkeypatch to avoid touching real data.
import json import json
def _patch_mcp_server(monkeypatch, config, palace_path, kg): def _patch_mcp_server(monkeypatch, config, kg):
"""Patch the mcp_server module globals to use test fixtures.""" """Patch the mcp_server module globals to use test fixtures."""
from mempalace import mcp_server from mempalace import mcp_server
assert getattr(config, "palace_path", None) == palace_path, (
f"config.palace_path ({getattr(config, 'palace_path', None)!r}) does not match palace_path fixture ({palace_path!r})"
)
monkeypatch.setattr(mcp_server, "_config", config) monkeypatch.setattr(mcp_server, "_config", config)
monkeypatch.setattr(mcp_server, "_kg", kg) monkeypatch.setattr(mcp_server, "_kg", kg)
def _get_collection(palace_path, create=False): def _get_collection(palace_path, create=False):
"""Helper to get collection from test palace.""" """Helper to get collection from test palace.
Returns (client, collection) so callers can clean up the client
when they are done.
"""
import chromadb import chromadb
client = chromadb.PersistentClient(path=palace_path) client = chromadb.PersistentClient(path=palace_path)
if create: if create:
return client.get_or_create_collection("mempalace_drawers") return client, client.get_or_create_collection("mempalace_drawers")
return client.get_collection("mempalace_drawers") return client, client.get_collection("mempalace_drawers")
# ── Protocol Layer ────────────────────────────────────────────────────── # ── Protocol Layer ──────────────────────────────────────────────────────
@@ -77,11 +78,11 @@ class TestHandleRequest:
assert resp["error"]["code"] == -32601 assert resp["error"]["code"] == -32601
def test_tools_call_dispatches(self, monkeypatch, config, palace_path, seeded_kg): def test_tools_call_dispatches(self, monkeypatch, config, palace_path, seeded_kg):
_patch_mcp_server(monkeypatch, config, palace_path, seeded_kg) _patch_mcp_server(monkeypatch, config, seeded_kg)
from mempalace.mcp_server import handle_request from mempalace.mcp_server import handle_request
# Create a collection so status works # Create a collection so status works
_get_collection(palace_path, create=True) _client, _col = _get_collection(palace_path, create=True); del _client
resp = handle_request( resp = handle_request(
{ {
@@ -100,8 +101,8 @@ class TestHandleRequest:
class TestReadTools: class TestReadTools:
def test_status_empty_palace(self, monkeypatch, config, palace_path, kg): def test_status_empty_palace(self, monkeypatch, config, palace_path, kg):
_patch_mcp_server(monkeypatch, config, palace_path, kg) _patch_mcp_server(monkeypatch, config, kg)
_get_collection(palace_path, create=True) _client, _col = _get_collection(palace_path, create=True); del _client
from mempalace.mcp_server import tool_status from mempalace.mcp_server import tool_status
result = tool_status() result = tool_status()
@@ -109,7 +110,7 @@ class TestReadTools:
assert result["wings"] == {} assert result["wings"] == {}
def test_status_with_data(self, monkeypatch, config, palace_path, seeded_collection, kg): def test_status_with_data(self, monkeypatch, config, palace_path, seeded_collection, kg):
_patch_mcp_server(monkeypatch, config, palace_path, kg) _patch_mcp_server(monkeypatch, config, kg)
from mempalace.mcp_server import tool_status from mempalace.mcp_server import tool_status
result = tool_status() result = tool_status()
@@ -118,7 +119,7 @@ class TestReadTools:
assert "notes" in result["wings"] assert "notes" in result["wings"]
def test_list_wings(self, monkeypatch, config, palace_path, seeded_collection, kg): def test_list_wings(self, monkeypatch, config, palace_path, seeded_collection, kg):
_patch_mcp_server(monkeypatch, config, palace_path, kg) _patch_mcp_server(monkeypatch, config, kg)
from mempalace.mcp_server import tool_list_wings from mempalace.mcp_server import tool_list_wings
result = tool_list_wings() result = tool_list_wings()
@@ -126,7 +127,7 @@ class TestReadTools:
assert result["wings"]["notes"] == 1 assert result["wings"]["notes"] == 1
def test_list_rooms_all(self, monkeypatch, config, palace_path, seeded_collection, kg): def test_list_rooms_all(self, monkeypatch, config, palace_path, seeded_collection, kg):
_patch_mcp_server(monkeypatch, config, palace_path, kg) _patch_mcp_server(monkeypatch, config, kg)
from mempalace.mcp_server import tool_list_rooms from mempalace.mcp_server import tool_list_rooms
result = tool_list_rooms() result = tool_list_rooms()
@@ -135,7 +136,7 @@ class TestReadTools:
assert "planning" in result["rooms"] assert "planning" in result["rooms"]
def test_list_rooms_filtered(self, monkeypatch, config, palace_path, seeded_collection, kg): def test_list_rooms_filtered(self, monkeypatch, config, palace_path, seeded_collection, kg):
_patch_mcp_server(monkeypatch, config, palace_path, kg) _patch_mcp_server(monkeypatch, config, kg)
from mempalace.mcp_server import tool_list_rooms from mempalace.mcp_server import tool_list_rooms
result = tool_list_rooms(wing="project") result = tool_list_rooms(wing="project")
@@ -143,7 +144,7 @@ class TestReadTools:
assert "planning" not in result["rooms"] assert "planning" not in result["rooms"]
def test_get_taxonomy(self, monkeypatch, config, palace_path, seeded_collection, kg): def test_get_taxonomy(self, monkeypatch, config, palace_path, seeded_collection, kg):
_patch_mcp_server(monkeypatch, config, palace_path, kg) _patch_mcp_server(monkeypatch, config, kg)
from mempalace.mcp_server import tool_get_taxonomy from mempalace.mcp_server import tool_get_taxonomy
result = tool_get_taxonomy() result = tool_get_taxonomy()
@@ -151,10 +152,8 @@ class TestReadTools:
assert result["taxonomy"]["project"]["frontend"] == 1 assert result["taxonomy"]["project"]["frontend"] == 1
assert result["taxonomy"]["notes"]["planning"] == 1 assert result["taxonomy"]["notes"]["planning"] == 1
def test_no_palace_returns_error(self, monkeypatch, config, kg, tmp_path): def test_no_palace_returns_error(self, monkeypatch, config, kg):
missing = str(tmp_path / "missing") _patch_mcp_server(monkeypatch, config, kg)
config._file_config["palace_path"] = missing
_patch_mcp_server(monkeypatch, config, missing, kg)
from mempalace.mcp_server import tool_status from mempalace.mcp_server import tool_status
result = tool_status() result = tool_status()
@@ -166,7 +165,7 @@ class TestReadTools:
class TestSearchTool: class TestSearchTool:
def test_search_basic(self, monkeypatch, config, palace_path, seeded_collection, kg): def test_search_basic(self, monkeypatch, config, palace_path, seeded_collection, kg):
_patch_mcp_server(monkeypatch, config, palace_path, kg) _patch_mcp_server(monkeypatch, config, kg)
from mempalace.mcp_server import tool_search from mempalace.mcp_server import tool_search
result = tool_search(query="JWT authentication tokens") result = tool_search(query="JWT authentication tokens")
@@ -177,14 +176,14 @@ class TestSearchTool:
assert "JWT" in top["text"] or "authentication" in top["text"].lower() assert "JWT" in top["text"] or "authentication" in top["text"].lower()
def test_search_with_wing_filter(self, monkeypatch, config, palace_path, seeded_collection, kg): def test_search_with_wing_filter(self, monkeypatch, config, palace_path, seeded_collection, kg):
_patch_mcp_server(monkeypatch, config, palace_path, kg) _patch_mcp_server(monkeypatch, config, kg)
from mempalace.mcp_server import tool_search from mempalace.mcp_server import tool_search
result = tool_search(query="planning", wing="notes") result = tool_search(query="planning", wing="notes")
assert all(r["wing"] == "notes" for r in result["results"]) assert all(r["wing"] == "notes" for r in result["results"])
def test_search_with_room_filter(self, monkeypatch, config, palace_path, seeded_collection, kg): def test_search_with_room_filter(self, monkeypatch, config, palace_path, seeded_collection, kg):
_patch_mcp_server(monkeypatch, config, palace_path, kg) _patch_mcp_server(monkeypatch, config, kg)
from mempalace.mcp_server import tool_search from mempalace.mcp_server import tool_search
result = tool_search(query="database", room="backend") result = tool_search(query="database", room="backend")
@@ -196,8 +195,8 @@ class TestSearchTool:
class TestWriteTools: class TestWriteTools:
def test_add_drawer(self, monkeypatch, config, palace_path, kg): def test_add_drawer(self, monkeypatch, config, palace_path, kg):
_patch_mcp_server(monkeypatch, config, palace_path, kg) _patch_mcp_server(monkeypatch, config, kg)
_get_collection(palace_path, create=True) _client, _col = _get_collection(palace_path, create=True); del _client
from mempalace.mcp_server import tool_add_drawer from mempalace.mcp_server import tool_add_drawer
result = tool_add_drawer( result = tool_add_drawer(
@@ -211,8 +210,8 @@ class TestWriteTools:
assert result["drawer_id"].startswith("drawer_test_wing_test_room_") assert result["drawer_id"].startswith("drawer_test_wing_test_room_")
def test_add_drawer_duplicate_detection(self, monkeypatch, config, palace_path, kg): def test_add_drawer_duplicate_detection(self, monkeypatch, config, palace_path, kg):
_patch_mcp_server(monkeypatch, config, palace_path, kg) _patch_mcp_server(monkeypatch, config, kg)
_get_collection(palace_path, create=True) _client, _col = _get_collection(palace_path, create=True); del _client
from mempalace.mcp_server import tool_add_drawer from mempalace.mcp_server import tool_add_drawer
content = "This is a unique test memory about Rust ownership and borrowing." content = "This is a unique test memory about Rust ownership and borrowing."
@@ -220,11 +219,11 @@ class TestWriteTools:
assert result1["success"] is True assert result1["success"] is True
result2 = tool_add_drawer(wing="w", room="r", content=content) result2 = tool_add_drawer(wing="w", room="r", content=content)
assert result2["success"] is False assert result2["success"] is True
assert result2["reason"] == "duplicate" assert result2["reason"] == "already_exists"
def test_delete_drawer(self, monkeypatch, config, palace_path, seeded_collection, kg): def test_delete_drawer(self, monkeypatch, config, palace_path, seeded_collection, kg):
_patch_mcp_server(monkeypatch, config, palace_path, kg) _patch_mcp_server(monkeypatch, config, kg)
from mempalace.mcp_server import tool_delete_drawer from mempalace.mcp_server import tool_delete_drawer
result = tool_delete_drawer("drawer_proj_backend_aaa") result = tool_delete_drawer("drawer_proj_backend_aaa")
@@ -232,14 +231,14 @@ class TestWriteTools:
assert seeded_collection.count() == 3 assert seeded_collection.count() == 3
def test_delete_drawer_not_found(self, monkeypatch, config, palace_path, seeded_collection, kg): def test_delete_drawer_not_found(self, monkeypatch, config, palace_path, seeded_collection, kg):
_patch_mcp_server(monkeypatch, config, palace_path, kg) _patch_mcp_server(monkeypatch, config, kg)
from mempalace.mcp_server import tool_delete_drawer from mempalace.mcp_server import tool_delete_drawer
result = tool_delete_drawer("nonexistent_drawer") result = tool_delete_drawer("nonexistent_drawer")
assert result["success"] is False assert result["success"] is False
def test_check_duplicate(self, monkeypatch, config, palace_path, seeded_collection, kg): def test_check_duplicate(self, monkeypatch, config, palace_path, seeded_collection, kg):
_patch_mcp_server(monkeypatch, config, palace_path, kg) _patch_mcp_server(monkeypatch, config, kg)
from mempalace.mcp_server import tool_check_duplicate from mempalace.mcp_server import tool_check_duplicate
# Exact match text from seeded_collection should be flagged # Exact match text from seeded_collection should be flagged
@@ -263,7 +262,7 @@ class TestWriteTools:
class TestKGTools: class TestKGTools:
def test_kg_add(self, monkeypatch, config, palace_path, kg): def test_kg_add(self, monkeypatch, config, palace_path, kg):
_patch_mcp_server(monkeypatch, config, palace_path, kg) _patch_mcp_server(monkeypatch, config, kg)
from mempalace.mcp_server import tool_kg_add from mempalace.mcp_server import tool_kg_add
result = tool_kg_add( result = tool_kg_add(
@@ -275,14 +274,14 @@ class TestKGTools:
assert result["success"] is True assert result["success"] is True
def test_kg_query(self, monkeypatch, config, palace_path, seeded_kg): def test_kg_query(self, monkeypatch, config, palace_path, seeded_kg):
_patch_mcp_server(monkeypatch, config, palace_path, seeded_kg) _patch_mcp_server(monkeypatch, config, seeded_kg)
from mempalace.mcp_server import tool_kg_query from mempalace.mcp_server import tool_kg_query
result = tool_kg_query(entity="Max") result = tool_kg_query(entity="Max")
assert result["count"] > 0 assert result["count"] > 0
def test_kg_invalidate(self, monkeypatch, config, palace_path, seeded_kg): def test_kg_invalidate(self, monkeypatch, config, palace_path, seeded_kg):
_patch_mcp_server(monkeypatch, config, palace_path, seeded_kg) _patch_mcp_server(monkeypatch, config, seeded_kg)
from mempalace.mcp_server import tool_kg_invalidate from mempalace.mcp_server import tool_kg_invalidate
result = tool_kg_invalidate( result = tool_kg_invalidate(
@@ -294,14 +293,14 @@ class TestKGTools:
assert result["success"] is True assert result["success"] is True
def test_kg_timeline(self, monkeypatch, config, palace_path, seeded_kg): def test_kg_timeline(self, monkeypatch, config, palace_path, seeded_kg):
_patch_mcp_server(monkeypatch, config, palace_path, seeded_kg) _patch_mcp_server(monkeypatch, config, seeded_kg)
from mempalace.mcp_server import tool_kg_timeline from mempalace.mcp_server import tool_kg_timeline
result = tool_kg_timeline(entity="Alice") result = tool_kg_timeline(entity="Alice")
assert result["count"] > 0 assert result["count"] > 0
def test_kg_stats(self, monkeypatch, config, palace_path, seeded_kg): def test_kg_stats(self, monkeypatch, config, palace_path, seeded_kg):
_patch_mcp_server(monkeypatch, config, palace_path, seeded_kg) _patch_mcp_server(monkeypatch, config, seeded_kg)
from mempalace.mcp_server import tool_kg_stats from mempalace.mcp_server import tool_kg_stats
result = tool_kg_stats() result = tool_kg_stats()
@@ -313,8 +312,8 @@ class TestKGTools:
class TestDiaryTools: class TestDiaryTools:
def test_diary_write_and_read(self, monkeypatch, config, palace_path, kg): def test_diary_write_and_read(self, monkeypatch, config, palace_path, kg):
_patch_mcp_server(monkeypatch, config, palace_path, kg) _patch_mcp_server(monkeypatch, config, kg)
_get_collection(palace_path, create=True) _client, _col = _get_collection(palace_path, create=True); del _client
from mempalace.mcp_server import tool_diary_write, tool_diary_read from mempalace.mcp_server import tool_diary_write, tool_diary_read
w = tool_diary_write( w = tool_diary_write(
@@ -331,8 +330,8 @@ class TestDiaryTools:
assert "authentication" in r["entries"][0]["content"] assert "authentication" in r["entries"][0]["content"]
def test_diary_read_empty(self, monkeypatch, config, palace_path, kg): def test_diary_read_empty(self, monkeypatch, config, palace_path, kg):
_patch_mcp_server(monkeypatch, config, palace_path, kg) _patch_mcp_server(monkeypatch, config, kg)
_get_collection(palace_path, create=True) _client, _col = _get_collection(palace_path, create=True); del _client
from mempalace.mcp_server import tool_diary_read from mempalace.mcp_server import tool_diary_read
r = tool_diary_read(agent_name="Nobody") r = tool_diary_read(agent_name="Nobody")