fix: mark MD5 as non-security in miner drawer ID generation
Add usedforsecurity=False to the hashlib.md5() calls in miner.py and convo_miner.py to document that MD5 is used there only for deterministic ID generation, not for cryptographic security. The hash algorithm itself is deliberately left unchanged so that drawer IDs stay stable and backward compatible with existing palaces: swapping to SHA-256 would change the ID formula and make existing drawers unreachable on re-ingestion. PR #34 covers the MD5 sites in knowledge_graph.py and mcp_server.py. Verified: the usedforsecurity keyword argument is supported since Python 3.9 (the project target per pyproject.toml line 10), confirmed via Context7 CPython docs.
This commit is contained in:
@@ -352,7 +352,7 @@ def mine_convos(
|
||||
chunk_room = chunk.get("memory_type", room) if extract_mode == "general" else room
|
||||
if extract_mode == "general":
|
||||
room_counts[chunk_room] += 1
|
||||
drawer_id = f"drawer_{wing}_{chunk_room}_{hashlib.md5((source_file + str(chunk['chunk_index'])).encode()).hexdigest()[:16]}"
|
||||
drawer_id = f"drawer_{wing}_{chunk_room}_{hashlib.md5((source_file + str(chunk['chunk_index'])).encode(), usedforsecurity=False).hexdigest()[:16]}"
|
||||
try:
|
||||
collection.add(
|
||||
documents=[chunk["content"]],
|
||||
|
||||
+1
-1
@@ -202,7 +202,7 @@ def add_drawer(
|
||||
collection, wing: str, room: str, content: str, source_file: str, chunk_index: int, agent: str
|
||||
):
|
||||
"""Add one drawer to the palace."""
|
||||
drawer_id = f"drawer_{wing}_{room}_{hashlib.md5((source_file + str(chunk_index)).encode()).hexdigest()[:16]}"
|
||||
drawer_id = f"drawer_{wing}_{room}_{hashlib.md5((source_file + str(chunk_index)).encode(), usedforsecurity=False).hexdigest()[:16]}"
|
||||
try:
|
||||
collection.add(
|
||||
documents=[content],
|
||||
|
||||
Reference in New Issue
Block a user