fix(mcp): forward valid_to and source params in kg_add/kg_invalidate (#1314)

`tool_kg_add` previously accepted only `valid_from` and `source_closet`,
silently dropping `valid_to`, `source_file`, and `source_drawer_id` at
the MCP boundary. Backfilling already-ended historical facts therefore
collapsed to "still current," and adapter provenance never reached
the SQLite layer even though `KnowledgeGraph.add_triple` already
supported every column.

`tool_kg_invalidate` returned the literal string `"today"` whenever the
caller omitted `ended`, hiding the actual stamped date from anyone trying
to verify what got persisted.

Changes:
- Extend `tool_kg_add` signature + MCP input_schema with `valid_to`,
  `source_file`, `source_drawer_id`; forward all of them to
  `_kg.add_triple` and to the WAL log.
- Resolve `ended` to `date.today().isoformat()` in `tool_kg_invalidate`
  before logging / returning, so the response always reports the actual
  date stored in `valid_to`.
- Add regression tests for valid_to round-trip, source_file /
  source_drawer_id provenance, and the resolved-ended-date contract.
- Leave TODO(#1283) markers so the open ISO-8601 validation PR can drop
  `validate_iso_date` over `valid_from` / `valid_to` / `ended` cleanly.

The underlying `KnowledgeGraph.add_triple` already accepted these
kwargs (RFC 002 §5.5) — only the MCP edge needed wiring up.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Igor Lins e Silva
2026-05-02 22:54:32 -03:00
parent d07b730f08
commit e4e25ed186
2 changed files with 153 additions and 16 deletions
+63 -10
View File
@@ -47,7 +47,7 @@ import json # noqa: E402
import logging # noqa: E402
import hashlib # noqa: E402
import time # noqa: E402
from datetime import datetime # noqa: E402
from datetime import date, datetime # noqa: E402
from pathlib import Path # noqa: E402
from .config import ( # noqa: E402
@@ -677,7 +677,7 @@ def tool_check_duplicate(content: str, threshold: float = 0.9):
"vector_disabled": True,
"vector_disabled_reason": _vector_disabled_reason,
"hint": (
"duplicate detection requires vector search; run " "`mempalace repair` to restore"
"duplicate detection requires vector search; run `mempalace repair` to restore"
),
}
try:
@@ -1061,9 +1061,26 @@ def tool_kg_query(entity: str, as_of: str = None, direction: str = "both"):
def tool_kg_add(
subject: str, predicate: str, object: str, valid_from: str = None, source_closet: str = None
subject: str,
predicate: str,
object: str,
valid_from: str = None,
valid_to: str = None,
source_closet: str = None,
source_file: str = None,
source_drawer_id: str = None,
):
"""Add a relationship to the knowledge graph."""
"""Add a relationship to the knowledge graph.
All temporal and provenance fields are optional. ``valid_to`` lets callers
backfill historical facts with a known end date in a single call (instead
of a separate ``kg_invalidate``). ``source_file`` and ``source_drawer_id``
are RFC 002 §5.5 provenance fields populated by adapters / bulk importers.
TODO(#1283): once the ISO-8601 validation PR lands, wire ``validate_iso_date``
over ``valid_from`` / ``valid_to`` here so malformed dates fail fast at the
MCP boundary instead of silently producing empty query results.
"""
try:
subject = sanitize_kg_value(subject, "subject")
predicate = sanitize_name(predicate, "predicate")
@@ -1078,32 +1095,56 @@ def tool_kg_add(
"predicate": predicate,
"object": object,
"valid_from": valid_from,
"valid_to": valid_to,
"source_closet": source_closet,
"source_file": source_file,
"source_drawer_id": source_drawer_id,
},
)
triple_id = _kg.add_triple(
subject, predicate, object, valid_from=valid_from, source_closet=source_closet
subject,
predicate,
object,
valid_from=valid_from,
valid_to=valid_to,
source_closet=source_closet,
source_file=source_file,
source_drawer_id=source_drawer_id,
)
return {"success": True, "triple_id": triple_id, "fact": f"{subject}{predicate}{object}"}
def tool_kg_invalidate(subject: str, predicate: str, object: str, ended: str = None):
"""Mark a fact as no longer true (set end date)."""
"""Mark a fact as no longer true (set end date).
Returns the actual ``ended`` date that was stored — when the caller omits
``ended``, the underlying graph stamps ``date.today()``, and the response
reflects that resolved value (instead of the literal string ``"today"``)
so callers can verify what was persisted.
TODO(#1283): apply ``validate_iso_date`` to ``ended`` once that PR lands.
"""
try:
subject = sanitize_kg_value(subject, "subject")
predicate = sanitize_name(predicate, "predicate")
object = sanitize_kg_value(object, "object")
except ValueError as e:
return {"success": False, "error": str(e)}
resolved_ended = ended or date.today().isoformat()
_wal_log(
"kg_invalidate",
{"subject": subject, "predicate": predicate, "object": object, "ended": ended},
{
"subject": subject,
"predicate": predicate,
"object": object,
"ended": resolved_ended,
},
)
_kg.invalidate(subject, predicate, object, ended=ended)
_kg.invalidate(subject, predicate, object, ended=resolved_ended)
return {
"success": True,
"fact": f"{subject}{predicate}{object}",
"ended": ended or "today",
"ended": resolved_ended,
}
@@ -1440,7 +1481,7 @@ TOOLS = {
"handler": tool_kg_query,
},
"mempalace_kg_add": {
"description": "Add a fact to the knowledge graph. Subject → predicate → object with optional time window. E.g. ('Max', 'started_school', 'Year 7', valid_from='2026-09-01').",
"description": "Add a fact to the knowledge graph. Subject → predicate → object with optional time window. E.g. ('Max', 'started_school', 'Year 7', valid_from='2026-09-01'). Pass valid_to to backfill an already-ended historical fact in a single call.",
"input_schema": {
"type": "object",
"properties": {
@@ -1454,10 +1495,22 @@ TOOLS = {
"type": "string",
"description": "When this became true (YYYY-MM-DD, optional)",
},
"valid_to": {
"type": "string",
"description": "When this stopped being true (YYYY-MM-DD, optional). Use for backfilling already-ended historical facts.",
},
"source_closet": {
"type": "string",
"description": "Closet ID where this fact appears (optional)",
},
"source_file": {
"type": "string",
"description": "Source file path the fact was extracted from (optional)",
},
"source_drawer_id": {
"type": "string",
"description": "Drawer ID the fact was extracted from (optional, RFC 002 §5.5 provenance)",
},
},
"required": ["subject", "predicate", "object"],
},
+90 -6
View File
@@ -476,9 +476,9 @@ class TestWriteTools:
assert result1["success"] is True
assert result2["success"] is True
assert (
result1["drawer_id"] != result2["drawer_id"]
), "Documents with shared header but different content must have distinct drawer IDs"
assert result1["drawer_id"] != result2["drawer_id"], (
"Documents with shared header but different content must have distinct drawer IDs"
)
def test_delete_drawer(self, monkeypatch, config, palace_path, seeded_collection, kg):
_patch_mcp_server(monkeypatch, config, kg)
@@ -650,6 +650,90 @@ class TestKGTools:
ended="2026-03-01",
)
assert result["success"] is True
# Regression #1314: response must echo the actual ended date,
# not silently drop it and return the literal string "today".
assert result["ended"] == "2026-03-01"
def test_kg_add_forwards_valid_to(self, monkeypatch, config, palace_path, kg):
"""Regression #1314 case 1: valid_to must round-trip through kg_add."""
_patch_mcp_server(monkeypatch, config, kg)
from mempalace.mcp_server import tool_kg_add
result = tool_kg_add(
subject="_test_temporal",
predicate="had_value",
object="probe",
valid_from="2026-01-01",
valid_to="2026-04-28",
)
assert result["success"] is True
facts = kg.query_entity("_test_temporal")
assert len(facts) == 1
assert facts[0]["valid_from"] == "2026-01-01"
assert facts[0]["valid_to"] == "2026-04-28"
# An already-ended fact must not be reported as still current.
assert facts[0]["current"] is False
def test_kg_add_forwards_source_provenance(self, monkeypatch, config, palace_path, kg):
"""Regression #1314 case 3: source_file / source_drawer_id reach storage."""
_patch_mcp_server(monkeypatch, config, kg)
from mempalace.mcp_server import tool_kg_add
result = tool_kg_add(
subject="operating-verb",
predicate="candidate",
object="husbandry",
valid_from="2026-04-28",
source_closet="closet-42",
source_file="docs/decisions.md",
source_drawer_id="drawer_abc123",
)
assert result["success"] is True
triple_id = result["triple_id"]
# Read raw row to verify all provenance columns persisted.
with kg._lock:
row = (
kg._conn()
.execute(
"SELECT source_closet, source_file, source_drawer_id FROM triples WHERE id = ?",
(triple_id,),
)
.fetchone()
)
assert row is not None
assert row["source_closet"] == "closet-42"
assert row["source_file"] == "docs/decisions.md"
assert row["source_drawer_id"] == "drawer_abc123"
def test_kg_invalidate_returns_actual_ended_date(
self, monkeypatch, config, palace_path, seeded_kg
):
"""Regression #1314 case 2: response reports the resolved date, not 'today'."""
from datetime import date as _date
_patch_mcp_server(monkeypatch, config, seeded_kg)
from mempalace.mcp_server import tool_kg_invalidate
# Caller-supplied date round-trips into the response.
explicit = tool_kg_invalidate(
subject="Max",
predicate="does",
object="swimming",
ended="2026-04-28",
)
assert explicit["ended"] == "2026-04-28"
# Caller-omitted date resolves to today's ISO date — never the
# literal string "today" the buggy implementation used to return.
implicit = tool_kg_invalidate(
subject="Max",
predicate="loves",
object="Chess",
)
assert implicit["ended"] != "today"
assert implicit["ended"] == _date.today().isoformat()
def test_kg_timeline(self, monkeypatch, config, palace_path, seeded_kg):
_patch_mcp_server(monkeypatch, config, seeded_kg)
@@ -960,9 +1044,9 @@ class TestCacheInvalidation:
all_calls = captured["get"] + captured["create"]
assert all_calls, "expected get_collection or create_collection to be called"
for kwargs in all_calls:
assert (
"embedding_function" in kwargs
), f"missing embedding_function= in chromadb call: {kwargs}"
assert "embedding_function" in kwargs, (
f"missing embedding_function= in chromadb call: {kwargs}"
)
assert kwargs["embedding_function"] is not None
# Same expectation on the create=False (cache-miss) reopen path.