fix(normalize): discard user/gemini turns before session_metadata sentinel
Agent-Logs-Url: https://github.com/MemPalace/mempalace/sessions/4511e9aa-38e7-440e-a6f8-eda91e576f0f
Co-authored-by: igorls <4753812+igorls@users.noreply.github.com>
This commit is contained in:
committed by
GitHub
parent
a3e3691e86
commit
e7fe6cae14
+11
-3
@@ -296,9 +296,12 @@ def _try_gemini_jsonl(content: str) -> Optional[str]:
|
|||||||
|
|
||||||
Detection requires a ``session_metadata`` record so this parser does
|
Detection requires a ``session_metadata`` record so this parser does
|
||||||
not false-positive against Claude Code or Codex JSONL passed through
|
not false-positive against Claude Code or Codex JSONL passed through
|
||||||
the dispatch chain. ``message_update`` entries are skipped — they
|
the dispatch chain. Any ``user``/``gemini`` lines that appear before
|
||||||
have no message text. Multiple text blocks within a single message's
|
``session_metadata`` are discarded — they are treated as preamble
|
||||||
content array are concatenated in order, separated by newlines.
|
noise, not conversational turns. ``message_update`` entries are
|
||||||
|
skipped — they have no message text. Multiple text blocks within a
|
||||||
|
single message's content array are concatenated in order, separated
|
||||||
|
by newlines.
|
||||||
"""
|
"""
|
||||||
lines = [line.strip() for line in content.strip().split("\n") if line.strip()]
|
lines = [line.strip() for line in content.strip().split("\n") if line.strip()]
|
||||||
messages = []
|
messages = []
|
||||||
@@ -316,6 +319,11 @@ def _try_gemini_jsonl(content: str) -> Optional[str]:
|
|||||||
has_session_metadata = True
|
has_session_metadata = True
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
# Discard everything (including user/gemini turns) until the
|
||||||
|
# session_metadata sentinel has been seen.
|
||||||
|
if not has_session_metadata:
|
||||||
|
continue
|
||||||
|
|
||||||
if entry_type not in ("user", "gemini"):
|
if entry_type not in ("user", "gemini"):
|
||||||
# Skips message_update, system events, anything else.
|
# Skips message_update, system events, anything else.
|
||||||
continue
|
continue
|
||||||
|
|||||||
@@ -594,6 +594,25 @@ def test_gemini_jsonl_does_not_match_codex():
|
|||||||
assert result is None
|
assert result is None
|
||||||
|
|
||||||
|
|
||||||
|
def test_gemini_jsonl_messages_before_session_metadata_discarded():
|
||||||
|
"""user/gemini turns that appear before the session_metadata sentinel must
|
||||||
|
be silently discarded, not counted as conversational messages. Only turns
|
||||||
|
after the sentinel contribute to the transcript."""
|
||||||
|
lines = [
|
||||||
|
json.dumps({"type": "user", "content": [{"text": "preamble Q"}]}),
|
||||||
|
json.dumps({"type": "gemini", "content": [{"text": "preamble A"}]}),
|
||||||
|
json.dumps({"type": "session_metadata", "sessionId": "s"}),
|
||||||
|
json.dumps({"type": "user", "content": [{"text": "real Q"}]}),
|
||||||
|
json.dumps({"type": "gemini", "content": [{"text": "real A"}]}),
|
||||||
|
]
|
||||||
|
result = _try_gemini_jsonl("\n".join(lines))
|
||||||
|
assert result is not None
|
||||||
|
assert "preamble Q" not in result
|
||||||
|
assert "preamble A" not in result
|
||||||
|
assert "> real Q" in result
|
||||||
|
assert "real A" in result
|
||||||
|
|
||||||
|
|
||||||
# ── _try_claude_ai_json ───────────────────────────────────────────────
|
# ── _try_claude_ai_json ───────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user