fix(normalize): discard user/gemini turns before session_metadata sentinel

Agent-Logs-Url: https://github.com/MemPalace/mempalace/sessions/4511e9aa-38e7-440e-a6f8-eda91e576f0f

Co-authored-by: igorls <4753812+igorls@users.noreply.github.com>
This commit is contained in:
copilot-swe-agent[bot]
2026-04-27 21:41:48 +00:00
committed by GitHub
parent a3e3691e86
commit e7fe6cae14
2 changed files with 30 additions and 3 deletions
+11 -3
View File
@@ -296,9 +296,12 @@ def _try_gemini_jsonl(content: str) -> Optional[str]:
Detection requires a ``session_metadata`` record so this parser does
not false-positive against Claude Code or Codex JSONL passed through
the dispatch chain. ``message_update`` entries are skipped — they
have no message text. Multiple text blocks within a single message's
content array are concatenated in order, separated by newlines.
the dispatch chain. Any ``user``/``gemini`` lines that appear before
``session_metadata`` are discarded — they are treated as preamble
noise, not conversational turns. ``message_update`` entries are
skipped — they have no message text. Multiple text blocks within a
single message's content array are concatenated in order, separated
by newlines.
"""
lines = [line.strip() for line in content.strip().split("\n") if line.strip()]
messages = []
@@ -316,6 +319,11 @@ def _try_gemini_jsonl(content: str) -> Optional[str]:
has_session_metadata = True
continue
# Discard everything (including user/gemini turns) until the
# session_metadata sentinel has been seen.
if not has_session_metadata:
continue
if entry_type not in ("user", "gemini"):
# Skips message_update, system events, anything else.
continue
+19
View File
@@ -594,6 +594,25 @@ def test_gemini_jsonl_does_not_match_codex():
assert result is None
def test_gemini_jsonl_messages_before_session_metadata_discarded():
    """Turns preceding the ``session_metadata`` record must not reach the transcript.

    The input places one ``user`` and one ``gemini`` record ahead of the
    ``session_metadata`` line and one of each after it. Only the text of
    the later pair may appear in the parser's output; the earlier pair
    must be absent.
    """
    records = [
        {"type": "user", "content": [{"text": "preamble Q"}]},
        {"type": "gemini", "content": [{"text": "preamble A"}]},
        {"type": "session_metadata", "sessionId": "s"},
        {"type": "user", "content": [{"text": "real Q"}]},
        {"type": "gemini", "content": [{"text": "real A"}]},
    ]
    # One JSON document per line, matching the JSONL shape the parser consumes.
    payload = "\n".join(json.dumps(record) for record in records)

    transcript = _try_gemini_jsonl(payload)

    assert transcript is not None
    # Everything seen before the sentinel is dropped.
    for dropped in ("preamble Q", "preamble A"):
        assert dropped not in transcript
    # Post-sentinel turns survive; the user turn is expected with a "> " prefix.
    for kept in ("> real Q", "real A"):
        assert kept in transcript
# ── _try_claude_ai_json ───────────────────────────────────────────────