fix: remove 8-line AI response truncation in convo_miner (#692) (#708)

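The _chunk_by_exchange() function silently truncated each AI response to its first 8 lines via ai_lines[:8]; any content beyond line 8 was discarded, violating the project's verbatim storage principle. The full AI response is now kept. When a combined exchange (user turn + response) exceeds CHUNK_SIZE (800 chars, aligned with miner.py), it is split at CHUNK_SIZE-character offsets across consecutive drawers instead of being truncated. Note that any resulting piece whose stripped length does not exceed MIN_CHUNK_SIZE (30 chars) is still skipped, so a short trailing fragment of a long exchange may not be stored.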
2026-04-12 17:23:57 -04:00
parent d52d6c9622
commit 9b60c6edd7
1 changed files with 25 additions and 2 deletions
@@ -28,6 +28,7 @@ CONVO_EXTENSIONS = {
 }

 MIN_CHUNK_SIZE = 30
+CHUNK_SIZE = 800  # chars per drawer — align with miner.py
 MAX_FILE_SIZE = 10 * 1024 * 1024  # 10 MB — skip files larger than this


@@ -51,7 +52,12 @@ def chunk_exchanges(content: str) -> list:


 def _chunk_by_exchange(lines: list) -> list:
-    """One user turn (>) + the AI response that follows = one chunk."""
+    """One user turn (>) + the AI response that follows = one or more chunks.
+
+    The full AI response is preserved verbatim.  When the combined
+    user-turn + response exceeds CHUNK_SIZE the response is split across
+    consecutive drawers so nothing is silently discarded.
+    """
    chunks = []
    i = 0

@@ -73,7 +79,24 @@ def _chunk_by_exchange(lines: list) -> list:
            ai_response = " ".join(ai_lines)
            content = f"{user_turn}\n{ai_response}" if ai_response else user_turn

-            if len(content.strip()) > MIN_CHUNK_SIZE:
+            # Split into multiple drawers when the exchange exceeds CHUNK_SIZE
+            if len(content) > CHUNK_SIZE:
+                # First chunk: user turn + as much response as fits
+                first_part = content[:CHUNK_SIZE]
+                if len(first_part.strip()) > MIN_CHUNK_SIZE:
+                    chunks.append(
+                        {"content": first_part, "chunk_index": len(chunks)}
+                    )
+                # Remaining response in CHUNK_SIZE-sized continuation drawers
+                remainder = content[CHUNK_SIZE:]
+                while remainder:
+                    part = remainder[:CHUNK_SIZE]
+                    remainder = remainder[CHUNK_SIZE:]
+                    if len(part.strip()) > MIN_CHUNK_SIZE:
+                        chunks.append(
+                            {"content": part, "chunk_index": len(chunks)}
+                        )
+            elif len(content.strip()) > MIN_CHUNK_SIZE:
                chunks.append(
                    {
                        "content": content,