fix(closet_llm): retry _call_llm on JSONDecodeError instead of bailing

The retry loop already backs off on HTTP 429/503 and rate-limit-shaped
exceptions, but the JSONDecodeError handler returned on the first failure.
Local LLM runtimes occasionally produce malformed JSON (truncated streams,
partial chunks under load), so the retry loop was effectively dead for that
path.

Mirror the 429/503 branch: sleep with exponential backoff and continue
through all 3 attempts, only returning None after the final failure.

Closes #1155
This commit is contained in:
Igor Lins e Silva
2026-05-07 12:38:39 -03:00
parent 03ed4c45cf
commit 2a0ed0cb8f
2 changed files with 28 additions and 1 deletion
+3
View File
@@ -169,6 +169,9 @@ def _call_llm(cfg: LLMConfig, source_file: str, wing: str, room: str, content: s
parsed = json.loads(text)
return parsed, payload.get("usage")
except json.JSONDecodeError:
if attempt < 2:
time.sleep(2**attempt)
continue
return None, None
except urllib.error.HTTPError as e:
# 429 / 503 = retry with backoff
+25 -1
View File
@@ -196,10 +196,34 @@ class TestCallLLM:
}
)
with patch("urllib.request.urlopen", side_effect=fake_urlopen):
with (
patch("urllib.request.urlopen", side_effect=fake_urlopen),
patch("mempalace.closet_llm.time.sleep"),
):
parsed, usage = _call_llm(cfg, "/tmp/x", "w", "r", "c")
assert parsed is None
def test_retries_on_json_decode_error(self):
    """Malformed JSON content must be retried, not abandoned on the first failure.

    Every fake response carries non-JSON message content, so each attempt
    ends in ``json.JSONDecodeError``. ``_call_llm`` is expected to back off
    and try again, giving up with ``(None, None)`` only after the third
    attempt.
    """
    cfg = self._make_cfg()
    call_count = {"n": 0}

    def fake_urlopen(req, timeout=None):
        # Count every HTTP attempt and always answer with unparseable content.
        call_count["n"] += 1
        return _FakeResp(
            {
                "choices": [{"message": {"content": "not json at all"}}],
                "usage": {"prompt_tokens": 1, "completion_tokens": 1},
            }
        )

    with (
        patch("urllib.request.urlopen", side_effect=fake_urlopen),
        # Stub out sleep so the exponential backoff does not slow the test.
        patch("mempalace.closet_llm.time.sleep") as fake_sleep,
    ):
        parsed, usage = _call_llm(cfg, "/tmp/x", "w", "r", "c")

    assert parsed is None
    # The exhausted-retries path returns (None, None), so usage must be None too.
    assert usage is None
    assert call_count["n"] == 3
    # A backoff sleep precedes each of the two retries; without this check the
    # test would still pass if the retry loop hot-looped with no delay.
    assert fake_sleep.call_count >= 2
# ── regenerate_closets error paths ───────────────────────────────────────