feat(llm): pluggable provider abstraction for entity refinement

Three providers cover the useful space while keeping the zero-API
default:

- `ollama` (default): local models via http://localhost:11434. Works
  fully offline. Tag-matching check accepts both `model` and
  `model:latest` forms.
- `openai-compat`: any /v1/chat/completions endpoint. Covers
  OpenRouter, LM Studio, llama.cpp server, vLLM, Groq, Together,
  Fireworks, and most self-hosted frameworks. API key falls back to
  $OPENAI_API_KEY. Endpoint normalization is forgiving about trailing
  `/v1`.
- `anthropic`: Messages API v2023-06-01. API key falls back to
  $ANTHROPIC_API_KEY. Concatenates multi-block text responses.

JSON mode is normalized across providers — Ollama uses
`format: "json"`, OpenAI-compat uses `response_format`, Anthropic uses
prompt-level instruction. Callers request JSON once; this module
handles the provider-specific plumbing.

No external SDK dependency; stdlib `urllib` throughout. HTTP errors
are wrapped into a single `LLMError` class so callers don't need to
distinguish transport, auth, and parse failures at the call site.

26 tests, all with mocked HTTP — suite runs offline with no real
provider required.
This commit is contained in:
Igor Lins e Silva
2026-04-24 00:46:43 -03:00
parent c7bd2cd8e4
commit df6c7d0dc3
2 changed files with 632 additions and 0 deletions
+305
View File
@@ -0,0 +1,305 @@
"""
llm_client.py — Minimal provider abstraction for LLM-assisted entity refinement.
Three providers cover the useful space:
- ``ollama`` (default): local models via http://localhost:11434. Works fully
offline. Honors MemPalace's "zero-API required" principle.
- ``openai-compat``: any OpenAI-compatible ``/v1/chat/completions`` endpoint.
Covers OpenRouter, LM Studio, llama.cpp server, vLLM, Groq, Fireworks,
Together, and most self-hosted setups.
- ``anthropic``: the official Messages API. Opt-in for users who want Haiku
quality without setting up a local model.
All providers expose the same ``classify(system, user, json_mode)`` method and
the same ``check_available()`` probe. No external SDK dependencies — stdlib
``urllib`` only.
JSON mode matters here: we always ask for structured output. Providers
differ on how to request it (Ollama: ``format: json``; OpenAI-compat:
``response_format``; Anthropic: prompt-level instruction) and this module
normalizes that away from the caller.
"""
from __future__ import annotations
import json
import os
from dataclasses import dataclass
from typing import Optional
from urllib.error import HTTPError, URLError
from urllib.request import Request, urlopen
class LLMError(RuntimeError):
"""Raised for any provider failure — transport, parse, auth, missing model."""
@dataclass
class LLMResponse:
text: str
model: str
provider: str
raw: dict
# ==================== BASE ====================
class LLMProvider:
name: str = "base"
def __init__(
self,
model: str,
endpoint: Optional[str] = None,
api_key: Optional[str] = None,
timeout: int = 120,
):
self.model = model
self.endpoint = endpoint
self.api_key = api_key
self.timeout = timeout
def classify(self, system: str, user: str, json_mode: bool = True) -> LLMResponse:
raise NotImplementedError
def check_available(self) -> tuple[bool, str]:
"""Return ``(ok, message)``. Fast probe that the provider is reachable."""
raise NotImplementedError
def _http_post_json(url: str, body: dict, headers: dict, timeout: int) -> dict:
"""POST JSON and return the parsed response. Raises LLMError on any failure."""
req = Request(
url,
data=json.dumps(body).encode("utf-8"),
headers={"Content-Type": "application/json", **headers},
)
try:
with urlopen(req, timeout=timeout) as resp:
return json.loads(resp.read())
except HTTPError as e:
detail = ""
try:
detail = e.read().decode("utf-8", errors="replace")[:500]
except Exception:
pass
raise LLMError(f"HTTP {e.code} from {url}: {detail or e.reason}") from e
except (URLError, OSError) as e:
raise LLMError(f"Cannot reach {url}: {e}") from e
except json.JSONDecodeError as e:
raise LLMError(f"Malformed response from {url}: {e}") from e
# ==================== OLLAMA ====================
class OllamaProvider(LLMProvider):
name = "ollama"
DEFAULT_ENDPOINT = "http://localhost:11434"
def __init__(
self,
model: str,
endpoint: Optional[str] = None,
timeout: int = 180,
**_: object,
):
super().__init__(
model=model,
endpoint=endpoint or self.DEFAULT_ENDPOINT,
timeout=timeout,
)
def check_available(self) -> tuple[bool, str]:
try:
with urlopen(f"{self.endpoint}/api/tags", timeout=5) as resp:
data = json.loads(resp.read())
except (URLError, HTTPError, OSError, json.JSONDecodeError) as e:
return False, f"Cannot reach Ollama at {self.endpoint}: {e}"
names = {m.get("name", "") for m in data.get("models", []) or []}
# Ollama tags may or may not include ':latest' — accept either form
wanted = {self.model, f"{self.model}:latest"}
if not names & wanted:
return (
False,
f"Model '{self.model}' not loaded in Ollama. " f"Run: ollama pull {self.model}",
)
return True, "ok"
def classify(self, system: str, user: str, json_mode: bool = True) -> LLMResponse:
body: dict = {
"model": self.model,
"messages": [
{"role": "system", "content": system},
{"role": "user", "content": user},
],
"stream": False,
"options": {"temperature": 0.1},
}
if json_mode:
body["format"] = "json"
data = _http_post_json(f"{self.endpoint}/api/chat", body, headers={}, timeout=self.timeout)
text = (data.get("message") or {}).get("content", "")
if not text:
raise LLMError(f"Empty response from Ollama (model={self.model})")
return LLMResponse(text=text, model=self.model, provider=self.name, raw=data)
# ==================== OPENAI-COMPAT ====================
class OpenAICompatProvider(LLMProvider):
"""Any OpenAI-compatible ``/v1/chat/completions`` endpoint.
Supply ``--llm-endpoint http://host:port`` (with or without ``/v1``).
API key via ``--llm-api-key`` or the ``OPENAI_API_KEY`` env var.
"""
name = "openai-compat"
def __init__(
self,
model: str,
endpoint: Optional[str] = None,
api_key: Optional[str] = None,
timeout: int = 120,
**_: object,
):
resolved_key = api_key or os.environ.get("OPENAI_API_KEY")
super().__init__(model=model, endpoint=endpoint, api_key=resolved_key, timeout=timeout)
def _resolve_url(self) -> str:
if not self.endpoint:
raise LLMError("openai-compat provider requires --llm-endpoint")
url = self.endpoint.rstrip("/")
if url.endswith("/chat/completions"):
return url
if not url.endswith("/v1"):
url = f"{url}/v1"
return f"{url}/chat/completions"
def check_available(self) -> tuple[bool, str]:
if not self.endpoint:
return False, "no --llm-endpoint configured"
base = self.endpoint.rstrip("/")
base = base.removesuffix("/chat/completions").removesuffix("/v1")
try:
req = Request(f"{base}/v1/models")
if self.api_key:
req.add_header("Authorization", f"Bearer {self.api_key}")
with urlopen(req, timeout=5):
pass
except (URLError, HTTPError, OSError) as e:
return False, f"Cannot reach {self.endpoint}: {e}"
return True, "ok"
def classify(self, system: str, user: str, json_mode: bool = True) -> LLMResponse:
body: dict = {
"model": self.model,
"messages": [
{"role": "system", "content": system},
{"role": "user", "content": user},
],
"temperature": 0.1,
}
if json_mode:
body["response_format"] = {"type": "json_object"}
headers = {}
if self.api_key:
headers["Authorization"] = f"Bearer {self.api_key}"
data = _http_post_json(self._resolve_url(), body, headers=headers, timeout=self.timeout)
try:
text = data["choices"][0]["message"]["content"]
except (KeyError, IndexError, TypeError) as e:
raise LLMError(f"Unexpected response shape: {e}") from e
if not text:
raise LLMError(f"Empty response from {self.name} (model={self.model})")
return LLMResponse(text=text, model=self.model, provider=self.name, raw=data)
# ==================== ANTHROPIC ====================
class AnthropicProvider(LLMProvider):
name = "anthropic"
DEFAULT_ENDPOINT = "https://api.anthropic.com"
API_VERSION = "2023-06-01"
def __init__(
self,
model: str,
api_key: Optional[str] = None,
endpoint: Optional[str] = None,
timeout: int = 120,
**_: object,
):
key = api_key or os.environ.get("ANTHROPIC_API_KEY")
super().__init__(
model=model,
endpoint=endpoint or self.DEFAULT_ENDPOINT,
api_key=key,
timeout=timeout,
)
def check_available(self) -> tuple[bool, str]:
if not self.api_key:
return False, "ANTHROPIC_API_KEY not set (use --llm-api-key or env)"
# Don't probe — a live request would cost money. First real call will
# surface auth errors if the key is invalid.
return True, "ok"
def classify(self, system: str, user: str, json_mode: bool = True) -> LLMResponse:
if not self.api_key:
raise LLMError("Anthropic provider requires ANTHROPIC_API_KEY env or --llm-api-key")
sys_prompt = system
if json_mode:
sys_prompt += "\n\nRespond with valid JSON only, no prose."
body = {
"model": self.model,
"max_tokens": 2048,
"temperature": 0.1,
"system": sys_prompt,
"messages": [{"role": "user", "content": user}],
}
headers = {
"X-API-Key": self.api_key,
"anthropic-version": self.API_VERSION,
}
data = _http_post_json(
f"{self.endpoint}/v1/messages", body, headers=headers, timeout=self.timeout
)
try:
text = "".join(
b.get("text", "") for b in data.get("content", []) or [] if b.get("type") == "text"
)
except (AttributeError, TypeError) as e:
raise LLMError(f"Unexpected response shape: {e}") from e
if not text:
raise LLMError(f"Empty response from Anthropic (model={self.model})")
return LLMResponse(text=text, model=self.model, provider=self.name, raw=data)
# ==================== FACTORY ====================
PROVIDERS: dict[str, type[LLMProvider]] = {
"ollama": OllamaProvider,
"openai-compat": OpenAICompatProvider,
"anthropic": AnthropicProvider,
}
def get_provider(
name: str,
model: str,
endpoint: Optional[str] = None,
api_key: Optional[str] = None,
timeout: int = 120,
) -> LLMProvider:
"""Build a provider by name. Raises LLMError on unknown provider."""
cls = PROVIDERS.get(name)
if cls is None:
raise LLMError(f"Unknown provider '{name}'. Choices: {sorted(PROVIDERS.keys())}")
return cls(model=model, endpoint=endpoint, api_key=api_key, timeout=timeout)