feat(llm): pluggable provider abstraction for entity refinement

Three providers cover the useful space while keeping the zero-API default:

- `ollama` (default): local models via http://localhost:11434. Works fully offline. Tag-matching check accepts both `model` and `model:latest` forms.
- `openai-compat`: any /v1/chat/completions endpoint. Covers OpenRouter, LM Studio, llama.cpp server, vLLM, Groq, Together, Fireworks, and most self-hosted frameworks. API key falls back to $OPENAI_API_KEY. Endpoint normalization is forgiving about trailing `/v1`.
- `anthropic`: Messages API v2023-06-01. API key falls back to $ANTHROPIC_API_KEY. Concatenates multi-block text responses.

JSON mode is normalized across providers — Ollama uses `format: "json"`, OpenAI-compat uses `response_format`, Anthropic uses prompt-level instruction. Callers request JSON once; this module handles the provider-specific plumbing.

No external SDK dependency; stdlib `urllib` throughout. HTTP errors are wrapped into a single `LLMError` class so callers don't need to distinguish transport, auth, and parse failures at the call site.

26 tests, all with mocked HTTP — suite runs offline with no real provider required.
2026-04-24 00:46:43 -03:00
parent c7bd2cd8e4
commit df6c7d0dc3
2 changed files with 632 additions and 0 deletions
@@ -0,0 +1,305 @@
+"""
+llm_client.py — Minimal provider abstraction for LLM-assisted entity refinement.
+
+Three providers cover the useful space:
+
+- ``ollama`` (default): local models via http://localhost:11434. Works fully
+  offline. Honors MemPalace's "zero-API required" principle.
+- ``openai-compat``: any OpenAI-compatible ``/v1/chat/completions`` endpoint.
+  Covers OpenRouter, LM Studio, llama.cpp server, vLLM, Groq, Fireworks,
+  Together, and most self-hosted setups.
+- ``anthropic``: the official Messages API. Opt-in for users who want Haiku
+  quality without setting up a local model.
+
+All providers expose the same ``classify(system, user, json_mode)`` method and
+the same ``check_available()`` probe. No external SDK dependencies — stdlib
+``urllib`` only.
+
+JSON mode matters here: we always ask for structured output. Providers
+differ on how to request it (Ollama: ``format: json``; OpenAI-compat:
+``response_format``; Anthropic: prompt-level instruction) and this module
+normalizes that away from the caller.
+"""
+
+from __future__ import annotations
+
+import json
+import os
+from dataclasses import dataclass
+from typing import Optional
+from urllib.error import HTTPError, URLError
+from urllib.request import Request, urlopen
+
+
+class LLMError(RuntimeError):
+    """Single error type for every provider failure.
+
+    Transport problems, authentication rejections, unparseable payloads and
+    missing models are all reported through this one class.
+    """
+
+
+@dataclass
+class LLMResponse:
+    """Provider-independent result of a single ``classify`` call."""
+
+    # Text content extracted from the provider's reply.
+    text: str
+    # Model identifier the request was issued for.
+    model: str
+    # ``name`` of the provider class that produced this response.
+    provider: str
+    # Full decoded JSON payload as returned by the provider.
+    raw: dict
+
+
+# ==================== BASE ====================
+
+
+class LLMProvider:
+    """Interface shared by all providers.
+
+    Subclasses override ``classify`` and ``check_available``; this base class
+    only stores the connection settings.
+    """
+
+    name: str = "base"
+
+    def __init__(
+        self,
+        model: str,
+        endpoint: Optional[str] = None,
+        api_key: Optional[str] = None,
+        timeout: int = 120,
+    ):
+        """Record the model name, endpoint, API key and timeout as given."""
+        self.model, self.endpoint = model, endpoint
+        self.api_key, self.timeout = api_key, timeout
+
+    def classify(self, system: str, user: str, json_mode: bool = True) -> LLMResponse:
+        """Send one system + user prompt pair; must be overridden by subclasses."""
+        raise NotImplementedError
+
+    def check_available(self) -> tuple[bool, str]:
+        """Return ``(ok, message)``. Fast probe that the provider is reachable."""
+        raise NotImplementedError
+
+
+def _http_post_json(url: str, body: dict, headers: dict, timeout: int) -> dict:
+    """POST ``body`` as JSON to ``url`` and return the decoded JSON object.
+
+    ``headers`` are merged over a default ``Content-Type: application/json``.
+
+    Raises:
+        LLMError: for a URL urllib cannot parse, an HTTP error status (the
+            first 500 characters of the error body are included when
+            readable), a transport failure, a response body that is not valid
+            JSON, or a JSON body whose top level is not an object.
+    """
+    # Local import: keeps the file's top-level import block untouched.
+    from http.client import HTTPException
+
+    payload = json.dumps(body).encode("utf-8")
+    try:
+        # Request() raises ValueError for a URL without a scheme, so it has to
+        # be guarded as well to honour the "always LLMError" contract.
+        req = Request(
+            url,
+            data=payload,
+            headers={"Content-Type": "application/json", **headers},
+        )
+    except ValueError as e:
+        raise LLMError(f"Invalid URL {url!r}: {e}") from e
+
+    try:
+        with urlopen(req, timeout=timeout) as resp:
+            parsed = json.loads(resp.read())
+    except HTTPError as e:
+        # HTTPError subclasses URLError, so it must be handled first.
+        detail = ""
+        try:
+            detail = e.read().decode("utf-8", errors="replace")[:500]
+        except Exception:
+            # Best effort only: fall back to the reason phrase below.
+            pass
+        raise LLMError(f"HTTP {e.code} from {url}: {detail or e.reason}") from e
+    except (URLError, OSError, HTTPException) as e:
+        # HTTPException covers http.client failures (e.g. IncompleteRead,
+        # InvalidURL) that are neither URLError nor OSError.
+        raise LLMError(f"Cannot reach {url}: {e}") from e
+    except ValueError as e:
+        # json.JSONDecodeError, plus UnicodeDecodeError for undecodable bytes.
+        raise LLMError(f"Malformed response from {url}: {e}") from e
+
+    if not isinstance(parsed, dict):
+        # Callers index the result as a mapping; reject lists/scalars here.
+        raise LLMError(
+            f"Malformed response from {url}: expected a JSON object, "
+            f"got {type(parsed).__name__}"
+        )
+    return parsed
+
+
+# ==================== OLLAMA ====================
+
+
+class OllamaProvider(LLMProvider):
+    """Provider for an Ollama server: ``/api/tags`` probe, ``/api/chat`` calls."""
+
+    name = "ollama"
+    DEFAULT_ENDPOINT = "http://localhost:11434"
+
+    def __init__(
+        self,
+        model: str,
+        endpoint: Optional[str] = None,
+        timeout: int = 180,
+        **_: object,
+    ):
+        """Store settings; ``endpoint`` falls back to ``DEFAULT_ENDPOINT``.
+
+        Extra keyword arguments (such as ``api_key``) are accepted and ignored
+        so every provider can be built with the same call.
+        """
+        super().__init__(
+            model=model,
+            endpoint=endpoint or self.DEFAULT_ENDPOINT,
+            timeout=timeout,
+        )
+
+    def _url(self, path: str) -> str:
+        """Join ``path`` onto the endpoint, tolerating a trailing ``/`` on it."""
+        return f"{self.endpoint.rstrip('/')}{path}"
+
+    def check_available(self) -> tuple[bool, str]:
+        """Return ``(ok, message)`` after listing the server's models.
+
+        Never raises for an unreachable server, a malformed endpoint or an
+        unexpected payload; those are reported as ``(False, message)``.
+        """
+        # Local import: keeps the file's top-level import block untouched.
+        from http.client import HTTPException
+
+        try:
+            with urlopen(self._url("/api/tags"), timeout=5) as resp:
+                data = json.loads(resp.read())
+        except (URLError, OSError, HTTPException, ValueError) as e:
+            # HTTPError is a URLError; JSONDecodeError and the "unknown url
+            # type" error for a scheme-less endpoint are ValueErrors.
+            return False, f"Cannot reach Ollama at {self.endpoint}: {e}"
+        if not isinstance(data, dict):
+            return False, f"Unexpected response from Ollama at {self.endpoint}"
+        models = data.get("models")
+        if not isinstance(models, list):
+            models = []
+        names = {m.get("name", "") for m in models if isinstance(m, dict)}
+        # Ollama tags may or may not include ':latest' — accept either form
+        wanted = {self.model, f"{self.model}:latest"}
+        if not names & wanted:
+            return (
+                False,
+                f"Model '{self.model}' not loaded in Ollama. Run: ollama pull {self.model}",
+            )
+        return True, "ok"
+
+    def classify(self, system: str, user: str, json_mode: bool = True) -> LLMResponse:
+        """Run one non-streaming chat request at temperature 0.1.
+
+        With ``json_mode`` the request carries ``format: "json"``.
+
+        Raises:
+            LLMError: on any HTTP/transport failure, or when the reply has no
+                message content.
+        """
+        body: dict = {
+            "model": self.model,
+            "messages": [
+                {"role": "system", "content": system},
+                {"role": "user", "content": user},
+            ],
+            "stream": False,
+            "options": {"temperature": 0.1},
+        }
+        if json_mode:
+            body["format"] = "json"
+        data = _http_post_json(self._url("/api/chat"), body, headers={}, timeout=self.timeout)
+        # Guard each level so an unexpected shape becomes LLMError, not AttributeError.
+        message = data.get("message") if isinstance(data, dict) else None
+        text = message.get("content", "") if isinstance(message, dict) else ""
+        if not text:
+            raise LLMError(f"Empty response from Ollama (model={self.model})")
+        return LLMResponse(text=text, model=self.model, provider=self.name, raw=data)
+
+
+# ==================== OPENAI-COMPAT ====================
+
+
+class OpenAICompatProvider(LLMProvider):
+    """Any OpenAI-compatible ``/v1/chat/completions`` endpoint.
+
+    Supply ``--llm-endpoint http://host:port`` (with or without ``/v1``).
+    API key via ``--llm-api-key`` or the ``OPENAI_API_KEY`` env var.
+    """
+
+    name = "openai-compat"
+
+    def __init__(
+        self,
+        model: str,
+        endpoint: Optional[str] = None,
+        api_key: Optional[str] = None,
+        timeout: int = 120,
+        **_: object,
+    ):
+        """Store settings; ``api_key`` falls back to ``$OPENAI_API_KEY``."""
+        resolved_key = api_key or os.environ.get("OPENAI_API_KEY")
+        super().__init__(model=model, endpoint=endpoint, api_key=resolved_key, timeout=timeout)
+
+    def _resolve_url(self) -> str:
+        """Return the chat-completions URL derived from the endpoint.
+
+        Accepts a bare base URL, a base ending in ``/v1``, or the full
+        ``.../chat/completions`` URL. Raises ``LLMError`` without an endpoint.
+        """
+        if not self.endpoint:
+            raise LLMError("openai-compat provider requires --llm-endpoint")
+        url = self.endpoint.rstrip("/")
+        if url.endswith("/chat/completions"):
+            return url
+        if not url.endswith("/v1"):
+            url = f"{url}/v1"
+        return f"{url}/chat/completions"
+
+    def check_available(self) -> tuple[bool, str]:
+        """Return ``(ok, message)`` after a GET on ``<base>/v1/models``.
+
+        Never raises for an unreachable server or a malformed endpoint; those
+        are reported as ``(False, message)``.
+        """
+        # Local import: keeps the file's top-level import block untouched.
+        from http.client import HTTPException
+
+        if not self.endpoint:
+            return False, "no --llm-endpoint configured"
+        base = self.endpoint.rstrip("/")
+        base = base.removesuffix("/chat/completions").removesuffix("/v1")
+        try:
+            req = Request(f"{base}/v1/models")
+            if self.api_key:
+                req.add_header("Authorization", f"Bearer {self.api_key}")
+            with urlopen(req, timeout=5):
+                pass
+        except (URLError, OSError, HTTPException, ValueError) as e:
+            # HTTPError is a URLError. ValueError comes from Request() when the
+            # endpoint has no scheme; HTTPException from http.client itself
+            # (e.g. InvalidURL). A probe must report these, not raise.
+            return False, f"Cannot reach {self.endpoint}: {e}"
+        return True, "ok"
+
+    def classify(self, system: str, user: str, json_mode: bool = True) -> LLMResponse:
+        """Run one chat-completions request at temperature 0.1.
+
+        With ``json_mode`` the request carries
+        ``response_format: {"type": "json_object"}``. A bearer token is sent
+        only when an API key is configured.
+
+        Raises:
+            LLMError: on a missing endpoint, any HTTP/transport failure, an
+                unexpected response shape, or empty content.
+        """
+        body: dict = {
+            "model": self.model,
+            "messages": [
+                {"role": "system", "content": system},
+                {"role": "user", "content": user},
+            ],
+            "temperature": 0.1,
+        }
+        if json_mode:
+            body["response_format"] = {"type": "json_object"}
+        headers = {}
+        if self.api_key:
+            headers["Authorization"] = f"Bearer {self.api_key}"
+        data = _http_post_json(self._resolve_url(), body, headers=headers, timeout=self.timeout)
+        try:
+            text = data["choices"][0]["message"]["content"]
+        except (KeyError, IndexError, TypeError) as e:
+            raise LLMError(f"Unexpected response shape: {e}") from e
+        if not text:
+            raise LLMError(f"Empty response from {self.name} (model={self.model})")
+        return LLMResponse(text=text, model=self.model, provider=self.name, raw=data)
+
+
+# ==================== ANTHROPIC ====================
+
+
+class AnthropicProvider(LLMProvider):
+    """Provider for the Anthropic Messages API (``/v1/messages``)."""
+
+    name = "anthropic"
+    DEFAULT_ENDPOINT = "https://api.anthropic.com"
+    API_VERSION = "2023-06-01"
+
+    def __init__(
+        self,
+        model: str,
+        api_key: Optional[str] = None,
+        endpoint: Optional[str] = None,
+        timeout: int = 120,
+        **_: object,
+    ):
+        """Store settings.
+
+        ``api_key`` falls back to ``$ANTHROPIC_API_KEY`` and ``endpoint`` to
+        ``DEFAULT_ENDPOINT``.
+        """
+        key = api_key or os.environ.get("ANTHROPIC_API_KEY")
+        super().__init__(
+            model=model,
+            endpoint=endpoint or self.DEFAULT_ENDPOINT,
+            api_key=key,
+            timeout=timeout,
+        )
+
+    def check_available(self) -> tuple[bool, str]:
+        """Return ``(ok, message)`` based only on whether an API key is set."""
+        if not self.api_key:
+            return False, "ANTHROPIC_API_KEY not set (use --llm-api-key or env)"
+        # Don't probe — a live request would cost money. First real call will
+        # surface auth errors if the key is invalid.
+        return True, "ok"
+
+    def classify(self, system: str, user: str, json_mode: bool = True) -> LLMResponse:
+        """Run one Messages API request (``max_tokens`` 2048, temperature 0.1).
+
+        With ``json_mode`` a JSON-only instruction is appended to the system
+        prompt. All ``text`` blocks of the reply are concatenated.
+
+        Raises:
+            LLMError: on a missing API key, any HTTP/transport failure, an
+                unexpected response shape, or an empty reply.
+        """
+        if not self.api_key:
+            raise LLMError("Anthropic provider requires ANTHROPIC_API_KEY env or --llm-api-key")
+        sys_prompt = system
+        if json_mode:
+            sys_prompt += "\n\nRespond with valid JSON only, no prose."
+        body = {
+            "model": self.model,
+            "max_tokens": 2048,
+            "temperature": 0.1,
+            "system": sys_prompt,
+            "messages": [{"role": "user", "content": user}],
+        }
+        headers = {
+            "X-API-Key": self.api_key,
+            "anthropic-version": self.API_VERSION,
+        }
+        # Strip a trailing slash so a custom endpoint such as
+        # "https://proxy.example/" does not produce "//v1/messages".
+        base = self.endpoint.rstrip("/")
+        data = _http_post_json(f"{base}/v1/messages", body, headers=headers, timeout=self.timeout)
+        try:
+            text = "".join(
+                b.get("text", "") for b in data.get("content", []) or [] if b.get("type") == "text"
+            )
+        except (AttributeError, TypeError) as e:
+            raise LLMError(f"Unexpected response shape: {e}") from e
+        if not text:
+            raise LLMError(f"Empty response from Anthropic (model={self.model})")
+        return LLMResponse(text=text, model=self.model, provider=self.name, raw=data)
+
+
+# ==================== FACTORY ====================
+
+
+# Registry of selectable providers, keyed by each class's ``name`` attribute
+# ("ollama", "openai-compat", "anthropic"), in that order.
+PROVIDERS: dict[str, type[LLMProvider]] = {
+    provider_cls.name: provider_cls
+    for provider_cls in (OllamaProvider, OpenAICompatProvider, AnthropicProvider)
+}
+
+
+def get_provider(
+    name: str,
+    model: str,
+    endpoint: Optional[str] = None,
+    api_key: Optional[str] = None,
+    timeout: int = 120,
+) -> LLMProvider:
+    """Instantiate the provider registered under ``name``.
+
+    All remaining arguments are forwarded to the provider's constructor as
+    keyword arguments. Raises ``LLMError`` when ``name`` is not a key of
+    ``PROVIDERS``.
+    """
+    if name not in PROVIDERS:
+        raise LLMError(f"Unknown provider '{name}'. Choices: {sorted(PROVIDERS.keys())}")
+    provider_cls = PROVIDERS[name]
+    return provider_cls(model=model, endpoint=endpoint, api_key=api_key, timeout=timeout)