From a0b7ba005d233d5bd9b3505cae77130259d3ea0a Mon Sep 17 00:00:00 2001 From: MSL <232237854+milla-jovovich@users.noreply.github.com> Date: Sun, 26 Apr 2026 15:31:44 -0700 Subject: [PATCH] feat(privacy): treat Tailscale CGNAT range (100.64.0.0/10) as local MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 2 files changed, 64 insertions, 0 deletions. 2 new tests (RED-first). Follow-up to #1224's privacy warning. The URL-based heuristic in ``mempalace.llm_client._endpoint_is_local`` shipped without recognizing Tailscale's CGNAT range (100.64.0.0/10), so a user running LM Studio, Ollama, or any local LLM accessible via a Tailscale-assigned 100.x.x.x address would currently get a wrong privacy warning — Tailscale addresses are network-private (only reachable inside the user's Tailnet) but they're not RFC1918, so the heuristic was treating them as external. This PR adds CGNAT recognition: when the hostname starts with ``100.`` AND the second octet is between 64 and 127 inclusive, it's classified as local. Addresses in 100.x.x.x outside that range (i.e. second octet < 64 or > 127) are regular allocated public space and remain external, so a user pointing at a public 100.0.0.1 still gets the warning. Concrete user impact: Before: ``mempalace init --llm-provider openai-compat --llm-endpoint http://100.100.50.50:1234`` (LM Studio on Tailnet) → triggers privacy warning incorrectly. After: same command → no warning. Data stays inside the user's Tailnet, so there is nothing for the warning to protect against. TDD: 2 tests added in ``tests/test_llm_client.py``, both RED-first. 1. ``test_openai_compat_provider_tailscale_cgnat_endpoint_is_local`` — covers three Tailscale CGNAT addresses (start, middle, near-end of the range) and pins that they're all classified local. This was the RED that drove the implementation. 2. 
``test_openai_compat_provider_outside_tailscale_cgnat_is_external`` — pins the boundary on both sides: addresses with second octet 0-63 and 128-255 stay external. Prevents future "treat all 100.x.x.x as local" overcorrection. Tests: 1388 total mempalace tests pass. 2 pre-existing environmental failures unrelated to this change (chromadb optional dep). Ruff check + format both clean. Backwards compatible: only widens the local-recognition set. Anything classified local before is still classified local; anything classified external before remains so unless it's specifically in the CGNAT range. Out of scope (tracked for future iteration based on real user feedback, not built speculatively): pre-init confirmation prompt before sending to external API, persistent ``private-only`` config flag that refuses external endpoints entirely, explicit cloud-provider name detection ("Using Anthropic's hosted API at ..." vs the current generic warning). --- mempalace/llm_client.py | 16 ++++++++++++++ tests/test_llm_client.py | 48 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+) diff --git a/mempalace/llm_client.py b/mempalace/llm_client.py index 837247b..9d65142 100644 --- a/mempalace/llm_client.py +++ b/mempalace/llm_client.py @@ -49,6 +49,8 @@ def _endpoint_is_local(url: Optional[str]) -> bool: - localhost, 127.0.0.1, ::1 - hostnames ending in .local (mDNS/Bonjour) - IPv4 RFC1918: 10.0.0.0/8, 172.16.0.0/12, 192.168.0.0/16 + - IPv4 CGNAT (Tailscale and similar VPN/tunnel networks): + 100.64.0.0/10 — first octet 100, second octet 64-127 inclusive - IPv6 unique-local addresses (fc00::/7) — fc.../fd... prefixes None / empty / unparseable URLs are treated as local (defensive default — @@ -81,6 +83,20 @@ def _endpoint_is_local(url: Optional[str]) -> bool: return True except ValueError: pass + if host.startswith("100."): + # 100.64.0.0/10 — Tailscale CGNAT range. First octet 100, second + # octet 64-127 inclusive. 
Users running a local LLM (LM Studio, + # Ollama, etc.) accessible via Tailscale on a 100.x.x.x address + # should not trigger the external-API privacy warning. + # 100.x.x.x outside this range is regular allocated public space + # and remains external. + parts = host.split(".") + if len(parts) >= 2: + try: + if 64 <= int(parts[1]) <= 127: + return True + except ValueError: + pass # IPv6 unique-local addresses fc00::/7 — match leading hex chars if host.startswith("fc") or host.startswith("fd"): return True diff --git a/tests/test_llm_client.py b/tests/test_llm_client.py index d9dd6e9..f525994 100644 --- a/tests/test_llm_client.py +++ b/tests/test_llm_client.py @@ -378,3 +378,51 @@ def test_anthropic_provider_default_endpoint_is_external(): f"Default AnthropicProvider endpoint must be external; got " f"is_external_service={p.is_external_service} for endpoint={p.endpoint}" ) + + +# ── Tailscale CGNAT range (issue #25 follow-up to #24) ────────────────── +# +# Tailscale assigns addresses in 100.64.0.0/10 (CGNAT range): first octet +# always 100, second octet 64-127 inclusive. Users running LM Studio / +# Ollama / any local LLM accessible via Tailscale would currently +# (post-#24, pre-#25) get a wrong privacy warning because the heuristic +# doesn't recognize CGNAT as private. These tests pin the fix. + + +def test_openai_compat_provider_tailscale_cgnat_endpoint_is_local(): + """Tailscale CGNAT range (100.64.0.0/10) — IPs where the first octet + is 100 AND the second octet is 64-127 inclusive — must be classified + as local. Tailscale users running LM Studio on their Tailnet should + not trigger the external-API warning. 
+ """ + cases = [ + ("http://100.64.0.1:1234", "start of CGNAT"), + ("http://100.100.50.50:1234", "middle of CGNAT (typical Tailscale assignment)"), + ("http://100.127.255.254:1234", "near end of CGNAT"), + ] + for endpoint, label in cases: + p = OpenAICompatProvider(model="any", endpoint=endpoint) + assert p.is_external_service is False, ( + f"Tailscale CGNAT address {endpoint} ({label}) must be classified " + f"local; got is_external_service={p.is_external_service}" + ) + + +def test_openai_compat_provider_outside_tailscale_cgnat_is_external(): + """Addresses in 100.x.x.x that fall OUTSIDE the CGNAT range + (100.64.0.0 - 100.127.255.255) are public IPs in regular allocated + space and must remain classified as external. Specifically: anything + where the second octet is < 64 or > 127. + """ + cases = [ + ("http://100.0.0.1:1234", "below CGNAT (public)"), + ("http://100.63.255.255:1234", "just below CGNAT (boundary)"), + ("http://100.128.0.0:1234", "just above CGNAT (boundary)"), + ("http://100.255.255.255:1234", "well above CGNAT"), + ] + for endpoint, label in cases: + p = OpenAICompatProvider(model="any", endpoint=endpoint) + assert p.is_external_service is True, ( + f"Address {endpoint} ({label}) is OUTSIDE Tailscale CGNAT and " + f"should remain external; got is_external_service={p.is_external_service}" + )