diff --git a/core/server.py b/core/server.py index 15a55e0..00d05f5 100644 --- a/core/server.py +++ b/core/server.py @@ -96,9 +96,7 @@ class SecureFastMCP(FastMCP): # Rebuild middleware stack app.middleware_stack = app.build_middleware_stack() - logger.info( - "Added middleware stack: WellKnownCacheControl, Session Management" - ) + logger.info("Added middleware stack: WellKnownCacheControl, Session Management") return app diff --git a/gmail/gmail_tools.py b/gmail/gmail_tools.py index ff875d5..3576c86 100644 --- a/gmail/gmail_tools.py +++ b/gmail/gmail_tools.py @@ -37,6 +37,19 @@ GMAIL_BATCH_SIZE = 25 GMAIL_REQUEST_DELAY = 0.1 HTML_BODY_TRUNCATE_LIMIT = 20000 GMAIL_METADATA_HEADERS = ["Subject", "From", "To", "Cc", "Message-ID", "Date"] +LOW_VALUE_TEXT_PLACEHOLDERS = ( + "your client does not support html", + "view this email in your browser", + "open this email in your browser", +) +LOW_VALUE_TEXT_FOOTER_MARKERS = ( + "mailing list", + "mailman/listinfo", + "unsubscribe", + "list-unsubscribe", + "manage preferences", +) +LOW_VALUE_TEXT_HTML_DIFF_MIN = 80 class _HTMLTextExtractor(HTMLParser): @@ -154,16 +167,29 @@ def _format_body_content(text_body: str, html_body: str) -> str: """ text_stripped = text_body.strip() html_stripped = html_body.strip() + html_text = _html_to_text(html_stripped).strip() if html_stripped else "" - # Detect useless fallback: HTML comments in text, or HTML is 50x+ longer - use_html = html_stripped and ( - not text_stripped - or "