diff --git a/gmail/gmail_tools.py b/gmail/gmail_tools.py index a79c327..e74bd33 100644 --- a/gmail/gmail_tools.py +++ b/gmail/gmail_tools.py @@ -29,11 +29,13 @@ logger = logging.getLogger(__name__) GMAIL_BATCH_SIZE = 25 GMAIL_REQUEST_DELAY = 0.1 +HTML_BODY_TRUNCATE_LIMIT = 20000 def _extract_message_body(payload): """ Helper function to extract plain text body from a Gmail message payload. + (Maintained for backward compatibility) Args: payload (dict): The message payload from Gmail API @@ -41,29 +43,82 @@ def _extract_message_body(payload): Returns: str: The plain text body content, or empty string if not found """ - body_data = "" + bodies = _extract_message_bodies(payload) + return bodies.get("text", "") + + +def _extract_message_bodies(payload): + """ + Helper function to extract both plain text and HTML bodies from a Gmail message payload. + + Args: + payload (dict): The message payload from Gmail API + + Returns: + dict: Dictionary with 'text' and 'html' keys containing body content + """ + text_body = "" + html_body = "" parts = [payload] if "parts" not in payload else payload.get("parts", []) part_queue = list(parts) # Use a queue for BFS traversal of parts while part_queue: part = part_queue.pop(0) - if part.get("mimeType") == "text/plain" and part.get("body", {}).get("data"): - data = base64.urlsafe_b64decode(part["body"]["data"]) - body_data = data.decode("utf-8", errors="ignore") - break # Found plain text body - elif part.get("mimeType", "").startswith("multipart/") and "parts" in part: - part_queue.extend(part.get("parts", [])) # Add sub-parts to the queue + mime_type = part.get("mimeType", "") + body_data = part.get("body", {}).get("data") - # If no plain text found, check the main payload body if it exists - if ( - not body_data - and payload.get("mimeType") == "text/plain" - and payload.get("body", {}).get("data") - ): - data = base64.urlsafe_b64decode(payload["body"]["data"]) - body_data = data.decode("utf-8", errors="ignore") + if body_data: + try: + decoded_data = base64.urlsafe_b64decode(body_data).decode("utf-8", errors="ignore") + if mime_type == "text/plain" and not text_body: + text_body = decoded_data + elif mime_type == "text/html" and not html_body: + html_body = decoded_data + except Exception as e: + logger.warning(f"Failed to decode body part: {e}") - return body_data + # Add sub-parts to queue for multipart messages + if mime_type.startswith("multipart/") and "parts" in part: + part_queue.extend(part.get("parts", [])) + + # Check the main payload if it has body data directly + if payload.get("body", {}).get("data"): + try: + decoded_data = base64.urlsafe_b64decode(payload["body"]["data"]).decode("utf-8", errors="ignore") + mime_type = payload.get("mimeType", "") + if mime_type == "text/plain" and not text_body: + text_body = decoded_data + elif mime_type == "text/html" and not html_body: + html_body = decoded_data + except Exception as e: + logger.warning(f"Failed to decode main payload body: {e}") + + return { + "text": text_body, + "html": html_body + } + + +def _format_body_content(text_body: str, html_body: str) -> str: + """ + Helper function to format message body content with HTML fallback and truncation. + + Args: + text_body: Plain text body content + html_body: HTML body content + + Returns: + Formatted body content string + """ + if text_body.strip(): + return text_body + elif html_body.strip(): + # Truncate very large HTML to keep responses manageable + if len(html_body) > HTML_BODY_TRUNCATE_LIMIT: + html_body = html_body[:HTML_BODY_TRUNCATE_LIMIT] + "\n\n[HTML content truncated...]" + return f"[HTML Content Converted]\n{html_body}" + else: + return "[No readable content found]" def _extract_headers(payload: dict, header_names: List[str]) -> Dict[str, str]: @@ -96,7 +151,7 @@ def _prepare_gmail_message( ) -> tuple[str, Optional[str]]: """ Prepare a Gmail message with threading support. - + Args: subject: Email subject body: Email body (plain text) @@ -106,7 +161,7 @@ def _prepare_gmail_message( thread_id: Optional Gmail thread ID to reply within in_reply_to: Optional Message-ID of the message being replied to references: Optional chain of Message-IDs for proper threading - + Returns: Tuple of (raw_message, thread_id) where raw_message is base64 encoded """ @@ -130,13 +185,13 @@ def _prepare_gmail_message( # Add reply headers for threading if in_reply_to: message["In-Reply-To"] = in_reply_to - + if references: message["References"] = references # Encode message raw_message = base64.urlsafe_b64encode(message.as_bytes()).decode() - + return raw_message, thread_id @@ -317,9 +372,14 @@ async def get_gmail_message_content( .execute ) - # Extract the plain text body using helper function + # Extract both text and HTML bodies using enhanced helper function payload = message_full.get("payload", {}) - body_data = _extract_message_body(payload) + bodies = _extract_message_bodies(payload) + text_body = bodies.get("text", "") + html_body = bodies.get("html", "") + + # Format body content with HTML fallback + body_data = _format_body_content(text_body, html_body) content_text = "\n".join( [ @@ -481,14 +541,21 @@ async def get_gmail_messages_content_batch( headers = _extract_headers(payload, ["Subject", "From"]) subject = headers.get("Subject", "(no subject)") sender = headers.get("From", "(unknown sender)") - body = _extract_message_body(payload) + + # Extract both text and HTML bodies using enhanced helper function + bodies = _extract_message_bodies(payload) + text_body = bodies.get("text", "") + html_body = bodies.get("html", "") + + # Format body content with HTML fallback + body_data = _format_body_content(text_body, html_body) output_messages.append( f"Message ID: {mid}\n" f"Subject: {subject}\n" f"From: {sender}\n" f"Web Link: {_generate_gmail_web_url(mid)}\n" - f"\n{body or '[No text/plain body found]'}\n" + f"\n{body_data}\n" ) # Combine all messages with separators @@ -529,24 +596,24 @@ async def send_gmail_message( Returns: str: Confirmation message with the sent email's message ID. - + Examples: # Send a new email send_gmail_message(to="user@example.com", subject="Hello", body="Hi there!") - + # Send an email with CC and BCC send_gmail_message( - to="user@example.com", + to="user@example.com", cc="manager@example.com", bcc="archive@example.com", - subject="Project Update", + subject="Project Update", body="Here's the latest update..." ) - + # Send a reply send_gmail_message( - to="user@example.com", - subject="Re: Meeting tomorrow", + to="user@example.com", + subject="Re: Meeting tomorrow", body="Thanks for the update!", thread_id="thread_123", in_reply_to="", @@ -568,9 +635,9 @@ async def send_gmail_message( in_reply_to=in_reply_to, references=references, ) - + send_body = {"raw": raw_message} - + # Associate with thread if provided if thread_id_final: send_body["threadId"] = thread_id_final @@ -614,23 +681,23 @@ async def draft_gmail_message( Returns: str: Confirmation message with the created draft's ID. - + Examples: # Create a new draft draft_gmail_message(subject="Hello", body="Hi there!", to="user@example.com") - + # Create a draft with CC and BCC draft_gmail_message( - subject="Project Update", + subject="Project Update", body="Here's the latest update...", to="user@example.com", cc="manager@example.com", bcc="archive@example.com" ) - + # Create a reply draft draft_gmail_message( - subject="Re: Meeting tomorrow", + subject="Re: Meeting tomorrow", body="Thanks for the update!", to="user@example.com", thread_id="thread_123", @@ -656,7 +723,7 @@ async def draft_gmail_message( # Create a draft instead of sending draft_body = {"message": {"raw": raw_message}} - + # Associate with thread if provided if thread_id_final: draft_body["message"]["threadId"] = thread_id_final @@ -711,9 +778,14 @@ def _format_thread_content(thread_data: dict, thread_id: str) -> str: date = headers.get("Date", "(unknown date)") subject = headers.get("Subject", "(no subject)") - # Extract message body + # Extract both text and HTML bodies payload = message.get("payload", {}) - body_data = _extract_message_body(payload) + bodies = _extract_message_bodies(payload) + text_body = bodies.get("text", "") + html_body = bodies.get("html", "") + + # Format body content with HTML fallback + body_data = _format_body_content(text_body, html_body) # Add message to content content_lines.extend( @@ -731,7 +803,7 @@ def _format_thread_content(thread_data: dict, thread_id: str) -> str: content_lines.extend( [ "", - body_data or "[No text/plain body found]", + body_data, "", ] ) @@ -1101,3 +1173,4 @@ async def batch_modify_gmail_message_labels( actions.append(f"Removed labels: {', '.join(remove_label_ids)}") return f"Labels updated for {len(message_ids)} messages: {'; '.join(actions)}" +