2025-05-12 13:52:53 -04:00
"""
Google Drive MCP Tools
This module provides MCP tools for interacting with Google Drive API.
"""
import logging
import asyncio
2025-05-12 17:32:38 -04:00
import re
2025-05-12 13:52:53 -04:00
import os
from typing import List , Optional , Dict , Any
2025-05-24 13:49:04 -04:00
import zipfile , xml . etree . ElementTree as ET
2025-05-12 13:52:53 -04:00
from mcp import types
from fastapi import Header
from googleapiclient . discovery import build
from googleapiclient . errors import HttpError
from googleapiclient . http import MediaIoBaseDownload # For file content
import io # For file content
2025-05-24 10:43:55 -04:00
from auth . google_auth import get_authenticated_google_service
from core . server import server
2025-05-14 09:35:48 -04:00
from core . server import (
2025-05-12 13:52:53 -04:00
DRIVE_READONLY_SCOPE ,
2025-05-14 09:35:48 -04:00
DRIVE_FILE_SCOPE ,
2025-05-12 13:52:53 -04:00
)
logger = logging . getLogger ( __name__ )
2025-05-24 13:49:04 -04:00
# -------------------------------------------------------------------------
# Helper: pull raw text from OOXML containers (docx / xlsx / pptx)
# -------------------------------------------------------------------------
def _extract_office_xml_text ( file_bytes : bytes , mime_type : str ) - > Optional [ str ] :
"""
Very light-weight XML scraper for Word, Excel, PowerPoint files.
Returns plain-text if something readable is found, else None.
No external deps – just std-lib zipfile + ElementTree.
"""
try :
with zipfile . ZipFile ( io . BytesIO ( file_bytes ) ) as zf :
# Map MIME → iterable of XML files to inspect
if mime_type == (
" application/vnd.openxmlformats-officedocument.wordprocessingml.document "
) :
targets = [ " word/document.xml " ]
elif mime_type == (
" application/vnd.openxmlformats-officedocument.presentationml.presentation "
) :
targets = [ n for n in zf . namelist ( ) if n . startswith ( " ppt/slides/slide " ) ]
elif mime_type == (
" application/vnd.openxmlformats-officedocument.spreadsheetml.sheet "
) :
targets = [ n for n in zf . namelist ( ) if n . startswith ( " xl/worksheets/sheet " ) ]
else :
return None
pieces : List [ str ] = [ ]
for member in targets :
try :
xml_root = ET . fromstring ( zf . read ( member ) )
# In both Word/PowerPoint the text is in <w:t> or <a:t>;
# in Excel, cell values are in <v>.
for elem in xml_root . iter ( ) :
tag = elem . tag . split ( " } " ) [ - 1 ] # strip namespace
if tag in { " t " , " v " } and elem . text :
pieces . append ( elem . text )
pieces . append ( " \n " ) # separator per part / sheet / slide
except Exception :
continue # ignore individual slide/sheet errors
text = " \n " . join ( pieces ) . strip ( )
return text or None
except Exception as e :
logger . error ( f " Failed to extract file content: { e } " )
# Any failure → quietly signal "not handled"
return None
2025-05-12 13:52:53 -04:00
@server.tool()
async def search_drive_files(
    user_google_email: str,
    query: str,
    page_size: int = 10,
) -> types.CallToolResult:
    """
    Searches for files and folders within a user's Google Drive based on a query string.

    A query that looks like a structured Drive expression is sent unchanged;
    anything else is treated as free text and wrapped in a
    `fullText contains '...'` clause.

    Args:
        user_google_email (str): The user's Google email address. Required.
        query (str): The search query string. Supports Google Drive search operators (e.g., 'name contains "report"', 'mimeType="application/vnd.google-apps.document"', 'parents in "folderId"').
        page_size (int): The maximum number of files to return. Defaults to 10.

    Returns:
        types.CallToolResult: Contains a list of found files/folders with their details (ID, name, type, size, modified time, link),
                              an error message if the API call fails,
                              or an authentication guidance message if credentials are required.
    """
    tool_name = "search_drive_files"
    logger.info(f"[{tool_name}] Invoked. Email: '{user_google_email}', Query: '{query}'")

    auth_outcome = await get_authenticated_google_service(
        service_name="drive",
        version="v3",
        tool_name=tool_name,
        user_google_email=user_google_email,
        required_scopes=[DRIVE_READONLY_SCOPE],
    )
    # The auth helper hands back a ready-made result when authentication
    # did not succeed; pass it straight through.
    if isinstance(auth_outcome, types.CallToolResult):
        return auth_outcome
    service, _ = auth_outcome

    try:
        # Heuristic: does the input contain Drive operators / well-known
        # structured-query keywords, or is it plain free text?
        drive_query_pattern = r"(\w+\s*(=|!=|>|<|contains|in|has)\s*['\"]?.+?['\"]?|\w+\s*(=|!=|>|<)\s*\d+|trashed\s*=\s*(true|false)|starred\s*=\s*(true|false)|properties\s+has\s*\{.*?\}|appProperties\s+has\s*\{.*?\}|'[^']+'\s+in\s+parents)"
        looks_structured = re.search(drive_query_pattern, query, re.IGNORECASE)

        if not looks_structured:
            # Free text: escape single quotes, then wrap in a full-text clause.
            quoted = query.replace("'", "\\'")
            final_query = f"fullText contains '{quoted}'"
            logger.info(f"[search_drive_files] Reformatting free text query '{query}' to '{final_query}'")
        else:
            final_query = query  # already a Drive expression; use as is

        list_request = service.files().list(
            q=final_query,
            pageSize=page_size,
            fields="nextPageToken, files(id, name, mimeType, webViewLink, iconLink, modifiedTime, size)",
        )
        # The client call is blocking; run it off the event loop.
        response = await asyncio.to_thread(list_request.execute)

        matches = response.get('files', [])
        if not matches:
            empty_note = types.TextContent(type="text", text=f"No files found for '{query}'.")
            return types.CallToolResult(content=[empty_note])

        lines = [f"Found {len(matches)} files for {user_google_email} matching '{query}':"]
        for item in matches:
            # Size is only reported when the API included it for this item.
            size_str = f", Size: {item.get('size', 'N/A')}" if 'size' in item else ""
            lines.append(
                f"- Name: \"{item['name']}\" (ID: {item['id']}, Type: {item['mimeType']}{size_str}, Modified: {item.get('modifiedTime', 'N/A')}) Link: {item.get('webViewLink', '#')}"
            )
        listing = types.TextContent(type="text", text="\n".join(lines))
        return types.CallToolResult(content=[listing])
    except HttpError as error:
        logger.error(f"API error searching Drive files: {error}", exc_info=True)
        failure = types.TextContent(type="text", text=f"API error: {error}")
        return types.CallToolResult(isError=True, content=[failure])
    except Exception as e:
        logger.exception(f"Unexpected error searching Drive files: {e}")
        failure = types.TextContent(type="text", text=f"Unexpected error: {e}")
        return types.CallToolResult(isError=True, content=[failure])
@server.tool()
async def get_drive_file_content(
    user_google_email: str,
    file_id: str,
) -> types.CallToolResult:
    """
    Retrieves the content of a specific Google Drive file by ID.

    • Native Google Docs, Sheets, Slides → exported as text / CSV.
    • Office files (.docx, .xlsx, .pptx) → unzipped & parsed with std-lib to
      extract readable text.
    • Any other file → downloaded; tries UTF-8 decode, else notes binary.

    Args:
        user_google_email: The user's Google email address.
        file_id: Drive file ID.

    Returns:
        types.CallToolResult with a short header (name, ID, type, link)
        followed by the plain-text content, or with isError=True and an
        error message when the API call or processing fails. When
        authentication does not succeed, the result produced by the auth
        helper is returned unchanged.
    """
    tool_name = "get_drive_file_content"
    logger.info(f"[{tool_name}] Invoked. File ID: '{file_id}'")

    auth_result = await get_authenticated_google_service(
        service_name="drive",
        version="v3",
        tool_name=tool_name,
        user_google_email=user_google_email,
        required_scopes=[DRIVE_READONLY_SCOPE],
    )
    if isinstance(auth_result, types.CallToolResult):
        return auth_result  # authentication problem
    service, _ = auth_result

    try:
        # --------------------------------------------------------------
        # Metadata lookup
        # --------------------------------------------------------------
        file_metadata = await asyncio.to_thread(
            service.files().get(
                fileId=file_id, fields="id, name, mimeType, webViewLink"
            ).execute
        )
        mime_type = file_metadata.get("mimeType", "")
        file_name = file_metadata.get("name", "Unknown File")

        # --------------------------------------------------------------
        # Decide export vs. direct download
        # --------------------------------------------------------------
        # Native Google formats have no raw bytes to fetch; they must be
        # exported to a concrete format instead.
        export_mime_type = {
            "application/vnd.google-apps.document": "text/plain",
            "application/vnd.google-apps.spreadsheet": "text/csv",
            "application/vnd.google-apps.presentation": "text/plain",
        }.get(mime_type)

        request_obj = (
            service.files().export_media(fileId=file_id, mimeType=export_mime_type)
            if export_mime_type
            else service.files().get_media(fileId=file_id)
        )

        # --------------------------------------------------------------
        # Download
        # --------------------------------------------------------------
        fh = io.BytesIO()
        downloader = MediaIoBaseDownload(fh, request_obj)
        done = False
        while not done:
            # next_chunk() blocks on network I/O; run it in a worker thread
            # the same way every other API call in this module does.
            _, done = await asyncio.to_thread(downloader.next_chunk)

        file_content_bytes = fh.getvalue()

        # --------------------------------------------------------------
        # Attempt Office XML extraction
        # --------------------------------------------------------------
        office_text = _extract_office_xml_text(file_content_bytes, mime_type)
        if office_text:
            body_text = office_text
        else:
            # Fallback: try UTF-8; otherwise flag binary
            try:
                body_text = file_content_bytes.decode("utf-8")
            except UnicodeDecodeError:
                body_text = (
                    f"[Binary or unsupported text encoding — "
                    f"{len(file_content_bytes)} bytes]"
                )

        # --------------------------------------------------------------
        # Assemble response
        # --------------------------------------------------------------
        header = (
            f'File: "{file_name}" (ID: {file_id}, Type: {mime_type})\n'
            f'Link: {file_metadata.get("webViewLink", "#")}\n\n--- CONTENT ---\n'
        )
        return types.CallToolResult(
            content=[types.TextContent(type="text", text=header + body_text)]
        )

    except HttpError as error:
        logger.error(
            f"API error getting Drive file content for {file_id}: {error}",
            exc_info=True,
        )
        return types.CallToolResult(
            isError=True,
            content=[types.TextContent(type="text", text=f"API error: {error}")],
        )
    except Exception as e:
        logger.exception(f"Unexpected error getting Drive file content for {file_id}: {e}")
        return types.CallToolResult(
            isError=True,
            content=[types.TextContent(type="text", text=f"Unexpected error: {e}")],
        )
2025-05-12 13:52:53 -04:00
@server.tool()
async def list_drive_items(
    user_google_email: str,
    folder_id: str = 'root',  # Default to root folder
    page_size: int = 100,  # Default page size for listing
) -> types.CallToolResult:
    """
    Lists files and folders directly within a specified Google Drive folder.
    Defaults to the root folder if `folder_id` is not provided. Does not recurse into subfolders.

    Args:
        user_google_email (str): The user's Google email address. Required.
        folder_id (str): The ID of the Google Drive folder to list items from. Defaults to 'root'.
        page_size (int): The maximum number of items to return per page. Defaults to 100.

    Returns:
        types.CallToolResult: Contains a list of files/folders within the specified folder, including their details (ID, name, type, size, modified time, link),
                              an error message if the API call fails or the folder is not accessible/found,
                              or an authentication guidance message if credentials are required.
    """
    tool_name = "list_drive_items"
    logger.info(f"[{tool_name}] Invoked. Email: '{user_google_email}', Folder ID: '{folder_id}'")

    auth_result = await get_authenticated_google_service(
        service_name="drive",
        version="v3",
        tool_name=tool_name,
        user_google_email=user_google_email,
        required_scopes=[DRIVE_READONLY_SCOPE],
    )
    if isinstance(auth_result, types.CallToolResult):
        return auth_result  # Auth error
    service, _ = auth_result

    try:
        # folder_id is caller-supplied and lands inside a quoted literal of
        # the Drive query; escape backslashes and quotes so a stray character
        # cannot terminate the literal and change the meaning of the query.
        safe_folder_id = folder_id.replace("\\", "\\\\").replace("'", "\\'")

        results = await asyncio.to_thread(
            service.files().list(
                q=f"'{safe_folder_id}' in parents and trashed=false",  # List items directly in the folder
                pageSize=page_size,
                fields="nextPageToken, files(id, name, mimeType, webViewLink, iconLink, modifiedTime, size)",
            ).execute
        )
        files = results.get('files', [])
        if not files:
            return types.CallToolResult(content=[types.TextContent(type="text", text=f"No items found in folder '{folder_id}'.")])

        formatted_items_text_parts = [f"Found {len(files)} items in folder '{folder_id}' for {user_google_email}:"]
        for item in files:
            # Size is only shown when the API included it for this item.
            size_str = f", Size: {item.get('size', 'N/A')}" if 'size' in item else ""
            formatted_items_text_parts.append(
                f"- Name: \"{item['name']}\" (ID: {item['id']}, Type: {item['mimeType']}{size_str}, Modified: {item.get('modifiedTime', 'N/A')}) Link: {item.get('webViewLink', '#')}"
            )
        text_output = "\n".join(formatted_items_text_parts)
        return types.CallToolResult(content=[types.TextContent(type="text", text=text_output)])
    except HttpError as error:
        logger.error(f"API error listing Drive items in folder {folder_id}: {error}", exc_info=True)
        return types.CallToolResult(isError=True, content=[types.TextContent(type="text", text=f"API error: {error}")])
    except Exception as e:
        logger.exception(f"Unexpected error listing Drive items in folder {folder_id}: {e}")
        return types.CallToolResult(isError=True, content=[types.TextContent(type="text", text=f"Unexpected error: {e}")])
@server.tool()
async def create_drive_file(
    user_google_email: str,
    file_name: str,
    content: str,
    folder_id: str = 'root',  # Default to root folder
    mime_type: str = 'text/plain',  # Default to plain text
) -> types.CallToolResult:
    """
    Creates a new file in Google Drive with the specified name, content, and optional parent folder.

    Args:
        user_google_email (str): The user's Google email address. Required.
        file_name (str): The name for the new file.
        content (str): The content to write to the file (uploaded as UTF-8 bytes).
        folder_id (str): The ID of the parent folder. Defaults to 'root'.
        mime_type (str): The MIME type of the file. Defaults to 'text/plain'.

    Returns:
        A CallToolResult confirming creation or an error/auth guidance message.
    """
    # Local import: the module header only imports the download helper from
    # googleapiclient.http, and an upload needs its counterpart.
    from googleapiclient.http import MediaIoBaseUpload

    tool_name = "create_drive_file"
    logger.info(f"[{tool_name}] Invoked. Email: '{user_google_email}', File Name: {file_name}, Folder ID: {folder_id}")

    auth_result = await get_authenticated_google_service(
        service_name="drive",
        version="v3",
        tool_name=tool_name,
        user_google_email=user_google_email,
        required_scopes=[DRIVE_FILE_SCOPE],
    )
    if isinstance(auth_result, types.CallToolResult):
        return auth_result  # Auth error
    service, user_email = auth_result

    try:
        file_metadata = {
            'name': file_name,
            'parents': [folder_id],
            'mimeType': mime_type,
        }
        # media_body must be an upload object wrapping the bytes to send;
        # a MediaIoBaseDownload (used previously) is a downloader and cannot
        # serve as a request body.
        media = MediaIoBaseUpload(
            io.BytesIO(content.encode('utf-8')),
            mimetype=mime_type,
            resumable=True,
        )

        created_file = await asyncio.to_thread(
            service.files().create(
                body=file_metadata,
                media_body=media,
                fields='id, name, webViewLink',
            ).execute
        )

        link = created_file.get('webViewLink', 'No link available')
        confirmation_message = f"Successfully created file '{created_file.get('name', file_name)}' (ID: {created_file.get('id', 'N/A')}) in folder '{folder_id}' for {user_email}. Link: {link}"
        logger.info(f"Successfully created file. Link: {link}")
        return types.CallToolResult(content=[types.TextContent(type="text", text=confirmation_message)])

    except HttpError as error:
        logger.error(f"API error creating Drive file '{file_name}': {error}", exc_info=True)
        return types.CallToolResult(isError=True, content=[types.TextContent(type="text", text=f"API error: {error}")])
    except Exception as e:
        logger.exception(f"Unexpected error creating Drive file '{file_name}': {e}")
        return types.CallToolResult(isError=True, content=[types.TextContent(type="text", text=f"Unexpected error: {e}")])