2025-05-12 13:52:53 -04:00
"""
Google Drive MCP Tools
This module provides MCP tools for interacting with Google Drive API.
"""
import logging
import asyncio
2025-05-12 17:32:38 -04:00
import re
2025-05-12 13:52:53 -04:00
from typing import List , Optional , Dict , Any
from mcp import types
from googleapiclient . errors import HttpError
2025-06-03 14:16:19 -04:00
from googleapiclient . http import MediaIoBaseDownload , MediaIoBaseUpload
2025-05-30 11:09:56 -04:00
import io
2025-05-12 13:52:53 -04:00
2025-05-24 10:43:55 -04:00
from auth . google_auth import get_authenticated_google_service
2025-05-24 13:55:00 -04:00
from core . utils import extract_office_xml_text
2025-05-24 10:43:55 -04:00
from core . server import server
2025-05-14 09:35:48 -04:00
from core . server import (
2025-05-12 13:52:53 -04:00
DRIVE_READONLY_SCOPE ,
2025-05-14 09:35:48 -04:00
DRIVE_FILE_SCOPE ,
2025-05-12 13:52:53 -04:00
)
logger = logging . getLogger ( __name__ )
# Heuristic for "this query is already written in Drive query syntax".
# Word operators (contains / has / in) must be whitespace-delimited and followed
# by the operand shape Drive requires; otherwise ordinary words that merely
# contain "in" or "has" (e.g. "marketing", "my invoice") would be misclassified
# as structured and sent to the API verbatim, producing an "Invalid Value" error.
_DRIVE_QUERY_PATTERN = re.compile(
    r"""
    \b\w+\s*(?:!=|<=|>=|=|<|>)\s*\S      # field <comparison> value
    | \b\w+\s+contains\s+['"]            # field contains 'value'
    | \b\w+\s+has\s*\{                   # properties has { key=... }
    | '[^']+'\s+in\s+\w+                 # 'value' in parents / owners / ...
    """,
    re.IGNORECASE | re.VERBOSE,
)


def _escape_drive_query_literal(value: str) -> str:
    """Escape a value for use inside a single-quoted Drive query string."""
    # Backslashes must be escaped first so the backslash added for quotes
    # is not itself doubled.
    return value.replace("\\", "\\\\").replace("'", "\\'")


def _build_search_query(query: str) -> str:
    """Return `query` unchanged if it looks structured, else wrap it as a fullText search."""
    if _DRIVE_QUERY_PATTERN.search(query):
        return query
    final_query = f"fullText contains '{_escape_drive_query_literal(query)}'"
    logger.info(f"[search_drive_files] Reformatting free text query '{query}' to '{final_query}'")
    return final_query


@server.tool()
async def search_drive_files(
    user_google_email: str,
    query: str,
    page_size: int = 10,
    drive_id: Optional[str] = None,
    include_items_from_all_drives: bool = True,
    corpora: Optional[str] = None,
) -> types.CallToolResult:
    """
    Searches for files and folders within a user's Google Drive, including shared drives.

    A query that already uses Drive search operators is passed through unchanged;
    anything else is treated as free text and wrapped in `fullText contains '...'`
    with quotes and backslashes escaped.

    Args:
        user_google_email (str): The user's Google email address. Required.
        query (str): The search query string. Supports Google Drive search operators.
        page_size (int): The maximum number of files to return. Defaults to 10.
        drive_id (Optional[str]): ID of the shared drive to search. If None, behavior depends on `corpora` and `include_items_from_all_drives`.
        include_items_from_all_drives (bool): Whether shared drive items should be included in results. Defaults to True. This is effective when not specifying a `drive_id`.
        corpora (Optional[str]): Bodies of items to query (e.g., 'user', 'domain', 'drive', 'allDrives').
            If 'drive_id' is specified and 'corpora' is None, it defaults to 'drive'.
            Otherwise, Drive API default behavior applies. Prefer 'user' or 'drive' over 'allDrives' for efficiency.

    Returns:
        types.CallToolResult: Contains a list of found files/folders with their details (ID, name, type, size, modified time, link),
            an error message if the API call fails,
            or an authentication guidance message if credentials are required.
    """
    tool_name = "search_drive_files"
    logger.info(f"[{tool_name}] Invoked. Email: '{user_google_email}', Query: '{query}'")

    auth_result = await get_authenticated_google_service(
        service_name="drive",
        version="v3",
        tool_name=tool_name,
        user_google_email=user_google_email,
        required_scopes=[DRIVE_READONLY_SCOPE],
    )
    if isinstance(auth_result, types.CallToolResult):
        return auth_result  # Auth error
    service, _ = auth_result

    try:
        list_params = {
            "q": _build_search_query(query),
            "pageSize": page_size,
            "fields": "nextPageToken, files(id, name, mimeType, webViewLink, iconLink, modifiedTime, size)",
            "supportsAllDrives": True,
            "includeItemsFromAllDrives": include_items_from_all_drives,
        }
        if drive_id:
            list_params["driveId"] = drive_id
            # A drive-scoped search defaults to the 'drive' corpus.
            list_params["corpora"] = corpora or "drive"
        elif corpora:
            list_params["corpora"] = corpora

        # The Google client is blocking; run the request off the event loop.
        results = await asyncio.to_thread(
            service.files().list(**list_params).execute
        )
        files = results.get('files', [])
        if not files:
            return types.CallToolResult(content=[types.TextContent(type="text", text=f"No files found for '{query}'.")])

        formatted_files_text_parts = [f"Found {len(files)} files for {user_google_email} matching '{query}':"]
        for item in files:
            # Size is only reported when the API returned one for the item.
            size_str = f", Size: {item.get('size', 'N/A')}" if 'size' in item else ""
            formatted_files_text_parts.append(
                f"- Name: \"{item['name']}\" (ID: {item['id']}, Type: {item['mimeType']}{size_str}, Modified: {item.get('modifiedTime', 'N/A')}) Link: {item.get('webViewLink', '#')}"
            )
        text_output = "\n".join(formatted_files_text_parts)
        return types.CallToolResult(content=[types.TextContent(type="text", text=text_output)])
    except HttpError as error:
        logger.error(f"API error searching Drive files: {error}", exc_info=True)
        return types.CallToolResult(isError=True, content=[types.TextContent(type="text", text=f"API error: {error}")])
    except Exception as e:
        logger.exception(f"Unexpected error searching Drive files: {e}")
        return types.CallToolResult(isError=True, content=[types.TextContent(type="text", text=f"Unexpected error: {e}")])
@server.tool()
async def get_drive_file_content(
    user_google_email: str,
    file_id: str,
) -> types.CallToolResult:
    """
    Retrieves the content of a specific Google Drive file by ID, supporting files in shared drives.

    - Native Google Docs, Sheets, Slides -> exported as text / CSV.
    - Other files -> downloaded, then passed to `extract_office_xml_text`
      (intended for .docx, .xlsx, .pptx) to extract readable text.
    - If that yields nothing -> tries UTF-8 decode, else notes binary.

    Args:
        user_google_email: The user's Google email address.
        file_id: Drive file ID.

    Returns:
        types.CallToolResult with plain-text content (or error info).
    """
    tool_name = "get_drive_file_content"
    logger.info(f"[{tool_name}] Invoked. File ID: '{file_id}'")

    auth_result = await get_authenticated_google_service(
        service_name="drive",
        version="v3",
        tool_name=tool_name,
        user_google_email=user_google_email,
        required_scopes=[DRIVE_READONLY_SCOPE],
    )
    if isinstance(auth_result, types.CallToolResult):
        return auth_result
    service, _ = auth_result

    try:
        file_metadata = await asyncio.to_thread(
            service.files().get(
                fileId=file_id, fields="id, name, mimeType, webViewLink", supportsAllDrives=True
            ).execute
        )
        mime_type = file_metadata.get("mimeType", "")
        file_name = file_metadata.get("name", "Unknown File")

        # Native Google formats cannot be downloaded directly; they must be exported.
        export_mime_type = {
            "application/vnd.google-apps.document": "text/plain",
            "application/vnd.google-apps.spreadsheet": "text/csv",
            "application/vnd.google-apps.presentation": "text/plain",
        }.get(mime_type)

        if export_mime_type:
            request_obj = service.files().export_media(fileId=file_id, mimeType=export_mime_type)
        else:
            # Match the metadata call above so shared-drive files can be downloaded too.
            request_obj = service.files().get_media(fileId=file_id, supportsAllDrives=True)

        fh = io.BytesIO()
        downloader = MediaIoBaseDownload(fh, request_obj)
        done = False
        while not done:
            # next_chunk() blocks on network I/O; keep it off the event loop,
            # using the same asyncio.to_thread helper as the other Drive tools.
            _, done = await asyncio.to_thread(downloader.next_chunk)

        file_content_bytes = fh.getvalue()

        # Attempt Office XML extraction
        office_text = extract_office_xml_text(file_content_bytes, mime_type)
        if office_text:
            body_text = office_text
        else:
            # Fallback: try UTF-8; otherwise flag binary
            try:
                body_text = file_content_bytes.decode("utf-8")
            except UnicodeDecodeError:
                body_text = (
                    f"[Binary or unsupported text encoding for mimeType '{mime_type}' - "
                    f"{len(file_content_bytes)} bytes]"
                )

        # Assemble response
        header = (
            f'File: "{file_name}" (ID: {file_id}, Type: {mime_type})\n'
            f'Link: {file_metadata.get("webViewLink", "#")}\n\n--- CONTENT ---\n'
        )
        return types.CallToolResult(
            content=[types.TextContent(type="text", text=header + body_text)]
        )

    except HttpError as error:
        logger.error(
            f"API error getting Drive file content for {file_id}: {error}",
            exc_info=True,
        )
        return types.CallToolResult(
            isError=True,
            content=[types.TextContent(type="text", text=f"API error: {error}")],
        )
    except Exception as e:
        logger.exception(f"Unexpected error getting Drive file content for {file_id}: {e}")
        return types.CallToolResult(
            isError=True,
            content=[types.TextContent(type="text", text=f"Unexpected error: {e}")],
        )
@server.tool()
async def list_drive_items(
    user_google_email: str,
    folder_id: str = 'root',
    page_size: int = 100,
    drive_id: Optional[str] = None,
    include_items_from_all_drives: bool = True,
    corpora: Optional[str] = None,
) -> types.CallToolResult:
    """
    Lists files and folders, supporting shared drives.

    If `drive_id` is specified, lists items within that shared drive. `folder_id` is then relative to that drive (or use drive_id as folder_id for root).
    If `drive_id` is not specified, lists items from user's "My Drive" and accessible shared drives (if `include_items_from_all_drives` is True).
    Trashed items are excluded.

    Args:
        user_google_email (str): The user's Google email address. Required.
        folder_id (str): The ID of the Google Drive folder. Defaults to 'root'. For a shared drive, this can be the shared drive's ID to list its root, or a folder ID within that shared drive.
        page_size (int): The maximum number of items to return. Defaults to 100.
        drive_id (Optional[str]): ID of the shared drive. If provided, the listing is scoped to this drive.
        include_items_from_all_drives (bool): Whether items from all accessible shared drives should be included if `drive_id` is not set. Defaults to True.
        corpora (Optional[str]): Corpus to query ('user', 'drive', 'allDrives'). If `drive_id` is set and `corpora` is None, 'drive' is used. If None and no `drive_id`, API defaults apply.

    Returns:
        types.CallToolResult: Contains a list of files/folders or an error.
    """
    tool_name = "list_drive_items"
    logger.info(f"[{tool_name}] Invoked. Email: '{user_google_email}', Folder ID: '{folder_id}'")

    auth_result = await get_authenticated_google_service(
        service_name="drive",
        version="v3",
        tool_name=tool_name,
        user_google_email=user_google_email,
        required_scopes=[DRIVE_READONLY_SCOPE],
    )
    if isinstance(auth_result, types.CallToolResult):
        return auth_result
    service, _ = auth_result

    try:
        # folder_id is caller-supplied and lands inside a single-quoted Drive
        # query literal: escape backslashes first, then quotes, so it cannot
        # terminate the literal and alter the query.
        escaped_folder_id = folder_id.replace("\\", "\\\\").replace("'", "\\'")
        final_query = f"'{escaped_folder_id}' in parents and trashed=false"

        list_params = {
            "q": final_query,
            "pageSize": page_size,
            "fields": "nextPageToken, files(id, name, mimeType, webViewLink, iconLink, modifiedTime, size)",
            "supportsAllDrives": True,
            "includeItemsFromAllDrives": include_items_from_all_drives,
        }
        if drive_id:
            list_params["driveId"] = drive_id
            # A drive-scoped listing defaults to the 'drive' corpus.
            list_params["corpora"] = corpora or "drive"
        elif corpora:
            list_params["corpora"] = corpora

        # The Google client is blocking; run the request off the event loop.
        results = await asyncio.to_thread(
            service.files().list(**list_params).execute
        )
        files = results.get('files', [])
        if not files:
            return types.CallToolResult(content=[types.TextContent(type="text", text=f"No items found in folder '{folder_id}'.")])

        formatted_items_text_parts = [f"Found {len(files)} items in folder '{folder_id}' for {user_google_email}:"]
        for item in files:
            # Size is only reported when the API returned one for the item.
            size_str = f", Size: {item.get('size', 'N/A')}" if 'size' in item else ""
            formatted_items_text_parts.append(
                f"- Name: \"{item['name']}\" (ID: {item['id']}, Type: {item['mimeType']}{size_str}, Modified: {item.get('modifiedTime', 'N/A')}) Link: {item.get('webViewLink', '#')}"
            )
        text_output = "\n".join(formatted_items_text_parts)
        return types.CallToolResult(content=[types.TextContent(type="text", text=text_output)])
    except HttpError as error:
        logger.error(f"API error listing Drive items in folder {folder_id}: {error}", exc_info=True)
        return types.CallToolResult(isError=True, content=[types.TextContent(type="text", text=f"API error: {error}")])
    except Exception as e:
        logger.exception(f"Unexpected error listing Drive items in folder {folder_id}: {e}")
        return types.CallToolResult(isError=True, content=[types.TextContent(type="text", text=f"Unexpected error: {e}")])
@server.tool()
async def create_drive_file(
    user_google_email: str,
    file_name: str,
    content: str,
    folder_id: str = 'root',
    mime_type: str = 'text/plain',
) -> types.CallToolResult:
    """
    Creates a file in Google Drive from the given text content.

    The content is UTF-8 encoded and uploaded under `file_name` into the
    parent folder `folder_id`.

    Args:
        user_google_email (str): The user's Google email address. Required.
        file_name (str): The name for the new file.
        content (str): The content to write to the file.
        folder_id (str): The ID of the parent folder. Defaults to 'root'. For shared drives, this must be a folder ID within the shared drive.
        mime_type (str): The MIME type of the file. Defaults to 'text/plain'.

    Returns:
        A CallToolResult confirming creation or an error/auth guidance message.
    """
    tool_name = "create_drive_file"
    logger.info(f"[{tool_name}] Invoked. Email: '{user_google_email}', File Name: {file_name}, Folder ID: {folder_id}")

    auth_result = await get_authenticated_google_service(
        service_name="drive",
        version="v3",
        tool_name=tool_name,
        user_google_email=user_google_email,
        required_scopes=[DRIVE_FILE_SCOPE],
    )
    # On auth failure the helper hands back a ready-made CallToolResult.
    if isinstance(auth_result, types.CallToolResult):
        return auth_result
    service, user_email = auth_result

    try:
        metadata_body = {
            'name': file_name,
            'parents': [folder_id],
            'mimeType': mime_type,
        }
        payload_stream = io.BytesIO(content.encode('utf-8'))
        upload = MediaIoBaseUpload(payload_stream, mimetype=mime_type, resumable=True)
        create_request = service.files().create(
            body=metadata_body,
            media_body=upload,
            fields='id, name, webViewLink',
            supportsAllDrives=True,
        )
        # The Google client is blocking; execute the request in a worker thread.
        created_file = await asyncio.to_thread(create_request.execute)

        link = created_file.get('webViewLink', 'No link available')
        created_name = created_file.get('name', file_name)
        created_id = created_file.get('id', 'N/A')
        confirmation_message = f"Successfully created file '{created_name}' (ID: {created_id}) in folder '{folder_id}' for {user_email}. Link: {link}"
        logger.info(f"Successfully created file. Link: {link}")
        return types.CallToolResult(content=[types.TextContent(type="text", text=confirmation_message)])

    except HttpError as error:
        logger.error(f"API error creating Drive file '{file_name}': {error}", exc_info=True)
        error_text = f"API error: {error}"
    except Exception as e:
        logger.exception(f"Unexpected error creating Drive file '{file_name}': {e}")
        error_text = f"Unexpected error: {e}"

    # Only the two failure paths above fall through to here.
    return types.CallToolResult(isError=True, content=[types.TextContent(type="text", text=error_text)])