2025-07-30 09:52:10 -04:00
"""
Google Custom Search (PSE) MCP Tools
This module provides MCP tools for interacting with Google Programmable Search Engine.
"""
import logging
import asyncio
2025-07-30 10:04:09 -04:00
import os
2025-08-02 18:55:35 -04:00
from typing import Optional , List , Literal
2025-07-30 09:52:10 -04:00
from auth . service_decorator import require_google_service
from core . server import server
from core . utils import handle_http_errors
# Module-level logger, named after this module via __name__.
logger = logging . getLogger ( __name__ )
def _format_search_item(index: int, item: dict) -> str:
    """Render a single search result item as a numbered, multi-line text entry."""
    title = item.get("title", "No title")
    link = item.get("link", "No link")
    # Collapse embedded newlines so the snippet stays on one line.
    snippet = item.get("snippet", "No description available").replace("\n", " ")

    lines = [
        f"\n{index}. {title}\n",
        f"   URL: {link}\n",
        f"   Snippet: {snippet}\n",
    ]

    # Optional metadata: only the first metatags entry is consulted.
    metatags = item.get("pagemap", {}).get("metatags")
    if metatags:
        metatag = metatags[0]
        if "og:type" in metatag:
            lines.append(f"   Type: {metatag['og:type']}\n")
        if "article:published_time" in metatag:
            # Keep only the first 10 characters (presumably the YYYY-MM-DD
            # part of an ISO-8601 timestamp -- confirm against real payloads).
            lines.append(
                f"   Published: {metatag['article:published_time'][:10]}\n"
            )

    return "".join(lines)


@server.tool()
@handle_http_errors("search_custom", is_read_only=True, service_type="customsearch")
@require_google_service("customsearch", "customsearch")
async def search_custom(
    service,
    user_google_email: str,
    q: str,
    num: int = 10,
    start: int = 1,
    safe: Literal["active", "moderate", "off"] = "off",
    search_type: Optional[Literal["image"]] = None,
    site_search: Optional[str] = None,
    site_search_filter: Optional[Literal["e", "i"]] = None,
    date_restrict: Optional[str] = None,
    file_type: Optional[str] = None,
    language: Optional[str] = None,
    country: Optional[str] = None,
    sites: Optional[List[str]] = None,
) -> str:
    """
    Performs a search using Google Custom Search JSON API.

    The API key and search engine ID are read from the GOOGLE_PSE_API_KEY and
    GOOGLE_PSE_ENGINE_ID environment variables.

    Args:
        user_google_email (str): The user's Google email address. Required.
        q (str): The search query. Required.
        num (int): Number of results to return (1-10). Defaults to 10.
        start (int): The index of the first result to return (1-based). Defaults to 1.
        safe (Literal["active", "moderate", "off"]): Safe search level. Defaults to "off".
        search_type (Optional[Literal["image"]]): Search for images if set to "image".
        site_search (Optional[str]): Restrict search to a specific site/domain.
        site_search_filter (Optional[Literal["e", "i"]]): Exclude ("e") or include ("i") site_search results.
        date_restrict (Optional[str]): Restrict results by date (e.g., "d5" for past 5 days, "m3" for past 3 months).
        file_type (Optional[str]): Filter by file type (e.g., "pdf", "doc").
        language (Optional[str]): Language code for results (e.g., "lang_en"). Sent as the "lr" parameter.
        country (Optional[str]): Country code for results (e.g., "countryUS"). Sent as the "cr" parameter.
        sites (Optional[List[str]]): List of sites/domains to restrict search to (e.g., ["example.com", "docs.example.com"]).
            When provided, results are limited to these sites by appending "site:" operators to the query.
            Blank entries are ignored.

    Returns:
        str: Formatted search results including title, link, and snippet for each result.

    Raises:
        ValueError: If GOOGLE_PSE_API_KEY or GOOGLE_PSE_ENGINE_ID is not set.
    """
    # Get API key and search engine ID from environment
    api_key = os.environ.get("GOOGLE_PSE_API_KEY")
    if not api_key:
        raise ValueError(
            "GOOGLE_PSE_API_KEY environment variable not set. Please set it to your Google Custom Search API key."
        )

    cx = os.environ.get("GOOGLE_PSE_ENGINE_ID")
    if not cx:
        raise ValueError(
            "GOOGLE_PSE_ENGINE_ID environment variable not set. Please set it to your Programmable Search Engine ID."
        )

    logger.info(
        "[search_custom] Invoked. Email: '%s', Query: '%s', CX: '%s'",
        user_google_email,
        q,
        cx,
    )

    # Apply site restriction if sites are provided. Blank entries are skipped
    # so the query never contains a dangling "site:" operator.
    site_terms = [
        f"site:{site.strip()}" for site in (sites or []) if site and site.strip()
    ]
    if site_terms:
        site_query = " OR ".join(site_terms)
        q = f"{q} ({site_query})"
        logger.info("[search_custom] Applied site restriction: %s", sites)

    # Build the request parameters
    params = {
        "key": api_key,
        "cx": cx,
        "q": q,
        "num": num,
        "start": start,
        "safe": safe,
    }

    # Add optional parameters, forwarding only those that were actually supplied
    optional_params = {
        "searchType": search_type,
        "siteSearch": site_search,
        "siteSearchFilter": site_search_filter,
        "dateRestrict": date_restrict,
        "fileType": file_type,
        "lr": language,
        "cr": country,
    }
    params.update({name: value for name, value in optional_params.items() if value})

    # Execute the search request; the blocking client call runs in a worker thread
    result = await asyncio.to_thread(service.cse().list(**params).execute)

    # Extract search information
    search_info = result.get("searchInformation", {})
    total_results = search_info.get("totalResults", "0")
    search_time = search_info.get("searchTime", 0)

    # Extract search results
    items = result.get("items", [])

    # Only report an index range when there is something to show; an empty
    # page would otherwise read "showing 1 to 0".
    if items:
        returned_line = (
            f"- Results Returned: {len(items)} "
            f"(showing {start} to {start + len(items) - 1})\n"
        )
    else:
        returned_line = "- Results Returned: 0\n"

    # Format the response
    parts = [
        f"Search Results for {user_google_email}:\n",
        f'- Query: "{q}"\n',
        f"- Search Engine ID: {cx}\n",
        f"- Total Results: {total_results}\n",
        f"- Search Time: {search_time:.3f} seconds\n",
        returned_line,
        "\n",
    ]

    if items:
        parts.append("Results:\n")
        # Number entries from `start` so they match the API's 1-based indexes
        parts.extend(
            _format_search_item(index, item)
            for index, item in enumerate(items, start)
        )
    else:
        parts.append("\nNo results found.")

    # Add information about pagination (guarding against an empty nextPage list)
    next_page = result.get("queries", {}).get("nextPage") or []
    if next_page:
        next_start = next_page[0].get("startIndex", 0)
        parts.append(
            f"\n\nTo see more results, search again with start={next_start}"
        )

    logger.info("Search completed successfully for %s", user_google_email)
    return "".join(parts)
@server.tool()
@handle_http_errors(
    "get_search_engine_info", is_read_only=True, service_type="customsearch"
)
@require_google_service("customsearch", "customsearch")
async def get_search_engine_info(service, user_google_email: str) -> str:
    """
    Retrieves metadata about a Programmable Search Engine.

    The engine is identified by the GOOGLE_PSE_ENGINE_ID environment variable and
    queried with the key in GOOGLE_PSE_API_KEY. Metadata is obtained by issuing a
    one-result search for "test" and reading the "context" and
    "searchInformation" sections of the response.

    Args:
        user_google_email (str): The user's Google email address. Required.

    Returns:
        str: Information about the search engine including its configuration and available refinements.

    Raises:
        ValueError: If GOOGLE_PSE_API_KEY or GOOGLE_PSE_ENGINE_ID is not set.
    """
    # Credentials and engine ID come from the environment
    env = os.environ

    api_key = env.get("GOOGLE_PSE_API_KEY")
    if not api_key:
        raise ValueError(
            "GOOGLE_PSE_API_KEY environment variable not set. Please set it to your Google Custom Search API key."
        )

    cx = env.get("GOOGLE_PSE_ENGINE_ID")
    if not cx:
        raise ValueError(
            "GOOGLE_PSE_ENGINE_ID environment variable not set. Please set it to your Programmable Search Engine ID."
        )

    logger.info(
        f"[get_search_engine_info] Invoked. Email: '{user_google_email}', CX: '{cx}'"
    )

    # A minimal one-result query is enough to obtain the engine context
    request = service.cse().list(key=api_key, cx=cx, q="test", num=1)
    response = await asyncio.to_thread(request.execute)

    engine_context = response.get("context", {})
    engine_title = engine_context.get("title", "Unknown")

    chunks = [
        f"Search Engine Information for {user_google_email}:\n"
        f"- Search Engine ID: {cx}\n"
        f"- Title: {engine_title}\n"
    ]

    # Facets (refinements): each facet is iterated as a group of entries
    if "facets" in engine_context:
        chunks.append("\nAvailable Refinements:\n")
        chunks.extend(
            f"  - {entry.get('label', 'Unknown')} (anchor: {entry.get('anchor', 'Unknown')})\n"
            for group in engine_context["facets"]
            for entry in group
        )

    # Search statistics, when the response carries any
    stats = response.get("searchInformation", {})
    if stats:
        indexed_total = stats.get("totalResults", "Unknown")
        chunks.append("\nSearch Statistics:\n")
        chunks.append(f"  - Total indexed results: {indexed_total}\n")

    logger.info(f"Search engine info retrieved successfully for {user_google_email}")
    return "".join(chunks)