Merge pull request #508 from fmgs31/search_with_file_type

feat: Search with file type
This commit is contained in:
Taylor Wilsdon
2026-02-28 18:12:37 -04:00
committed by GitHub
3 changed files with 495 additions and 5 deletions

View File

@@ -229,6 +229,82 @@ def build_drive_list_params(
SHORTCUT_MIME_TYPE = "application/vnd.google-apps.shortcut" SHORTCUT_MIME_TYPE = "application/vnd.google-apps.shortcut"
FOLDER_MIME_TYPE = "application/vnd.google-apps.folder" FOLDER_MIME_TYPE = "application/vnd.google-apps.folder"
# Validates raw MIME types against the RFC 6838 restricted-name character set.
# The allowed characters exclude quotes and whitespace, so a matching value
# cannot break out of a quoted string.
MIME_TYPE_PATTERN = re.compile(r"^[A-Za-z0-9!#$&^_.+-]+/[A-Za-z0-9!#$&^_.+-]+$")

# Friendly alias -> Google Drive MIME type.
# Generated from (MIME type, aliases) groups so every accepted spelling sits
# beside the MIME type it resolves to. Raw MIME types are not listed here.
FILE_TYPE_MIME_MAP: Dict[str, str] = {
    alias: mime_type
    for mime_type, aliases in (
        ("application/vnd.google-apps.folder", ("folder", "folders")),
        (
            "application/vnd.google-apps.document",
            ("document", "doc", "documents", "docs"),
        ),
        (
            "application/vnd.google-apps.spreadsheet",
            ("spreadsheet", "sheet", "spreadsheets", "sheets"),
        ),
        (
            "application/vnd.google-apps.presentation",
            ("presentation", "presentations", "slide", "slides"),
        ),
        ("application/vnd.google-apps.form", ("form", "forms")),
        ("application/vnd.google-apps.drawing", ("drawing", "drawings")),
        ("application/pdf", ("pdf", "pdfs")),
        ("application/vnd.google-apps.shortcut", ("shortcut", "shortcuts")),
        ("application/vnd.google-apps.script", ("script", "scripts")),
        ("application/vnd.google-apps.site", ("site", "sites")),
        ("application/vnd.google-apps.jam", ("jam", "jamboard", "jamboards")),
    )
    for alias in aliases
}
def resolve_file_type_mime(file_type: str) -> str:
    """
    Translate a friendly file type name or a raw MIME type into a Drive MIME type.

    Surrounding whitespace is ignored and matching is case-insensitive. A value
    containing '/' is treated as a raw MIME type: it is lowercased, checked
    against MIME_TYPE_PATTERN and returned. Any other value is looked up in
    FILE_TYPE_MIME_MAP.

    Args:
        file_type: A friendly name ('folder', 'document', 'pdf', …) or a raw MIME
            type string ('application/vnd.google-apps.document', …).

    Returns:
        str: The resolved MIME type string; raw MIME input comes back lowercased.

    Raises:
        ValueError: If the value is blank, is a raw MIME type that does not match
            MIME_TYPE_PATTERN, or is not a key of FILE_TYPE_MIME_MAP.
    """
    candidate = file_type.strip().lower()
    if candidate == "":
        raise ValueError("file_type cannot be empty.")

    # Friendly-name branch: no '/' means the value must be a known alias.
    if "/" not in candidate:
        resolved = FILE_TYPE_MIME_MAP.get(candidate)
        if resolved is not None:
            return resolved
        known = ", ".join(sorted(FILE_TYPE_MIME_MAP))
        raise ValueError(
            f"Unknown file_type '{file_type}'. Pass a MIME type directly (e.g. "
            f"'application/pdf') or use one of the friendly names: {known}"
        )

    # Raw MIME branch: only pattern-conforming values are returned.
    if MIME_TYPE_PATTERN.fullmatch(candidate) is None:
        raise ValueError(
            f"Invalid MIME type '{file_type}'. Expected format like 'application/pdf'."
        )
    return candidate
BASE_SHORTCUT_FIELDS = ( BASE_SHORTCUT_FIELDS = (
"id, mimeType, parents, shortcutDetails(targetId, targetMimeType)" "id, mimeType, parents, shortcutDetails(targetId, targetMimeType)"
) )

View File

@@ -36,6 +36,7 @@ from gdrive.drive_helpers import (
format_permission_info, format_permission_info,
get_drive_image_url, get_drive_image_url,
resolve_drive_item, resolve_drive_item,
resolve_file_type_mime,
resolve_folder_id, resolve_folder_id,
validate_expiration_time, validate_expiration_time,
validate_share_role, validate_share_role,
@@ -61,6 +62,7 @@ async def search_drive_files(
drive_id: Optional[str] = None, drive_id: Optional[str] = None,
include_items_from_all_drives: bool = True, include_items_from_all_drives: bool = True,
corpora: Optional[str] = None, corpora: Optional[str] = None,
file_type: Optional[str] = None,
detailed: bool = True, detailed: bool = True,
) -> str: ) -> str:
""" """
@@ -76,6 +78,11 @@ async def search_drive_files(
corpora (Optional[str]): Bodies of items to query (e.g., 'user', 'domain', 'drive', 'allDrives'). corpora (Optional[str]): Bodies of items to query (e.g., 'user', 'domain', 'drive', 'allDrives').
If 'drive_id' is specified and 'corpora' is None, it defaults to 'drive'. If 'drive_id' is specified and 'corpora' is None, it defaults to 'drive'.
Otherwise, Drive API default behavior applies. Prefer 'user' or 'drive' over 'allDrives' for efficiency. Otherwise, Drive API default behavior applies. Prefer 'user' or 'drive' over 'allDrives' for efficiency.
file_type (Optional[str]): Restrict results to a specific file type. Accepts a friendly
name ('folder', 'document'/'doc', 'spreadsheet'/'sheet',
'presentation'/'slides', 'form', 'drawing', 'pdf', 'shortcut',
'script', 'site', 'jam'/'jamboard') or any raw MIME type
string (e.g. 'application/pdf'). Defaults to None (all types).
detailed (bool): Whether to include size, modified time, and link in results. Defaults to True. detailed (bool): Whether to include size, modified time, and link in results. Defaults to True.
Returns: Returns:
@@ -83,7 +90,7 @@ async def search_drive_files(
Includes a nextPageToken line when more results are available. Includes a nextPageToken line when more results are available.
""" """
logger.info( logger.info(
f"[search_drive_files] Invoked. Email: '{user_google_email}', Query: '{query}'" f"[search_drive_files] Invoked. Email: '{user_google_email}', Query: '{query}', file_type: '{file_type}'"
) )
# Check if the query looks like a structured Drive query or free text # Check if the query looks like a structured Drive query or free text
@@ -103,6 +110,11 @@ async def search_drive_files(
f"[search_drive_files] Reformatting free text query '{query}' to '{final_query}'" f"[search_drive_files] Reformatting free text query '{query}' to '{final_query}'"
) )
if file_type is not None:
mime = resolve_file_type_mime(file_type)
final_query = f"({final_query}) and mimeType = '{mime}'"
logger.info(f"[search_drive_files] Added mimeType filter: '{mime}'")
list_params = build_drive_list_params( list_params = build_drive_list_params(
query=final_query, query=final_query,
page_size=page_size, page_size=page_size,
@@ -429,6 +441,7 @@ async def list_drive_items(
drive_id: Optional[str] = None, drive_id: Optional[str] = None,
include_items_from_all_drives: bool = True, include_items_from_all_drives: bool = True,
corpora: Optional[str] = None, corpora: Optional[str] = None,
file_type: Optional[str] = None,
detailed: bool = True, detailed: bool = True,
) -> str: ) -> str:
""" """
@@ -444,6 +457,11 @@ async def list_drive_items(
drive_id (Optional[str]): ID of the shared drive. If provided, the listing is scoped to this drive. drive_id (Optional[str]): ID of the shared drive. If provided, the listing is scoped to this drive.
include_items_from_all_drives (bool): Whether items from all accessible shared drives should be included if `drive_id` is not set. Defaults to True. include_items_from_all_drives (bool): Whether items from all accessible shared drives should be included if `drive_id` is not set. Defaults to True.
corpora (Optional[str]): Corpus to query ('user', 'drive', 'allDrives'). If `drive_id` is set and `corpora` is None, 'drive' is used. If None and no `drive_id`, API defaults apply. corpora (Optional[str]): Corpus to query ('user', 'drive', 'allDrives'). If `drive_id` is set and `corpora` is None, 'drive' is used. If None and no `drive_id`, API defaults apply.
file_type (Optional[str]): Restrict results to a specific file type. Accepts a friendly
name ('folder', 'document'/'doc', 'spreadsheet'/'sheet',
'presentation'/'slides', 'form', 'drawing', 'pdf', 'shortcut',
'script', 'site', 'jam'/'jamboard') or any raw MIME type
string (e.g. 'application/pdf'). Defaults to None (all types).
detailed (bool): Whether to include size, modified time, and link in results. Defaults to True. detailed (bool): Whether to include size, modified time, and link in results. Defaults to True.
Returns: Returns:
@@ -451,12 +469,17 @@ async def list_drive_items(
Includes a nextPageToken line when more results are available. Includes a nextPageToken line when more results are available.
""" """
logger.info( logger.info(
f"[list_drive_items] Invoked. Email: '{user_google_email}', Folder ID: '{folder_id}'" f"[list_drive_items] Invoked. Email: '{user_google_email}', Folder ID: '{folder_id}', File Type: '{file_type}'"
) )
resolved_folder_id = await resolve_folder_id(service, folder_id) resolved_folder_id = await resolve_folder_id(service, folder_id)
final_query = f"'{resolved_folder_id}' in parents and trashed=false" final_query = f"'{resolved_folder_id}' in parents and trashed=false"
if file_type is not None:
mime = resolve_file_type_mime(file_type)
final_query = f"({final_query}) and mimeType = '{mime}'"
logger.info(f"[list_drive_items] Added mimeType filter: '{mime}'")
list_params = build_drive_list_params( list_params = build_drive_list_params(
query=final_query, query=final_query,
page_size=page_size, page_size=page_size,

View File

@@ -1,9 +1,9 @@
""" """
Unit tests for Google Drive MCP tools. Unit tests for Google Drive MCP tools.
Tests create_drive_folder with mocked API responses, and the `detailed` Tests create_drive_folder with mocked API responses, plus coverage for
parameter added to search_drive_files, list_drive_items, and `search_drive_files` and `list_drive_items` pagination, `detailed` output,
build_drive_list_params. and `file_type` filtering behaviors.
""" """
import pytest import pytest
@@ -577,3 +577,394 @@ async def test_list_detailed_false_requests_compact_api_fields(mock_resolve_fold
assert "modifiedTime" not in call_kwargs["fields"] assert "modifiedTime" not in call_kwargs["fields"]
assert "webViewLink" not in call_kwargs["fields"] assert "webViewLink" not in call_kwargs["fields"]
assert "size" not in call_kwargs["fields"] assert "size" not in call_kwargs["fields"]
# ---------------------------------------------------------------------------
# Existing behavior coverage
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_search_free_text_returns_results():
    """A free-text search reports the single mocked file by count, name and ID."""
    drive = Mock()
    drive.files().list().execute.return_value = {
        "files": [_make_file("f1", "My Doc", "application/vnd.google-apps.document")]
    }

    output = await _unwrap(search_drive_files)(
        service=drive,
        user_google_email="user@example.com",
        query="my doc",
    )

    for fragment in ("Found 1 files", "My Doc", "f1"):
        assert fragment in output
@pytest.mark.asyncio
async def test_search_no_results():
    """An empty file list from the API produces the 'No files found' message."""
    drive = Mock()
    drive.files().list().execute.return_value = {"files": []}
    request = {
        "service": drive,
        "user_google_email": "user@example.com",
        "query": "nothing here",
    }

    output = await _unwrap(search_drive_files)(**request)

    assert "No files found" in output
@pytest.mark.asyncio
@patch("gdrive.drive_tools.resolve_folder_id", new_callable=AsyncMock)
async def test_list_items_basic(mock_resolve_folder):
    """Listing without a file_type filter reports every item the API returned."""
    mock_resolve_folder.return_value = "resolved_root"
    listing = [
        _make_file("id1", "Folder A", "application/vnd.google-apps.folder"),
        _make_file("id2", "Doc B", "application/vnd.google-apps.document"),
    ]
    drive = Mock()
    drive.files().list().execute.return_value = {"files": listing}

    output = await _unwrap(list_drive_items)(
        service=drive,
        user_google_email="user@example.com",
        folder_id="root",
    )

    for fragment in ("Found 2 items", "Folder A", "Doc B"):
        assert fragment in output
@pytest.mark.asyncio
@patch("gdrive.drive_tools.resolve_folder_id", new_callable=AsyncMock)
async def test_list_items_no_results(mock_resolve_folder):
    """An empty file list from the API produces the 'No items found' message."""
    mock_resolve_folder.return_value = "resolved_root"
    drive = Mock()
    drive.files().list().execute.return_value = {"files": []}
    request = {
        "service": drive,
        "user_google_email": "user@example.com",
        "folder_id": "root",
    }

    output = await _unwrap(list_drive_items)(**request)

    assert "No items found" in output
# ---------------------------------------------------------------------------
# file_type filtering
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_search_file_type_folder_adds_mime_filter():
    """file_type='folder' puts the folder mimeType clause in the query and the hit is formatted."""
    drive = Mock()
    folder_hit = _make_file("fold1", "My Folder", "application/vnd.google-apps.folder")
    drive.files().list().execute.return_value = {"files": [folder_hit]}

    output = await _unwrap(search_drive_files)(
        service=drive,
        user_google_email="user@example.com",
        query="my",
        file_type="folder",
    )

    assert "Found 1 files" in output
    assert "My Folder" in output
    # The last recorded files().list(...) call is the one made by the tool.
    sent_query = drive.files.return_value.list.call_args.kwargs["q"]
    assert "mimeType = 'application/vnd.google-apps.folder'" in sent_query
@pytest.mark.asyncio
async def test_search_file_type_document_alias():
    """The 'doc' alias yields the Google Docs mimeType clause in the query."""
    drive = Mock()
    drive.files().list().execute.return_value = {"files": []}
    request = {
        "service": drive,
        "user_google_email": "user@example.com",
        "query": "report",
        "file_type": "doc",
    }

    await _unwrap(search_drive_files)(**request)

    sent_query = drive.files.return_value.list.call_args.kwargs["q"]
    assert "mimeType = 'application/vnd.google-apps.document'" in sent_query
@pytest.mark.asyncio
async def test_search_file_type_plural_alias():
    """The plural alias 'folders' yields the folder mimeType clause in the query."""
    drive = Mock()
    drive.files().list().execute.return_value = {"files": []}
    request = {
        "service": drive,
        "user_google_email": "user@example.com",
        "query": "project",
        "file_type": "folders",
    }

    await _unwrap(search_drive_files)(**request)

    sent_query = drive.files.return_value.list.call_args.kwargs["q"]
    assert "mimeType = 'application/vnd.google-apps.folder'" in sent_query
@pytest.mark.asyncio
async def test_search_file_type_sheet_alias():
    """The 'sheet' alias yields the Google Sheets mimeType clause in the query."""
    drive = Mock()
    drive.files().list().execute.return_value = {"files": []}
    request = {
        "service": drive,
        "user_google_email": "user@example.com",
        "query": "budget",
        "file_type": "sheet",
    }

    await _unwrap(search_drive_files)(**request)

    sent_query = drive.files.return_value.list.call_args.kwargs["q"]
    assert "mimeType = 'application/vnd.google-apps.spreadsheet'" in sent_query
@pytest.mark.asyncio
async def test_search_file_type_raw_mime():
    """A raw 'application/pdf' file_type appears verbatim in the mimeType clause."""
    drive = Mock()
    pdf_hit = _make_file("p1", "Report.pdf", "application/pdf")
    drive.files().list().execute.return_value = {"files": [pdf_hit]}

    output = await _unwrap(search_drive_files)(
        service=drive,
        user_google_email="user@example.com",
        query="report",
        file_type="application/pdf",
    )

    assert "Report.pdf" in output
    sent_query = drive.files.return_value.list.call_args.kwargs["q"]
    assert "mimeType = 'application/pdf'" in sent_query
@pytest.mark.asyncio
async def test_search_file_type_none_no_mime_filter():
    """With file_type=None the query sent to the API contains no mimeType term."""
    drive = Mock()
    drive.files().list().execute.return_value = {"files": []}
    request = {
        "service": drive,
        "user_google_email": "user@example.com",
        "query": "anything",
        "file_type": None,
    }

    await _unwrap(search_drive_files)(**request)

    sent_query = drive.files.return_value.list.call_args.kwargs["q"]
    assert "mimeType" not in sent_query
@pytest.mark.asyncio
async def test_search_file_type_structured_query_combined():
    """A structured query keeps its own clause and also gains the mimeType clause."""
    drive = Mock()
    drive.files().list().execute.return_value = {"files": []}

    await _unwrap(search_drive_files)(
        service=drive,
        user_google_email="user@example.com",
        query="name contains 'budget'",
        file_type="spreadsheet",
    )

    sent_query = drive.files.return_value.list.call_args.kwargs["q"]
    expected_parts = (
        "name contains 'budget'",
        "mimeType = 'application/vnd.google-apps.spreadsheet'",
    )
    for part in expected_parts:
        assert part in sent_query
@pytest.mark.asyncio
async def test_search_file_type_unknown_raises_value_error():
    """An unknown friendly name makes the search raise ValueError('Unknown file_type ...')."""
    request = {
        "service": Mock(),
        "user_google_email": "user@example.com",
        "query": "something",
        "file_type": "notatype",
    }

    with pytest.raises(ValueError, match="Unknown file_type"):
        await _unwrap(search_drive_files)(**request)
@pytest.mark.asyncio
@patch("gdrive.drive_tools.resolve_folder_id", new_callable=AsyncMock)
async def test_list_items_file_type_folder_adds_mime_filter(mock_resolve_folder):
    """file_type='folder' keeps the parent and trashed clauses and adds the folder mimeType clause."""
    mock_resolve_folder.return_value = "resolved_root"
    drive = Mock()
    subfolder = _make_file("sub1", "SubFolder", "application/vnd.google-apps.folder")
    drive.files().list().execute.return_value = {"files": [subfolder]}

    output = await _unwrap(list_drive_items)(
        service=drive,
        user_google_email="user@example.com",
        folder_id="root",
        file_type="folder",
    )

    assert "Found 1 items" in output
    assert "SubFolder" in output

    sent_query = drive.files.return_value.list.call_args.kwargs["q"]
    expected_parts = (
        "'resolved_root' in parents",
        "trashed=false",
        "mimeType = 'application/vnd.google-apps.folder'",
    )
    for part in expected_parts:
        assert part in sent_query
@pytest.mark.asyncio
@patch("gdrive.drive_tools.resolve_folder_id", new_callable=AsyncMock)
async def test_list_items_file_type_spreadsheet(mock_resolve_folder):
    """file_type='spreadsheet' adds the Google Sheets mimeType clause to the listing query."""
    mock_resolve_folder.return_value = "folder_xyz"
    drive = Mock()
    drive.files().list().execute.return_value = {"files": []}
    request = {
        "service": drive,
        "user_google_email": "user@example.com",
        "folder_id": "folder_xyz",
        "file_type": "spreadsheet",
    }

    await _unwrap(list_drive_items)(**request)

    sent_query = drive.files.return_value.list.call_args.kwargs["q"]
    assert "mimeType = 'application/vnd.google-apps.spreadsheet'" in sent_query
@pytest.mark.asyncio
@patch("gdrive.drive_tools.resolve_folder_id", new_callable=AsyncMock)
async def test_list_items_file_type_raw_mime(mock_resolve_folder):
    """A raw 'application/pdf' file_type appears verbatim in the listing's mimeType clause."""
    mock_resolve_folder.return_value = "folder_abc"
    drive = Mock()
    drive.files().list().execute.return_value = {"files": []}
    request = {
        "service": drive,
        "user_google_email": "user@example.com",
        "folder_id": "folder_abc",
        "file_type": "application/pdf",
    }

    await _unwrap(list_drive_items)(**request)

    sent_query = drive.files.return_value.list.call_args.kwargs["q"]
    assert "mimeType = 'application/pdf'" in sent_query
@pytest.mark.asyncio
@patch("gdrive.drive_tools.resolve_folder_id", new_callable=AsyncMock)
async def test_list_items_file_type_none_no_mime_filter(mock_resolve_folder):
    """With file_type=None the listing query contains no mimeType term."""
    mock_resolve_folder.return_value = "resolved_root"
    drive = Mock()
    drive.files().list().execute.return_value = {"files": []}
    request = {
        "service": drive,
        "user_google_email": "user@example.com",
        "folder_id": "root",
        "file_type": None,
    }

    await _unwrap(list_drive_items)(**request)

    sent_query = drive.files.return_value.list.call_args.kwargs["q"]
    assert "mimeType" not in sent_query
@pytest.mark.asyncio
@patch("gdrive.drive_tools.resolve_folder_id", new_callable=AsyncMock)
async def test_list_items_file_type_unknown_raises(mock_resolve_folder):
    """An unknown friendly name makes the listing raise ValueError('Unknown file_type ...')."""
    mock_resolve_folder.return_value = "resolved_root"
    request = {
        "service": Mock(),
        "user_google_email": "user@example.com",
        "folder_id": "root",
        "file_type": "unknowntype",
    }

    with pytest.raises(ValueError, match="Unknown file_type"):
        await _unwrap(list_drive_items)(**request)
# ---------------------------------------------------------------------------
# OR-precedence grouping
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_search_or_query_is_grouped_before_mime_filter():
    """An 'or' query is parenthesised before the mimeType clause is and-ed onto it."""
    drive = Mock()
    drive.files().list().execute.return_value = {"files": []}
    or_query = "name contains 'a' or name contains 'b'"

    await _unwrap(search_drive_files)(
        service=drive,
        user_google_email="user@example.com",
        query=or_query,
        file_type="document",
    )

    sent_query = drive.files.return_value.list.call_args.kwargs["q"]
    assert sent_query.startswith("(")
    assert or_query in sent_query
    # The closing parenthesis must come directly before the and-ed MIME clause.
    assert ") and mimeType = 'application/vnd.google-apps.document'" in sent_query
# ---------------------------------------------------------------------------
# MIME type validation
# ---------------------------------------------------------------------------
def test_resolve_file_type_mime_invalid_mime_raises():
    """A value with '/' that also contains quote characters is rejected as an invalid MIME type."""
    from gdrive.drive_helpers import resolve_file_type_mime

    quoted_payload = "application/pdf' or '1'='1"
    with pytest.raises(ValueError, match="Invalid MIME type"):
        resolve_file_type_mime(quoted_payload)
def test_resolve_file_type_mime_strips_whitespace():
    """Spaces around a raw MIME string are removed from the resolved value."""
    from gdrive.drive_helpers import resolve_file_type_mime

    resolved = resolve_file_type_mime(" application/pdf ")
    assert resolved == "application/pdf"
def test_resolve_file_type_mime_normalizes_case():
    """A mixed-case raw MIME string is resolved to its lowercase form."""
    from gdrive.drive_helpers import resolve_file_type_mime

    resolved = resolve_file_type_mime("Application/PDF")
    assert resolved == "application/pdf"
def test_resolve_file_type_mime_empty_raises():
    """A whitespace-only value raises ValueError with a 'cannot be empty' message."""
    from gdrive.drive_helpers import resolve_file_type_mime

    blank = " "
    with pytest.raises(ValueError, match="cannot be empty"):
        resolve_file_type_mime(blank)