SHORTCUT_MIME_TYPE = "application/vnd.google-apps.shortcut"
FOLDER_MIME_TYPE = "application/vnd.google-apps.folder"

# RFC 6838 token-style MIME type validation (safe for Drive query interpolation).
# The character class contains no quote or backslash, so a value that matches
# cannot terminate the quoted literal it is interpolated into.
MIME_TYPE_PATTERN = re.compile(r"^[A-Za-z0-9!#$&^_.+-]+/[A-Za-z0-9!#$&^_.+-]+$")

# Mapping from friendly type names (singular and plural aliases, lower-case)
# to Google Drive MIME types.
# Raw MIME type strings (containing '/') never go through this map;
# resolve_file_type_mime validates and lower-cases them instead.
FILE_TYPE_MIME_MAP: Dict[str, str] = {
    "folder": FOLDER_MIME_TYPE,
    "folders": FOLDER_MIME_TYPE,
    "document": "application/vnd.google-apps.document",
    "doc": "application/vnd.google-apps.document",
    "documents": "application/vnd.google-apps.document",
    "docs": "application/vnd.google-apps.document",
    "spreadsheet": "application/vnd.google-apps.spreadsheet",
    "sheet": "application/vnd.google-apps.spreadsheet",
    "spreadsheets": "application/vnd.google-apps.spreadsheet",
    "sheets": "application/vnd.google-apps.spreadsheet",
    "presentation": "application/vnd.google-apps.presentation",
    "presentations": "application/vnd.google-apps.presentation",
    "slide": "application/vnd.google-apps.presentation",
    "slides": "application/vnd.google-apps.presentation",
    "form": "application/vnd.google-apps.form",
    "forms": "application/vnd.google-apps.form",
    "drawing": "application/vnd.google-apps.drawing",
    "drawings": "application/vnd.google-apps.drawing",
    "pdf": "application/pdf",
    "pdfs": "application/pdf",
    "shortcut": SHORTCUT_MIME_TYPE,
    "shortcuts": SHORTCUT_MIME_TYPE,
    "script": "application/vnd.google-apps.script",
    "scripts": "application/vnd.google-apps.script",
    "site": "application/vnd.google-apps.site",
    "sites": "application/vnd.google-apps.site",
    "jam": "application/vnd.google-apps.jam",
    "jamboard": "application/vnd.google-apps.jam",
    "jamboards": "application/vnd.google-apps.jam",
}


def resolve_file_type_mime(file_type: str) -> str:
    """
    Resolve a friendly file type name or raw MIME type string to a Drive MIME type.

    Leading/trailing whitespace is ignored and matching is case-insensitive.
    A value containing '/' is treated as a raw MIME type: it must match
    MIME_TYPE_PATTERN and is returned lower-cased. Any other value is looked
    up in FILE_TYPE_MIME_MAP.

    Args:
        file_type: A friendly name ('folder', 'document', 'pdf', …) or a raw MIME
                   type string ('application/vnd.google-apps.document', …).

    Returns:
        str: The resolved, lower-case MIME type string.

    Raises:
        ValueError: If the value is blank, is a raw MIME type containing
            characters outside MIME_TYPE_PATTERN, or is neither a raw MIME
            type nor a recognised friendly name.
    """
    candidate = file_type.strip()
    if not candidate:
        raise ValueError("file_type cannot be empty.")

    if "/" in candidate:
        # Validate BEFORE lower-casing: str.lower() folds a few non-ASCII
        # characters (e.g. U+212A KELVIN SIGN -> 'k') to ASCII letters, which
        # would otherwise let them slip past the ASCII-only pattern.
        if not MIME_TYPE_PATTERN.fullmatch(candidate):
            raise ValueError(
                f"Invalid MIME type '{file_type}'. Expected format like 'application/pdf'."
            )
        return candidate.lower()

    try:
        return FILE_TYPE_MIME_MAP[candidate.lower()]
    except KeyError:
        valid = ", ".join(sorted(FILE_TYPE_MIME_MAP))
        # `from None`: the KeyError is an implementation detail of the lookup.
        raise ValueError(
            f"Unknown file_type '{file_type}'. Pass a MIME type directly (e.g. "
            f"'application/pdf') or use one of the friendly names: {valid}"
        ) from None


BASE_SHORTCUT_FIELDS = (
    "id, mimeType, parents, shortcutDetails(targetId, targetMimeType)"
)
Email: '{user_google_email}', Query: '{query}', file_type: '{file_type}'" ) # Check if the query looks like a structured Drive query or free text @@ -103,6 +110,11 @@ async def search_drive_files( f"[search_drive_files] Reformatting free text query '{query}' to '{final_query}'" ) + if file_type is not None: + mime = resolve_file_type_mime(file_type) + final_query = f"({final_query}) and mimeType = '{mime}'" + logger.info(f"[search_drive_files] Added mimeType filter: '{mime}'") + list_params = build_drive_list_params( query=final_query, page_size=page_size, @@ -429,6 +441,7 @@ async def list_drive_items( drive_id: Optional[str] = None, include_items_from_all_drives: bool = True, corpora: Optional[str] = None, + file_type: Optional[str] = None, detailed: bool = True, ) -> str: """ @@ -444,6 +457,11 @@ async def list_drive_items( drive_id (Optional[str]): ID of the shared drive. If provided, the listing is scoped to this drive. include_items_from_all_drives (bool): Whether items from all accessible shared drives should be included if `drive_id` is not set. Defaults to True. corpora (Optional[str]): Corpus to query ('user', 'drive', 'allDrives'). If `drive_id` is set and `corpora` is None, 'drive' is used. If None and no `drive_id`, API defaults apply. + file_type (Optional[str]): Restrict results to a specific file type. Accepts a friendly + name ('folder', 'document'/'doc', 'spreadsheet'/'sheet', + 'presentation'/'slides', 'form', 'drawing', 'pdf', 'shortcut', + 'script', 'site', 'jam'/'jamboard') or any raw MIME type + string (e.g. 'application/pdf'). Defaults to None (all types). detailed (bool): Whether to include size, modified time, and link in results. Defaults to True. Returns: @@ -451,12 +469,17 @@ async def list_drive_items( Includes a nextPageToken line when more results are available. """ logger.info( - f"[list_drive_items] Invoked. Email: '{user_google_email}', Folder ID: '{folder_id}'" + f"[list_drive_items] Invoked. 
Email: '{user_google_email}', Folder ID: '{folder_id}', File Type: '{file_type}'" ) resolved_folder_id = await resolve_folder_id(service, folder_id) final_query = f"'{resolved_folder_id}' in parents and trashed=false" + if file_type is not None: + mime = resolve_file_type_mime(file_type) + final_query = f"({final_query}) and mimeType = '{mime}'" + logger.info(f"[list_drive_items] Added mimeType filter: '{mime}'") + list_params = build_drive_list_params( query=final_query, page_size=page_size, diff --git a/tests/gdrive/test_drive_tools.py b/tests/gdrive/test_drive_tools.py index f260e62..420b504 100644 --- a/tests/gdrive/test_drive_tools.py +++ b/tests/gdrive/test_drive_tools.py @@ -1,9 +1,9 @@ """ Unit tests for Google Drive MCP tools. -Tests create_drive_folder with mocked API responses, and the `detailed` -parameter added to search_drive_files, list_drive_items, and -build_drive_list_params. +Tests create_drive_folder with mocked API responses, plus coverage for +`search_drive_files` and `list_drive_items` pagination, `detailed` output, +and `file_type` filtering behaviors. 
def _drive_service_returning(files):
    """Build a mocked Drive service whose files().list().execute() yields *files*."""
    service = Mock()
    service.files().list().execute.return_value = {"files": files}
    return service


def _submitted_query(service):
    """Return the `q` keyword of the most recent files().list(...) call."""
    return service.files.return_value.list.call_args.kwargs["q"]


# ---------------------------------------------------------------------------
# Existing behavior coverage
# ---------------------------------------------------------------------------


@pytest.mark.asyncio
async def test_search_free_text_returns_results():
    """A free-text search reports the count, name and id of the returned file."""
    service = _drive_service_returning(
        [_make_file("f1", "My Doc", "application/vnd.google-apps.document")]
    )

    output = await _unwrap(search_drive_files)(
        service=service,
        user_google_email="user@example.com",
        query="my doc",
    )

    for expected in ("Found 1 files", "My Doc", "f1"):
        assert expected in output


@pytest.mark.asyncio
async def test_search_no_results():
    """An empty result set produces the 'No files found' message."""
    service = _drive_service_returning([])

    output = await _unwrap(search_drive_files)(
        service=service,
        user_google_email="user@example.com",
        query="nothing here",
    )

    assert "No files found" in output


@pytest.mark.asyncio
@patch("gdrive.drive_tools.resolve_folder_id", new_callable=AsyncMock)
async def test_list_items_basic(mock_resolve_folder):
    """Listing without a filter reports every item returned by the API."""
    mock_resolve_folder.return_value = "resolved_root"
    service = _drive_service_returning(
        [
            _make_file("id1", "Folder A", "application/vnd.google-apps.folder"),
            _make_file("id2", "Doc B", "application/vnd.google-apps.document"),
        ]
    )

    output = await _unwrap(list_drive_items)(
        service=service,
        user_google_email="user@example.com",
        folder_id="root",
    )

    for expected in ("Found 2 items", "Folder A", "Doc B"):
        assert expected in output


@pytest.mark.asyncio
@patch("gdrive.drive_tools.resolve_folder_id", new_callable=AsyncMock)
async def test_list_items_no_results(mock_resolve_folder):
    """Listing an empty folder produces the 'No items found' message."""
    mock_resolve_folder.return_value = "resolved_root"
    service = _drive_service_returning([])

    output = await _unwrap(list_drive_items)(
        service=service,
        user_google_email="user@example.com",
        folder_id="root",
    )

    assert "No items found" in output


# ---------------------------------------------------------------------------
# file_type filtering
# ---------------------------------------------------------------------------


@pytest.mark.asyncio
async def test_search_file_type_folder_adds_mime_filter():
    """Searching with file_type='folder' puts the folder MIME clause in the query."""
    service = _drive_service_returning(
        [_make_file("fold1", "My Folder", "application/vnd.google-apps.folder")]
    )

    output = await _unwrap(search_drive_files)(
        service=service,
        user_google_email="user@example.com",
        query="my",
        file_type="folder",
    )

    assert "Found 1 files" in output
    assert "My Folder" in output
    assert "mimeType = 'application/vnd.google-apps.folder'" in _submitted_query(service)


@pytest.mark.asyncio
async def test_search_file_type_document_alias():
    """The 'doc' alias yields the Google Docs MIME clause."""
    service = _drive_service_returning([])

    await _unwrap(search_drive_files)(
        service=service,
        user_google_email="user@example.com",
        query="report",
        file_type="doc",
    )

    assert "mimeType = 'application/vnd.google-apps.document'" in _submitted_query(service)


@pytest.mark.asyncio
async def test_search_file_type_plural_alias():
    """The plural alias 'folders' yields the folder MIME clause."""
    service = _drive_service_returning([])

    await _unwrap(search_drive_files)(
        service=service,
        user_google_email="user@example.com",
        query="project",
        file_type="folders",
    )

    assert "mimeType = 'application/vnd.google-apps.folder'" in _submitted_query(service)


@pytest.mark.asyncio
async def test_search_file_type_sheet_alias():
    """The 'sheet' alias yields the Google Sheets MIME clause."""
    service = _drive_service_returning([])

    await _unwrap(search_drive_files)(
        service=service,
        user_google_email="user@example.com",
        query="budget",
        file_type="sheet",
    )

    assert "mimeType = 'application/vnd.google-apps.spreadsheet'" in _submitted_query(service)


@pytest.mark.asyncio
async def test_search_file_type_raw_mime():
    """A lower-case raw MIME type appears verbatim in the query's MIME clause."""
    service = _drive_service_returning(
        [_make_file("p1", "Report.pdf", "application/pdf")]
    )

    output = await _unwrap(search_drive_files)(
        service=service,
        user_google_email="user@example.com",
        query="report",
        file_type="application/pdf",
    )

    assert "Report.pdf" in output
    assert "mimeType = 'application/pdf'" in _submitted_query(service)


@pytest.mark.asyncio
async def test_search_file_type_none_no_mime_filter():
    """With file_type=None the query carries no mimeType clause."""
    service = _drive_service_returning([])

    await _unwrap(search_drive_files)(
        service=service,
        user_google_email="user@example.com",
        query="anything",
        file_type=None,
    )

    assert "mimeType" not in _submitted_query(service)


@pytest.mark.asyncio
async def test_search_file_type_structured_query_combined():
    """A structured query keeps its own clause and gains the MIME clause."""
    service = _drive_service_returning([])

    await _unwrap(search_drive_files)(
        service=service,
        user_google_email="user@example.com",
        query="name contains 'budget'",
        file_type="spreadsheet",
    )

    sent = _submitted_query(service)
    assert "name contains 'budget'" in sent
    assert "mimeType = 'application/vnd.google-apps.spreadsheet'" in sent


@pytest.mark.asyncio
async def test_search_file_type_unknown_raises_value_error():
    """An unknown friendly name makes the search raise ValueError."""
    service = Mock()
    search = _unwrap(search_drive_files)

    with pytest.raises(ValueError, match="Unknown file_type"):
        await search(
            service=service,
            user_google_email="user@example.com",
            query="something",
            file_type="notatype",
        )


@pytest.mark.asyncio
@patch("gdrive.drive_tools.resolve_folder_id", new_callable=AsyncMock)
async def test_list_items_file_type_folder_adds_mime_filter(mock_resolve_folder):
    """Listing with file_type='folder' keeps the parent/trashed clauses and adds the MIME clause."""
    mock_resolve_folder.return_value = "resolved_root"
    service = _drive_service_returning(
        [_make_file("sub1", "SubFolder", "application/vnd.google-apps.folder")]
    )

    output = await _unwrap(list_drive_items)(
        service=service,
        user_google_email="user@example.com",
        folder_id="root",
        file_type="folder",
    )

    assert "Found 1 items" in output
    assert "SubFolder" in output

    sent = _submitted_query(service)
    for clause in (
        "'resolved_root' in parents",
        "trashed=false",
        "mimeType = 'application/vnd.google-apps.folder'",
    ):
        assert clause in sent


@pytest.mark.asyncio
@patch("gdrive.drive_tools.resolve_folder_id", new_callable=AsyncMock)
async def test_list_items_file_type_spreadsheet(mock_resolve_folder):
    """Listing with file_type='spreadsheet' adds the Sheets MIME clause."""
    mock_resolve_folder.return_value = "folder_xyz"
    service = _drive_service_returning([])

    await _unwrap(list_drive_items)(
        service=service,
        user_google_email="user@example.com",
        folder_id="folder_xyz",
        file_type="spreadsheet",
    )

    assert "mimeType = 'application/vnd.google-apps.spreadsheet'" in _submitted_query(service)


@pytest.mark.asyncio
@patch("gdrive.drive_tools.resolve_folder_id", new_callable=AsyncMock)
async def test_list_items_file_type_raw_mime(mock_resolve_folder):
    """A lower-case raw MIME type appears verbatim in the listing query."""
    mock_resolve_folder.return_value = "folder_abc"
    service = _drive_service_returning([])

    await _unwrap(list_drive_items)(
        service=service,
        user_google_email="user@example.com",
        folder_id="folder_abc",
        file_type="application/pdf",
    )

    assert "mimeType = 'application/pdf'" in _submitted_query(service)


@pytest.mark.asyncio
@patch("gdrive.drive_tools.resolve_folder_id", new_callable=AsyncMock)
async def test_list_items_file_type_none_no_mime_filter(mock_resolve_folder):
    """With file_type=None the listing query carries no mimeType clause."""
    mock_resolve_folder.return_value = "resolved_root"
    service = _drive_service_returning([])

    await _unwrap(list_drive_items)(
        service=service,
        user_google_email="user@example.com",
        folder_id="root",
        file_type=None,
    )

    assert "mimeType" not in _submitted_query(service)


@pytest.mark.asyncio
@patch("gdrive.drive_tools.resolve_folder_id", new_callable=AsyncMock)
async def test_list_items_file_type_unknown_raises(mock_resolve_folder):
    """An unknown friendly name makes the listing raise ValueError."""
    mock_resolve_folder.return_value = "resolved_root"
    service = Mock()
    list_items = _unwrap(list_drive_items)

    with pytest.raises(ValueError, match="Unknown file_type"):
        await list_items(
            service=service,
            user_google_email="user@example.com",
            folder_id="root",
            file_type="unknowntype",
        )


# ---------------------------------------------------------------------------
# OR-precedence grouping
# ---------------------------------------------------------------------------


@pytest.mark.asyncio
async def test_search_or_query_is_grouped_before_mime_filter():
    """An OR query is parenthesised before the MIME clause is AND-ed onto it."""
    service = _drive_service_returning([])

    await _unwrap(search_drive_files)(
        service=service,
        user_google_email="user@example.com",
        query="name contains 'a' or name contains 'b'",
        file_type="document",
    )

    sent = _submitted_query(service)
    assert sent.startswith("(")
    assert "name contains 'a' or name contains 'b'" in sent
    assert ") and mimeType = 'application/vnd.google-apps.document'" in sent


# ---------------------------------------------------------------------------
# MIME type validation
# ---------------------------------------------------------------------------


def test_resolve_file_type_mime_invalid_mime_raises():
    """A value with '/' that also contains quote characters is rejected."""
    from gdrive.drive_helpers import resolve_file_type_mime

    injected = "application/pdf' or '1'='1"
    with pytest.raises(ValueError, match="Invalid MIME type"):
        resolve_file_type_mime(injected)


def test_resolve_file_type_mime_strips_whitespace():
    """Surrounding whitespace is removed from a raw MIME string."""
    from gdrive.drive_helpers import resolve_file_type_mime

    resolved = resolve_file_type_mime(" application/pdf ")
    assert resolved == "application/pdf"


def test_resolve_file_type_mime_normalizes_case():
    """A mixed-case raw MIME string comes back lower-cased."""
    from gdrive.drive_helpers import resolve_file_type_mime

    resolved = resolve_file_type_mime("Application/PDF")
    assert resolved == "application/pdf"


def test_resolve_file_type_mime_empty_raises():
    """A whitespace-only value is rejected with the 'cannot be empty' error."""
    from gdrive.drive_helpers import resolve_file_type_mime

    with pytest.raises(ValueError, match="cannot be empty"):
        resolve_file_type_mime(" ")