commit 395f0e2029f4bbd4f719986b2f312e953f9b1e23 Author: jason Date: Tue Mar 17 19:23:33 2026 -0500 feat: initial commit from workspace-mcp diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..250863f --- /dev/null +++ b/.dockerignore @@ -0,0 +1,42 @@ +# Git and version control +.git +.gitignore +gitdiff.txt + +# Documentation and notes +*.md +AUTHENTICATION_REFACTOR_PROPOSAL.md +leverage_fastmcp_responses.md + +# Test files and coverage +tests/ +htmlcov/ +.coverage +pytest_out.txt + +# Build artifacts +build/ +dist/ +*.egg-info/ + +# Development files +mcp_server_debug.log +.credentials/ + +# Cache and temporary files +__pycache__/ +*.pyc +*.pyo +*.pyd +.Python +.pytest_cache/ + +# IDE files +.vscode/ +.idea/ +*.swp +*.swo + +# OS files +.DS_Store +Thumbs.db \ No newline at end of file diff --git a/.dxtignore b/.dxtignore new file mode 100644 index 0000000..92c2268 --- /dev/null +++ b/.dxtignore @@ -0,0 +1,67 @@ +# ============================================================================= +# .dxtignore — defense-in-depth denylist for dxt pack +# +# IMPORTANT: Always use dxt-safe-pack.sh instead of bare `dxt pack`. +# The script guarantees only git-tracked files are packaged. +# This file exists as a safety net in case someone runs `dxt pack` directly. 
+# ============================================================================= + +# ---- Caches ---------------------------------------------------------------- +.mypy_cache +__pycache__ +*.py[cod] +*.so +.pytest_cache +.ruff_cache + +# ---- Build / packaging ----------------------------------------------------- +*.egg-info +build/ +dist/ + +# ---- Environments & tooling ------------------------------------------------ +.env +.venv +venv/ +.idea/ +.vscode/ +.claude/ +.serena/ +node_modules/ + +# ---- macOS ----------------------------------------------------------------- +.DS_Store + +# ---- Secrets & credentials — CRITICAL -------------------------------------- +client_secret.json +.credentials +credentials.json +token.pickle +*_token +*_secret +.mcpregistry_* +*.key +*.pem +*.p12 +*.crt +*.der + +# ---- Test & debug ----------------------------------------------------------- +.coverage +pytest_out.txt +mcp_server_debug.log +diff_output.txt + +# ---- Temp & editor files ---------------------------------------------------- +*.tmp +*.log +*.pid +*.swp +*.swo +*~ + +# ---- Development artifacts not for distribution ----------------------------- +scripts/ +.beads +.github/ +tests/ diff --git a/.env.oauth21 b/.env.oauth21 new file mode 100644 index 0000000..a7d4927 --- /dev/null +++ b/.env.oauth21 @@ -0,0 +1,62 @@ +# OAuth 2.1 Configuration Example +# Copy this to .env and update with your Google OAuth credentials + +# Required: Google OAuth 2.0 Client Credentials +# Note: OAuth 2.1 will automatically use GOOGLE_OAUTH_CLIENT_ID and GOOGLE_OAUTH_CLIENT_SECRET +# if OAUTH2_CLIENT_ID and OAUTH2_CLIENT_SECRET are not set + +GOOGLE_OAUTH_CLIENT_ID="your-google-client-id" +GOOGLE_OAUTH_CLIENT_SECRET="your-google-client-secret" + +# Development Settings (set to true for localhost testing) +OAUTH2_ALLOW_INSECURE_TRANSPORT=false +OAUTH2_ENABLE_DEBUG=false + +# Legacy Compatibility (recommended during migration) +OAUTH2_ENABLE_LEGACY_AUTH=true + +# 
--------------------------------------------------------------------------- +# FastMCP OAuth Proxy Storage Backends (OAuth 2.1) +# +# Storage backend for OAuth proxy state. Options: memory, disk, valkey +# Default: FastMCP's built-in default (disk on Mac/Windows, memory on Linux) +# +# WORKSPACE_MCP_OAUTH_PROXY_STORAGE_BACKEND=memory|disk|valkey +# +# --------------------------------------------------------------------------- +# Memory Storage (default on Linux) +# - Fast, no persistence, data lost on restart +# - Best for: development, testing, stateless deployments +# +# WORKSPACE_MCP_OAUTH_PROXY_STORAGE_BACKEND=memory +# +# --------------------------------------------------------------------------- +# Disk Storage (default on Mac/Windows) +# - Persists across restarts, single-server only +# - Best for: single-server production, persistent caching +# +# WORKSPACE_MCP_OAUTH_PROXY_STORAGE_BACKEND=disk +# WORKSPACE_MCP_OAUTH_PROXY_DISK_DIRECTORY=~/.fastmcp/oauth-proxy +# +# --------------------------------------------------------------------------- +# Valkey/Redis Storage +# - Distributed, multi-server support +# - Best for: production, multi-server deployments, cloud native +# +# WORKSPACE_MCP_OAUTH_PROXY_STORAGE_BACKEND=valkey +# WORKSPACE_MCP_OAUTH_PROXY_VALKEY_HOST=localhost +# WORKSPACE_MCP_OAUTH_PROXY_VALKEY_PORT=6379 +# WORKSPACE_MCP_OAUTH_PROXY_VALKEY_USE_TLS=false +# WORKSPACE_MCP_OAUTH_PROXY_VALKEY_DB=0 +# WORKSPACE_MCP_OAUTH_PROXY_VALKEY_USERNAME= +# WORKSPACE_MCP_OAUTH_PROXY_VALKEY_PASSWORD= +# WORKSPACE_MCP_OAUTH_PROXY_VALKEY_REQUEST_TIMEOUT_MS=5000 +# WORKSPACE_MCP_OAUTH_PROXY_VALKEY_CONNECTION_TIMEOUT_MS=10000 +# +# --------------------------------------------------------------------------- +# Encryption: +# - Disk and Valkey storage are encrypted with Fernet. +# - Key derived from FASTMCP_SERVER_AUTH_GOOGLE_JWT_SIGNING_KEY if set, +# otherwise from GOOGLE_OAUTH_CLIENT_SECRET. 
+# - For stable decryption across client-secret rotations, set +# FASTMCP_SERVER_AUTH_GOOGLE_JWT_SIGNING_KEY explicitly. diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml new file mode 100644 index 0000000..a558a66 --- /dev/null +++ b/.github/FUNDING.yml @@ -0,0 +1,19 @@ +# .github/FUNDING.yml +github: taylorwilsdon + +# --- Optional platforms (one value per platform) --- +# patreon: REPLACE_ME +# open_collective: REPLACE_ME +# ko_fi: REPLACE_ME +# liberapay: REPLACE_ME +# issuehunt: REPLACE_ME +# polar: REPLACE_ME +# buy_me_a_coffee: REPLACE_ME +# thanks_dev: u/gh/REPLACE_ME_GITHUB_USERNAME + +# Tidelift uses platform/package (npm, pypi, rubygems, maven, packagist, nuget) +# tidelift: pypi/REPLACE_ME_PACKAGE_NAME + +# Up to 4 custom URLs (wrap in quotes if they contain :) +# Good pattern: link to a SUPPORT.md that describes how to sponsor, or your donation page. +# custom: ["https://REPLACE_ME_DOMAIN/sponsor", "https://github.com/REPLACE_ME_OWNER/REPLACE_ME_REPO/blob/main/SUPPORT.md"] diff --git a/.github/ISSUE_TEMPLATE/bug-report.md b/.github/ISSUE_TEMPLATE/bug-report.md new file mode 100644 index 0000000..77bc1f0 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug-report.md @@ -0,0 +1,35 @@ +--- +name: Bug Report +about: Create a report to help us improve Google Workspace MCP +title: '' +labels: '' +assignees: '' + +--- + +**Describe the bug** +A clear and concise description of what the bug is. + +**Startup Logs** +Include the startup output including everything from the Active Configuration section to "Uvicorn running" + +**To Reproduce** +Steps to reproduce the behavior: +1. Go to '...' +2. Click on '....' +3. Scroll down to '....' +4. See error + +**Expected behavior** +A clear and concise description of what you expected to happen. + +**Screenshots** +If applicable, add screenshots to help explain your problem. + +**Platform (please complete the following information):** + - OS: [e.g. macOS, Ubuntu, Windows] +- Container: [if applicable, e.g. 
Docker] + - Version [e.g. v1.2.0] + +**Additional context** +Add any other context about the problem here. diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 0000000..bbcbbe7 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,20 @@ +--- +name: Feature request +about: Suggest an idea for this project +title: '' +labels: '' +assignees: '' + +--- + +**Is your feature request related to a problem? Please describe.** +A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] + +**Describe the solution you'd like** +A clear and concise description of what you want to happen. + +**Describe alternatives you've considered** +A clear and concise description of any alternative solutions or features you've considered. + +**Additional context** +Add any other context or screenshots about the feature request here. diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..5990d9c --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,11 @@ +# To get started with Dependabot version updates, you'll need to specify which +# package ecosystems to update and where the package manifests are located. +# Please see the documentation for all configuration options: +# https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file + +version: 2 +updates: + - package-ecosystem: "uv" # See documentation for possible values + directory: "/" # Location of package manifests + schedule: + interval: "weekly" diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 0000000..dc68863 --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,31 @@ +## Description +Brief description of the changes in this PR. 
+ +## Type of Change +- [ ] Bug fix (non-breaking change which fixes an issue) +- [ ] New feature (non-breaking change which adds functionality) +- [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected) +- [ ] Documentation update + +## Testing +- [ ] I have added tests that prove my fix is effective or that my feature works +- [ ] New and existing unit tests pass locally with my changes +- [ ] I have tested this change manually + +## Checklist +- [ ] My code follows the style guidelines of this project +- [ ] I have performed a self-review of my own code +- [ ] I have commented my code, particularly in hard-to-understand areas +- [ ] My changes generate no new warnings +- [ ] **I have enabled "Allow edits from maintainers" for this pull request** + +## Additional Notes +Add any other context about the pull request here. + +--- + +**⚠️ IMPORTANT:** This repository requires that you enable "Allow edits from maintainers" when creating your pull request. This allows maintainers to make small fixes and improvements directly to your branch, speeding up the review process. + +To enable this setting: +1. When creating the PR, check the "Allow edits from maintainers" checkbox +2. 
If you've already created the PR, you can enable this in the PR sidebar under "Allow edits from maintainers" \ No newline at end of file diff --git a/.github/workflows/check-maintainer-edits.yml b/.github/workflows/check-maintainer-edits.yml new file mode 100644 index 0000000..525c530 --- /dev/null +++ b/.github/workflows/check-maintainer-edits.yml @@ -0,0 +1,54 @@ +name: Check Maintainer Edits Enabled + +on: + pull_request: + types: [opened, synchronize, reopened, edited] + +permissions: + pull-requests: read + issues: write + +jobs: + check-maintainer-edits: + runs-on: ubuntu-latest + if: github.event.pull_request.head.repo.fork == true || github.event.pull_request.head.repo.full_name != github.repository + + steps: + - name: Check if maintainer edits are enabled + uses: actions/github-script@v7 + with: + script: | + const { data: pr } = await github.rest.pulls.get({ + owner: context.repo.owner, + repo: context.repo.repo, + pull_number: context.issue.number + }); + + if (!pr.maintainer_can_modify) { + await github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: context.issue.number, + body: '⚠️ **Maintainer edits not enabled**\n\n' + + 'This repository requires that you enable "Allow edits from maintainers" for your pull request. This allows maintainers to make small fixes and improvements directly to your branch, which speeds up the review process.\n\n' + + '**To enable this setting:**\n' + + '1. Go to your pull request page\n' + + '2. In the right sidebar, look for "Allow edits from maintainers"\n' + + '3. Check the checkbox to enable it\n\n' + + 'Once you\'ve enabled this setting, this check will automatically pass. Thank you! 
🙏' + }); + + core.setFailed('Maintainer edits must be enabled for this pull request'); + } else { + console.log('✅ Maintainer edits are enabled'); + } + + check-maintainer-edits-internal: + runs-on: ubuntu-latest + if: github.event.pull_request.head.repo.fork == false && github.event.pull_request.head.repo.full_name == github.repository + + steps: + - name: Skip check for internal PRs + run: | + echo "✅ Skipping maintainer edits check for internal pull request" + echo "This check only applies to external contributors and forks" \ No newline at end of file diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml new file mode 100644 index 0000000..b457075 --- /dev/null +++ b/.github/workflows/docker-publish.yml @@ -0,0 +1,66 @@ +name: Docker Build and Push to GHCR + +on: + push: + branches: + - main + tags: + - 'v*.*.*' + pull_request: + branches: + - main + workflow_dispatch: + +permissions: {} + +env: + REGISTRY: ghcr.io + IMAGE_NAME: ${{ github.repository }} + +jobs: + build-and-push: + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + id-token: write + + steps: + - name: Checkout repository + uses: actions/checkout@v6 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Log in to GitHub Container Registry + if: github.event_name != 'pull_request' + uses: docker/login-action@v3 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Extract metadata (tags, labels) for Docker + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} + tags: | + type=ref,event=branch + type=ref,event=pr + type=semver,pattern={{version}} + type=semver,pattern={{major}}.{{minor}} + type=semver,pattern={{major}} + type=sha,prefix=sha- + type=raw,value=latest,enable={{is_default_branch}} + + - name: Build and push Docker image + uses: docker/build-push-action@v6 + with: + context: . 
+ push: ${{ github.event_name != 'pull_request' }} + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + cache-from: type=gha + cache-to: type=gha,mode=max + platforms: linux/amd64,linux/arm64 diff --git a/.github/workflows/publish-mcp-registry.yml b/.github/workflows/publish-mcp-registry.yml new file mode 100644 index 0000000..c70f5d2 --- /dev/null +++ b/.github/workflows/publish-mcp-registry.yml @@ -0,0 +1,106 @@ +name: Publish PyPI + MCP Registry + +on: + push: + tags: + - "v*" + workflow_dispatch: + +permissions: {} + +jobs: + publish: + runs-on: ubuntu-latest + permissions: + contents: read + id-token: write + steps: + - name: Checkout + uses: actions/checkout@v6 + + - name: Set up Python + uses: actions/setup-python@v6 + with: + python-version: "3.11" + + - name: Resolve version from tag + run: echo "VERSION=${GITHUB_REF_NAME#v}" >> "$GITHUB_ENV" + + - name: Verify tag matches pyproject version + run: | + PYPROJECT_VERSION="$(python - <<'PY' + import tomllib + with open("pyproject.toml", "rb") as f: + data = tomllib.load(f) + print(data["project"]["version"]) + PY + )" + if [ "$PYPROJECT_VERSION" != "$VERSION" ]; then + echo "Tag version ($VERSION) does not match pyproject version ($PYPROJECT_VERSION)." + exit 1 + fi + + - name: Sync server.json version with release + run: | + tmp="$(mktemp)" + jq --arg version "$VERSION" ' + .version = $version + | .packages = ( + (.packages // []) + | map( + if ((.registryType // .registry_type // "") == "pypi") + then .version = $version + else . 
+ end + ) + ) + ' server.json > "$tmp" + mv "$tmp" server.json + + - name: Validate server.json against schema + run: | + python -m pip install --upgrade pip + python -m pip install jsonschema requests + python - <<'PY' + import json + import requests + from jsonschema import Draft202012Validator + + with open("server.json", "r", encoding="utf-8") as f: + instance = json.load(f) + + schema_url = instance["$schema"] + schema = requests.get(schema_url, timeout=30).json() + + Draft202012Validator.check_schema(schema) + Draft202012Validator(schema).validate(instance) + print("server.json schema validation passed") + PY + + - name: Build distribution + run: | + python -m pip install build + python -m build + + - name: Check package metadata + run: | + python -m pip install twine + twine check dist/* + + - name: Publish package to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 + with: + skip-existing: true + + - name: Install mcp-publisher + run: | + OS="$(uname -s | tr '[:upper:]' '[:lower:]')" + ARCH="$(uname -m | sed 's/x86_64/amd64/' | sed 's/aarch64/arm64/')" + curl -fsSL "https://github.com/modelcontextprotocol/registry/releases/latest/download/mcp-publisher_${OS}_${ARCH}.tar.gz" | tar xz mcp-publisher + chmod +x mcp-publisher + + - name: Login to MCP Registry with GitHub OIDC + run: ./mcp-publisher login github-oidc + + - name: Publish server to MCP Registry + run: ./mcp-publisher publish diff --git a/.github/workflows/ruff.yml b/.github/workflows/ruff.yml new file mode 100644 index 0000000..d4e7cb2 --- /dev/null +++ b/.github/workflows/ruff.yml @@ -0,0 +1,45 @@ +name: Ruff + +on: + pull_request: + branches: [ main ] + push: + branches: [ main ] + +permissions: + contents: write + +jobs: + ruff: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v6 + with: + ref: ${{ github.event.pull_request.head.ref || github.ref }} + repository: ${{ github.event.pull_request.head.repo.full_name || github.repository }} + token: ${{ secrets.GITHUB_TOKEN }} + - 
uses: actions/setup-python@v6 + with: + python-version: '3.11' + - name: Install uv + uses: astral-sh/setup-uv@v7 + - name: Install dependencies + run: uv sync + - name: Auto-fix ruff lint and format + if: github.event_name == 'pull_request' + run: | + uv run ruff check --fix + uv run ruff format + - name: Commit and push fixes + if: github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == github.repository + run: | + git diff --quiet && exit 0 + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + git add -A + git commit -m "style: auto-fix ruff lint and format" + git push + - name: Validate + run: | + uv run ruff check + uv run ruff format --check diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..4ee5566 --- /dev/null +++ b/.gitignore @@ -0,0 +1,39 @@ +# ---- Python artefacts -------------------------------------------------- +__pycache__/ +*.py[cod] +*.so +.mcp.json +claude.md +.beads/* +.beads/issues.jsonl + +# ---- Packaging --------------------------------------------------------- +*.egg-info/ +build/ +dist/ + +# ---- Environments & tooling ------------------------------------------- +.env +.venv/ +venv/ +.idea/ +.vscode/ + +# ---- macOS clutter ----------------------------------------------------- +.DS_Store + +# ---- Secrets ----------------------------------------------------------- +client_secret.json + +# ---- Logs -------------------------------------------------------------- +mcp_server_debug.log + +# ---- Local development files ------------------------------------------- +/.credentials +/.claude +.serena/ +Caddyfile +ecosystem.config.cjs + +# ---- Agent instructions (not for distribution) ------------------------- +.github/instructions/ diff --git a/.python-version b/.python-version new file mode 100644 index 0000000..2c07333 --- /dev/null +++ b/.python-version @@ -0,0 +1 @@ +3.11 diff --git a/Dockerfile b/Dockerfile new file mode 
100644 index 0000000..59a1fe4 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,45 @@ +FROM python:3.11-slim + +WORKDIR /app + +# Install system dependencies +RUN apt-get update && apt-get install -y \ + curl \ + && rm -rf /var/lib/apt/lists/* + +# Install uv for faster dependency management +RUN pip install --no-cache-dir uv + +COPY . . + +# Install Python dependencies using uv sync +RUN uv sync --frozen --no-dev --extra disk + +# Create non-root user for security +RUN useradd --create-home --shell /bin/bash app \ + && chown -R app:app /app + +# Give read and write access to the store_creds volume +RUN mkdir -p /app/store_creds \ + && chown -R app:app /app/store_creds \ + && chmod 755 /app/store_creds + +USER app + +# Expose port (use default of 8000 if PORT not set) +EXPOSE 8000 +# Expose additional port if PORT environment variable is set to a different value +ARG PORT +EXPOSE ${PORT:-8000} + +# Health check +HEALTHCHECK --interval=30s --timeout=10s --start-period=30s --retries=3 \ + CMD sh -c 'curl -f http://localhost:${PORT:-8000}/health || exit 1' + +# Set environment variables for Python startup args +ENV TOOL_TIER="" +ENV TOOLS="" + +# Use entrypoint for the base command and CMD for args +ENTRYPOINT ["/bin/sh", "-c"] +CMD ["uv run main.py --transport streamable-http ${TOOL_TIER:+--tool-tier \"$TOOL_TIER\"} ${TOOLS:+--tools $TOOLS}"] diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..bc5b15a --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2025 Taylor Wilsdon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and 
this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..32a12a8 --- /dev/null +++ b/README.md @@ -0,0 +1,1639 @@ + + +
+ +# Google Workspace MCP Server + +[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) +[![Python 3.10+](https://img.shields.io/badge/Python-3.10%2B-blue.svg)](https://www.python.org/downloads/) +[![PyPI](https://img.shields.io/pypi/v/workspace-mcp.svg)](https://pypi.org/project/workspace-mcp/) +[![PyPI Downloads](https://static.pepy.tech/personalized-badge/workspace-mcp?period=total&units=INTERNATIONAL_SYSTEM&left_color=BLACK&right_color=BLUE&left_text=downloads)](https://pepy.tech/projects/workspace-mcp) +[![Website](https://img.shields.io/badge/Website-workspacemcp.com-green.svg)](https://workspacemcp.com) + +*Full natural language control over Google Calendar, Drive, Gmail, Docs, Sheets, Slides, Forms, Tasks, Contacts, and Chat through all MCP clients, AI assistants and developer tools. Includes a full featured CLI for use with tools like Claude Code and Codex!* + +**The most feature-complete Google Workspace MCP server**, with Remote OAuth2.1 multi-user support and 1-click Claude installation. With native OAuth 2.1, stateless mode and external auth server support, it's the only Workspace MCP you can host for your whole organization centrally & securely! + +###### Support for all free Google accounts (Gmail, Docs, Drive etc) & Google Workspace plans (Starter, Standard, Plus, Enterprise, Non Profit) with expanded app options like Chat & Spaces.

Interested in a private, managed cloud instance? [That can be arranged.](https://workspacemcp.com/workspace-mcp-cloud) + + +
+ +
+ + + +
+ +--- + + +**See it in action:** +
+ +
+ +--- + +### A quick plug for AI-Enhanced Docs +
+But why? + +**This README was written with AI assistance, and here's why that matters** +> +> As a solo dev building open source tools, comprehensive documentation often wouldn't happen without AI help. Using agentic dev tools like **Roo** & **Claude Code** that understand the entire codebase, AI doesn't just regurgitate generic content - it extracts real implementation details and creates accurate, specific documentation. +> +> In this case, Sonnet 4 took a pass & a human (me) verified the result on 2/16/26. +
+ +## Overview + +A production-ready MCP server that integrates all major Google Workspace services with AI assistants. It supports both single-user operation and multi-user authentication via OAuth 2.1, making it a powerful backend for custom applications. Built with FastMCP for optimal performance, featuring advanced authentication handling, service caching, and streamlined development patterns. + +**Simplified Setup**: Now uses Google Desktop OAuth clients - no redirect URIs or port configuration needed! + + +## Features + + + + + + +
+ +**@ Gmail** • **Drive** • **Calendar** • **Docs** +- Complete Gmail management, end-to-end coverage +- Full calendar management with advanced features +- File operations with Office format support +- Document creation, editing & comments +- Deep, exhaustive support for fine-grained editing + +--- + +**Forms** • **@ Chat** • **Sheets** • **Slides** +- Form creation, publish settings & response management +- Space management & messaging capabilities +- Spreadsheet operations with flexible cell management +- Presentation creation, updates & content manipulation + +--- + +**Apps Script** +- Automate cross-application workflows with custom code +- Execute existing business logic and custom functions +- Manage script projects, deployments & versions +- Debug and modify Apps Script code programmatically +- Bridge Google Workspace services through automation + + + +**Authentication & Security** +- Advanced OAuth 2.0 & OAuth 2.1 support +- Automatic token refresh & session management +- Transport-aware callback handling +- Multi-user bearer token authentication +- Innovative CORS proxy architecture + +--- + +**Tasks** • **👤 Contacts** • **Custom Search** +- Task & task list management with hierarchy +- Contact management via People API with groups +- Programmable Search Engine (PSE) integration + +
+ +--- + +## Quick Start + +
+Quick Reference Card - Essential commands & configs at a glance + + + +
+ +**Credentials** +```bash +export GOOGLE_OAUTH_CLIENT_ID="..." +export GOOGLE_OAUTH_CLIENT_SECRET="..." +``` +[Full setup →](#credential-configuration) + + + +**Launch Commands** +```bash +uvx workspace-mcp --tool-tier core +uv run main.py --tools gmail drive +``` +[More options →](#start-the-server) + + + +**Tool Tiers** +- `core` - Essential tools +- `extended` - Core + extras +- `complete` - Everything +[Details →](#tool-tiers) + +
+ +
+ + + +#### Required Configuration +
+Environment Variables ← Click to configure in Claude Desktop + + + +
+ +**Required** +| Variable | Purpose | +|----------|---------| +| `GOOGLE_OAUTH_CLIENT_ID` | OAuth client ID from Google Cloud | +| `GOOGLE_OAUTH_CLIENT_SECRET` | OAuth client secret | +| `OAUTHLIB_INSECURE_TRANSPORT=1` | Development only (allows `http://` redirect) | + + + +**Optional** +| Variable | Purpose | +|----------|---------| +| `USER_GOOGLE_EMAIL` | Default email for single-user auth | +| `GOOGLE_PSE_API_KEY` | API key for Custom Search | +| `GOOGLE_PSE_ENGINE_ID` | Search Engine ID for Custom Search | +| `MCP_ENABLE_OAUTH21` | Set to `true` for OAuth 2.1 support | +| `EXTERNAL_OAUTH21_PROVIDER` | Set to `true` for external OAuth flow with bearer tokens (requires OAuth 2.1) | +| `WORKSPACE_MCP_STATELESS_MODE` | Set to `true` for stateless operation (requires OAuth 2.1) | + +
+ +Claude Desktop stores these securely in the OS keychain; set them once in the extension pane. +
+ +--- + +### One-Click Claude Desktop Install (Claude Desktop Only, Stdio, Single User) + +1. **Download:** Grab the latest `google_workspace_mcp.dxt` from the “Releases” page +2. **Install:** Double-click the file – Claude Desktop opens and prompts you to **Install** +3. **Configure:** In Claude Desktop → **Settings → Extensions → Google Workspace MCP**, paste your Google OAuth credentials +4. **Use it:** Start a new Claude chat and call any Google Workspace tool + +> +**Why DXT?** +> Desktop Extensions (`.dxt`) bundle the server, dependencies, and manifest so users go from download → working MCP in **one click** – no terminal, no JSON editing, no version conflicts. + +
+ +
+ +--- + +### Prerequisites + +- **Python 3.10+** +- **[uvx](https://github.com/astral-sh/uv)** (for instant installation) or [uv](https://github.com/astral-sh/uv) (for development) +- **Google Cloud Project** with OAuth 2.0 credentials + +### Configuration + +
+Google Cloud Setup ← OAuth 2.0 credentials & API enablement + + + + + + + + + + +
+ +**1. Create Project** +```text +console.cloud.google.com + +→ Create new project +→ Note project name +``` +[Open Console →](https://console.cloud.google.com/) + + + +**2. OAuth Credentials** +```text +APIs & Services → Credentials +→ Create Credentials +→ OAuth Client ID +→ Desktop Application +``` +Download & save credentials + + + +**3. Enable APIs** +```text +APIs & Services → Library + +Search & enable: +Calendar, Drive, Gmail, +Docs, Sheets, Slides, +Forms, Tasks, People, +Chat, Search +``` +See quick links below + +
+ +
+OAuth Credential Setup Guide ← Step-by-step instructions + +**Complete Setup Process:** + +1. **Create OAuth 2.0 Credentials** - Visit [Google Cloud Console](https://console.cloud.google.com/) + - Create a new project (or use existing) + - Navigate to **APIs & Services → Credentials** + - Click **Create Credentials → OAuth Client ID** + - Choose **Desktop Application** as the application type (no redirect URIs needed!) + - Download credentials and note the Client ID and Client Secret + +2. **Enable Required APIs** - In **APIs & Services → Library** + - Search for and enable each required API + - Or use the quick links below for one-click enabling + +3. **Configure Environment** - Set your credentials: + ```bash + export GOOGLE_OAUTH_CLIENT_ID="your-client-id" + export GOOGLE_OAUTH_CLIENT_SECRET="your-secret" + ``` + +[Full Documentation →](https://developers.google.com/workspace/guides/auth-overview) + +
+ +
+ +
+ Quick API Enable Links ← One-click enable each Google API + You can enable each one by clicking the links below (make sure you're logged into the Google Cloud Console and have the correct project selected): + +* [Enable Google Calendar API](https://console.cloud.google.com/flows/enableapi?apiid=calendar-json.googleapis.com) +* [Enable Google Drive API](https://console.cloud.google.com/flows/enableapi?apiid=drive.googleapis.com) +* [Enable Gmail API](https://console.cloud.google.com/flows/enableapi?apiid=gmail.googleapis.com) +* [Enable Google Docs API](https://console.cloud.google.com/flows/enableapi?apiid=docs.googleapis.com) +* [Enable Google Sheets API](https://console.cloud.google.com/flows/enableapi?apiid=sheets.googleapis.com) +* [Enable Google Slides API](https://console.cloud.google.com/flows/enableapi?apiid=slides.googleapis.com) +* [Enable Google Forms API](https://console.cloud.google.com/flows/enableapi?apiid=forms.googleapis.com) +* [Enable Google Tasks API](https://console.cloud.google.com/flows/enableapi?apiid=tasks.googleapis.com) +* [Enable Google Chat API](https://console.cloud.google.com/flows/enableapi?apiid=chat.googleapis.com) +* [Enable Google People API](https://console.cloud.google.com/flows/enableapi?apiid=people.googleapis.com) +* [Enable Google Custom Search API](https://console.cloud.google.com/flows/enableapi?apiid=customsearch.googleapis.com) +* [Enable Google Apps Script API](https://console.cloud.google.com/flows/enableapi?apiid=script.googleapis.com) + +
+ +
+ +1. **Credentials**: See [Credential Configuration](#credential-configuration) for detailed setup options + +2. **Environment Configuration**: + +
+Environment Variables ← Configure your runtime environment + + + + + + + +
+ +**◆ Development Mode** +```bash +export OAUTHLIB_INSECURE_TRANSPORT=1 +``` +Allows HTTP redirect URIs + + + +**@ Default User** +```bash +export USER_GOOGLE_EMAIL=your.email@gmail.com +``` +Single-user authentication + + + +**◆ Custom Search** +```bash +export GOOGLE_PSE_API_KEY=xxx +export GOOGLE_PSE_ENGINE_ID=yyy +``` +Optional: Search API setup + +
+ +
+ +3. **Server Configuration**: + +
+Server Settings ← Customize ports, URIs & proxies + + + + + + + +
+ +**◆ Base Configuration** +```bash +export WORKSPACE_MCP_BASE_URI=http://localhost +export WORKSPACE_MCP_PORT=8000 +export WORKSPACE_MCP_HOST=0.0.0.0 # Use 127.0.0.1 for localhost-only +``` +Server URL & port settings + + + +**↻ Proxy Support** +```bash +export MCP_ENABLE_OAUTH21=true +``` +Leverage multi-user OAuth 2.1 clients + + + +**@ Default Email** +```bash +export USER_GOOGLE_EMAIL=your.email@gmail.com +``` +Skip email in auth flows in single-user mode + +
+ +
+Configuration Details ← Learn more about each setting + +| Variable | Description | Default | +|----------|-------------|---------| +| `WORKSPACE_MCP_BASE_URI` | Base server URI (no port) | `http://localhost` | +| `WORKSPACE_MCP_PORT` | Server listening port | `8000` | +| `WORKSPACE_MCP_HOST` | Server bind host | `0.0.0.0` | +| `WORKSPACE_EXTERNAL_URL` | External URL for reverse proxy setups | None | +| `WORKSPACE_ATTACHMENT_DIR` | Directory for downloaded attachments | `~/.workspace-mcp/attachments/` | +| `GOOGLE_OAUTH_REDIRECT_URI` | Override OAuth callback URL | Auto-constructed | +| `USER_GOOGLE_EMAIL` | Default auth email | None | + +
+ +
+ +### Google Custom Search Setup + +
+Custom Search Configuration ← Enable web search capabilities + + + + + + + + + + +
+ +**1. Create Search Engine** +```text +programmablesearchengine.google.com +/controlpanel/create + +→ Configure sites or entire web +→ Note your Engine ID (cx) +``` +[Open Control Panel →](https://programmablesearchengine.google.com/controlpanel/create) + + + +**2. Get API Key** +```text +developers.google.com +/custom-search/v1/overview + +→ Create/select project +→ Enable Custom Search API +→ Create credentials (API Key) +``` +[Get API Key →](https://developers.google.com/custom-search/v1/overview) + + + +**3. Set Variables** +```bash +export GOOGLE_PSE_API_KEY="your-api-key" +export GOOGLE_PSE_ENGINE_ID="your-engine-id" +``` +Configure in environment + +
+ +
+Quick Setup Guide ← Step-by-step instructions + +**Complete Setup Process:** + +1. **Create Search Engine** - Visit the [Control Panel](https://programmablesearchengine.google.com/controlpanel/create) + - Choose "Search the entire web" or specify sites + - Copy the Search Engine ID (looks like: `017643444788157684527:6ivsjbpxpqw`) + +2. **Enable API & Get Key** - Visit the [Google Cloud Console](https://console.cloud.google.com/) + - Enable "Custom Search API" in your project + - Create credentials → API Key + - Restrict key to Custom Search API (recommended) + +3. **Configure Environment** - Add to your shell or `.env`: + ```bash + export GOOGLE_PSE_API_KEY="AIzaSy..." + export GOOGLE_PSE_ENGINE_ID="01764344478..." + ``` + +≡ [Full Documentation →](https://developers.google.com/custom-search/v1/overview) + +
+ +
+ +
+ +### Start the Server + +> **📌 Transport Mode Guidance**: Use **streamable HTTP mode** (`--transport streamable-http`) for all modern MCP clients including Claude Code, VS Code MCP, and MCP Inspector. Stdio mode is only for clients with incomplete MCP specification support. + +
+Launch Commands ← Choose your startup mode + + + + + + + + + + +
+ +**▶ Legacy Mode** +```bash +uv run main.py +``` +⚠️ Stdio mode (incomplete MCP clients only) + + + +**◆ HTTP Mode (Recommended)** +```bash +uv run main.py \ + --transport streamable-http +``` +✅ Full MCP spec compliance & OAuth 2.1 + + + +**@ Single User** +```bash +uv run main.py \ + --single-user +``` +Simplified authentication +⚠️ Cannot be used with OAuth 2.1 mode + +
+ +
+Advanced Options ← Tool selection, tiers & Docker + +**▶ Selective Tool Loading** +```bash +# Load specific services only +uv run main.py --tools gmail drive calendar +uv run main.py --tools sheets docs + +# Combine with other flags +uv run main.py --single-user --tools gmail +``` + + +**🔒 Read-Only Mode** +```bash +# Requests only read-only scopes & disables write tools +uv run main.py --read-only + +# Combine with specific tools or tiers +uv run main.py --tools gmail drive --read-only +uv run main.py --tool-tier core --read-only +``` +Read-only mode provides secure, restricted access by: +- Requesting only `*.readonly` OAuth scopes (e.g., `gmail.readonly`, `drive.readonly`) +- Automatically filtering out tools that require write permissions at startup +- Allowing read operations: list, get, search, and export across all services + +**🔐 Granular Permissions** +```bash +# Per-service permission levels +uv run main.py --permissions gmail:organize drive:readonly + +# Combine permissions with tier filtering +uv run main.py --permissions gmail:send drive:full --tool-tier core +``` +Granular permissions mode provides service-by-service scope control: +- Format: `service:level` (one entry per service) +- Gmail levels: `readonly`, `organize`, `drafts`, `send`, `full` (cumulative) +- Tasks levels: `readonly`, `manage`, `full` (cumulative; `manage` allows create/update/move but denies `delete` and `clear_completed`) +- Other services currently support: `readonly`, `full` +- `--permissions` and `--read-only` are mutually exclusive +- `--permissions` cannot be combined with `--tools`; enabled services are determined by the `--permissions` entries (optionally filtered by `--tool-tier`) +- With `--tool-tier`, only tier-matched tools are enabled and only services that have tools in the selected tier are imported + +**★ Tool Tiers** +```bash +uv run main.py --tool-tier core # ● Essential tools only +uv run main.py --tool-tier extended # ◐ Core + additional +uv run main.py 
--tool-tier complete # ○ All available tools +``` + +**◆ Docker Deployment** +```bash +docker build -t workspace-mcp . +docker run -p 8000:8000 -v $(pwd):/app \ + workspace-mcp --transport streamable-http + +# With tool selection via environment variables +docker run -e TOOL_TIER=core workspace-mcp +docker run -e TOOLS="gmail drive calendar" workspace-mcp +``` + +**Available Services**: `gmail` • `drive` • `calendar` • `docs` • `sheets` • `forms` • `tasks` • `contacts` • `chat` • `search` + +
+ +
+ +
+ +### CLI Mode + +The server supports a CLI mode for direct tool invocation without running the full MCP server. This is ideal for scripting, automation, and use by coding agents (Codex, Claude Code). + +
+CLI Commands ← Direct tool execution from command line + + + + + + + + + + +
+ +**▶ List Tools** +```bash +workspace-mcp --cli +workspace-mcp --cli list +workspace-mcp --cli list --json +``` +View all available tools + + + +**◆ Tool Help** +```bash +workspace-mcp --cli search_gmail_messages --help +``` +Show parameters and documentation + +
+ +**▶ Run with Arguments** +```bash +workspace-mcp --cli search_gmail_messages \ + --args '{"query": "is:unread"}' +``` +Execute tool with inline JSON + + + +**◆ Pipe from Stdin** +```bash +echo '{"query": "is:unread"}' | \ + workspace-mcp --cli search_gmail_messages +``` +Pass arguments via stdin + +
+ +
+CLI Usage Details ← Complete reference + +**Command Structure:** +```bash +workspace-mcp --cli [command] [options] +``` + +**Commands:** +| Command | Description | +|---------|-------------| +| `list` (default) | List all available tools | +| `<tool_name>` | Execute the specified tool | +| `<tool_name> --help` | Show detailed help for a tool | + +**Options:** +| Option | Description | +|--------|-------------| +| `--args`, `-a` | JSON string with tool arguments | +| `--json`, `-j` | Output in JSON format (for `list` command) | +| `--help`, `-h` | Show help for a tool | + +**Examples:** +```bash +# List all Gmail tools +workspace-mcp --cli list | grep gmail + +# Search for unread emails +workspace-mcp --cli search_gmail_messages --args '{"query": "is:unread", "max_results": 5}' + +# Get calendar events for today +workspace-mcp --cli get_events --args '{"calendar_id": "primary", "time_min": "2024-01-15T00:00:00Z"}' + +# Create a Drive file from a URL +workspace-mcp --cli create_drive_file --args '{"name": "doc.pdf", "source_url": "https://example.com/file.pdf"}' + +# Combine with jq for processing +workspace-mcp --cli list --json | jq '.tools[] | select(.name | contains("gmail"))' +``` + +**Notes:** +- CLI mode uses OAuth 2.0 (same credentials as server mode) +- Authentication flows work the same way - browser opens for first-time auth +- Results are printed to stdout; errors go to stderr +- Exit code 0 on success, 1 on error + +
+ +
+ +### Tool Tiers + +The server organizes tools into **three progressive tiers** for simplified deployment. Choose a tier that matches your usage needs and API quota requirements. + + + + + + +
+ +#### Available Tiers + +**● Core** (`--tool-tier core`) +Essential tools for everyday tasks. Perfect for light usage with minimal API quotas. Includes search, read, create, and basic modify operations across all services. + +**◐ Extended** (`--tool-tier extended`) +Core functionality plus management tools. Adds labels, folders, batch operations, and advanced search. Ideal for regular usage with moderate API needs. + +**○ Complete** (`--tool-tier complete`) +Full API access including comments, headers/footers, publishing settings, and administrative functions. For power users needing maximum functionality. + + + +#### Important Notes + +- **Start with `core`** and upgrade as needed +- **Tiers are cumulative** – each includes all previous tiers +- **Mix and match** with `--tools` for specific services +- **Configuration** in `core/tool_tiers.yaml` +- **Authentication** included in all tiers + +
+ +#### Usage Examples + +```bash +# Basic tier selection +uv run main.py --tool-tier core # Start with essential tools only +uv run main.py --tool-tier extended # Expand to include management features +uv run main.py --tool-tier complete # Enable all available functionality + +# Selective service loading with tiers +uv run main.py --tools gmail drive --tool-tier core # Core tools for specific services +uv run main.py --tools gmail --tool-tier extended # Extended Gmail functionality only +uv run main.py --tools docs sheets --tool-tier complete # Full access to Docs and Sheets + +# Combine tier selection with granular permission levels +uv run main.py --permissions gmail:organize drive:full --tool-tier core +``` + +## 📋 Credential Configuration + +
+🔑 OAuth Credentials Setup ← Essential for all installations + + + + + + + + + + +
+ +**🚀 Environment Variables** +```bash +export GOOGLE_OAUTH_CLIENT_ID="your-client-id" +export GOOGLE_OAUTH_CLIENT_SECRET="your-secret" +``` +Best for production + + + +**📁 File-based** +```bash +# Download & place in project root +client_secret.json + +# Or specify custom path +export GOOGLE_CLIENT_SECRET_PATH=/path/to/secret.json +``` +Traditional method + + + +**⚡ .env File** +```bash +cp .env.oauth21 .env +# Edit .env with credentials +``` +Best for development + +
+ +
+📖 Credential Loading Details ← Understanding priority & best practices + +**Loading Priority** +1. Environment variables (`export VAR=value`) +2. `.env` file in project root. **Warning:** if you run via `uvx` rather than `uv run` from the repo directory, you spawn a standalone process that is not associated with your clone of the repo, so it will not find your `.env` file unless you point to it explicitly. +3. `client_secret.json` via `GOOGLE_CLIENT_SECRET_PATH` +4. Default `client_secret.json` in project root + +**Why Environment Variables?** +- ✅ **Docker/K8s ready** - Native container support +- ✅ **Cloud platforms** - Heroku, Railway, Vercel +- ✅ **CI/CD pipelines** - GitHub Actions, Jenkins +- ✅ **No secrets in git** - Keep credentials secure +- ✅ **Easy rotation** - Update without code changes + +
+ +
+ +
+ +--- + +## 🧰 Available Tools + +> **Note**: All tools support automatic authentication via `@require_google_service()` decorators with 30-minute service caching. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +### 📅 **Google Calendar** [`calendar_tools.py`](gcalendar/calendar_tools.py) + +| Tool | Tier | Description | +|------|------|-------------| +| `list_calendars` | **Core** | List accessible calendars | +| `get_events` | **Core** | Retrieve events with time range filtering | +| `manage_event` | **Core** | Create, update, or delete calendar events | + + + +### 📁 **Google Drive** [`drive_tools.py`](gdrive/drive_tools.py) + +| Tool | Tier | Description | +|------|------|-------------| +| `search_drive_files` | **Core** | Search files with query syntax | +| `get_drive_file_content` | **Core** | Read file content (Office formats) | +| `get_drive_file_download_url` | **Core** | Download Drive files to local disk | +| `create_drive_file` | **Core** | Create files or fetch from URLs | +| `create_drive_folder` | **Core** | Create empty folders in Drive or shared drives | +| `import_to_google_doc` | **Core** | Import files (MD, DOCX, HTML, etc.) as Google Docs | +| `get_drive_shareable_link` | **Core** | Get shareable links for a file | +| `list_drive_items` | Extended | List folder contents | +| `copy_drive_file` | Extended | Copy existing files (templates) with optional renaming | +| `update_drive_file` | Extended | Update file metadata, move between folders | +| `manage_drive_access` | Extended | Grant, update, revoke permissions, and transfer ownership | +| `set_drive_file_permissions` | Extended | Set link sharing and file-level sharing settings | +| `get_drive_file_permissions` | Complete | Get detailed file permissions | +| `check_drive_file_public_access` | Complete | Check public sharing status | + +
+ +### 📧 **Gmail** [`gmail_tools.py`](gmail/gmail_tools.py) + +| Tool | Tier | Description | +|------|------|-------------| +| `search_gmail_messages` | **Core** | Search with Gmail operators | +| `get_gmail_message_content` | **Core** | Retrieve message content | +| `get_gmail_messages_content_batch` | **Core** | Batch retrieve message content | +| `send_gmail_message` | **Core** | Send emails | +| `get_gmail_thread_content` | Extended | Get full thread content | +| `modify_gmail_message_labels` | Extended | Modify message labels | +| `list_gmail_labels` | Extended | List available labels | +| `list_gmail_filters` | Extended | List Gmail filters | +| `manage_gmail_label` | Extended | Create/update/delete labels | +| `manage_gmail_filter` | Extended | Create or delete Gmail filters | +| `draft_gmail_message` | Extended | Create drafts | +| `get_gmail_threads_content_batch` | Complete | Batch retrieve thread content | +| `batch_modify_gmail_message_labels` | Complete | Batch modify labels | +| `start_google_auth` | Complete | Legacy OAuth 2.0 auth (disabled when OAuth 2.1 is enabled) | + +
+📎 Email Attachments ← Send emails with files + +Both `send_gmail_message` and `draft_gmail_message` support attachments via two methods: + +**Option 1: File Path** (local server only) +```python +attachments=[{"path": "/path/to/report.pdf"}] +``` +Reads file from disk, auto-detects MIME type. Optional `filename` override. + +**Option 2: Base64 Content** (works everywhere) +```python +attachments=[{ + "filename": "report.pdf", + "content": "JVBERi0xLjQK...", # base64-encoded + "mime_type": "application/pdf" # optional +}] +``` + +**⚠️ Centrally Hosted Servers**: When the MCP server runs remotely (cloud, shared instance), it cannot access your local filesystem. Use **Option 2** with base64-encoded content. Your MCP client must encode files before sending. + +
+ +
+📥 Downloaded Attachment Storage ← Where downloaded files are saved + +When downloading Gmail attachments (`get_gmail_attachment_content`) or Drive files (`get_drive_file_download_url`), files are saved to a persistent local directory rather than a temporary folder in the working directory. + +**Default location:** `~/.workspace-mcp/attachments/` + +Files are saved with their original filename plus a short UUID suffix for uniqueness (e.g., `invoice_a1b2c3d4.pdf`). In **stdio mode**, the tool returns the absolute file path for direct filesystem access. In **HTTP mode**, it returns a download URL via the `/attachments/{file_id}` endpoint. + +To customize the storage directory: +```bash +export WORKSPACE_ATTACHMENT_DIR="/path/to/custom/dir" +``` + +Saved files expire after 1 hour and are cleaned up automatically. + +
+ +
+ +### 📝 **Google Docs** [`docs_tools.py`](gdocs/docs_tools.py) + +| Tool | Tier | Description | +|------|------|-------------| +| `get_doc_content` | **Core** | Extract document text | +| `create_doc` | **Core** | Create new documents | +| `modify_doc_text` | **Core** | Modify document text (formatting + links) | +| `search_docs` | Extended | Find documents by name | +| `find_and_replace_doc` | Extended | Find and replace text | +| `list_docs_in_folder` | Extended | List docs in folder | +| `insert_doc_elements` | Extended | Add tables, lists, page breaks | +| `update_paragraph_style` | Extended | Apply heading styles, lists (bulleted/numbered with nesting), and paragraph formatting | +| `get_doc_as_markdown` | Extended | Export document as formatted Markdown with optional comments | +| `insert_doc_image` | Complete | Insert images from Drive/URLs | +| `update_doc_headers_footers` | Complete | Modify headers and footers | +| `batch_update_doc` | Complete | Execute multiple operations | +| `inspect_doc_structure` | Complete | Analyze document structure | +| `export_doc_to_pdf` | Extended | Export document to PDF | +| `create_table_with_data` | Complete | Create data tables | +| `debug_table_structure` | Complete | Debug table issues | +| `list_document_comments` | Complete | List all document comments | +| `manage_document_comment` | Complete | Create, reply to, or resolve comments | + +
+ +### 📊 **Google Sheets** [`sheets_tools.py`](gsheets/sheets_tools.py) + +| Tool | Tier | Description | +|------|------|-------------| +| `read_sheet_values` | **Core** | Read cell ranges | +| `modify_sheet_values` | **Core** | Write/update/clear cells | +| `create_spreadsheet` | **Core** | Create new spreadsheets | +| `list_spreadsheets` | Extended | List accessible spreadsheets | +| `get_spreadsheet_info` | Extended | Get spreadsheet metadata | +| `format_sheet_range` | Extended | Apply colors, number formats, text wrapping, alignment, bold/italic, font size | +| `create_sheet` | Complete | Add sheets to existing files | +| `list_spreadsheet_comments` | Complete | List all spreadsheet comments | +| `manage_spreadsheet_comment` | Complete | Create, reply to, or resolve comments | +| `manage_conditional_formatting` | Complete | Add, update, or delete conditional formatting rules | + + + +### 🖼️ **Google Slides** [`slides_tools.py`](gslides/slides_tools.py) + +| Tool | Tier | Description | +|------|------|-------------| +| `create_presentation` | **Core** | Create new presentations | +| `get_presentation` | **Core** | Retrieve presentation details | +| `batch_update_presentation` | Extended | Apply multiple updates | +| `get_page` | Extended | Get specific slide information | +| `get_page_thumbnail` | Extended | Generate slide thumbnails | +| `list_presentation_comments` | Complete | List all presentation comments | +| `manage_presentation_comment` | Complete | Create, reply to, or resolve comments | + +
+ +### 📝 **Google Forms** [`forms_tools.py`](gforms/forms_tools.py) + +| Tool | Tier | Description | +|------|------|-------------| +| `create_form` | **Core** | Create new forms | +| `get_form` | **Core** | Retrieve form details & URLs | +| `set_publish_settings` | Complete | Configure form settings | +| `get_form_response` | Complete | Get individual responses | +| `list_form_responses` | Extended | List all responses with pagination | +| `batch_update_form` | Complete | Apply batch updates (questions, settings) | + + + +### ✓ **Google Tasks** [`tasks_tools.py`](gtasks/tasks_tools.py) + +| Tool | Tier | Description | +|------|------|-------------| +| `list_tasks` | **Core** | List tasks with filtering | +| `get_task` | **Core** | Retrieve task details | +| `manage_task` | **Core** | Create, update, delete, or move tasks | +| `list_task_lists` | Complete | List task lists | +| `get_task_list` | Complete | Get task list details | +| `manage_task_list` | Complete | Create, update, delete task lists, or clear completed tasks | + +
+ +### 👤 **Google Contacts** [`contacts_tools.py`](gcontacts/contacts_tools.py) + +| Tool | Tier | Description | +|------|------|-------------| +| `search_contacts` | **Core** | Search contacts by name, email, phone | +| `get_contact` | **Core** | Retrieve detailed contact info | +| `list_contacts` | **Core** | List contacts with pagination | +| `manage_contact` | **Core** | Create, update, or delete contacts | +| `list_contact_groups` | Extended | List contact groups/labels | +| `get_contact_group` | Extended | Get group details with members | +| `manage_contacts_batch` | Complete | Batch create, update, or delete contacts | +| `manage_contact_group` | Complete | Create, update, delete groups, or modify membership | + +
+ +### 💬 **Google Chat** [`chat_tools.py`](gchat/chat_tools.py) + +| Tool | Tier | Description | +|------|------|-------------| +| `list_spaces` | Extended | List chat spaces/rooms | +| `get_messages` | **Core** | Retrieve space messages | +| `send_message` | **Core** | Send messages to spaces | +| `search_messages` | **Core** | Search across chat history | +| `create_reaction` | **Core** | Add emoji reaction to a message | +| `download_chat_attachment` | Extended | Download attachment from a chat message | + + + +### 🔍 **Google Custom Search** [`search_tools.py`](gsearch/search_tools.py) + +| Tool | Tier | Description | +|------|------|-------------| +| `search_custom` | **Core** | Perform web searches (supports site restrictions via sites parameter) | +| `get_search_engine_info` | Complete | Retrieve search engine metadata | + +
+ +### **Google Apps Script** [`apps_script_tools.py`](gappsscript/apps_script_tools.py) + +| Tool | Tier | Description | +|------|------|-------------| +| `list_script_projects` | **Core** | List accessible Apps Script projects | +| `get_script_project` | **Core** | Get complete project with all files | +| `get_script_content` | **Core** | Retrieve specific file content | +| `create_script_project` | **Core** | Create new standalone or bound project | +| `update_script_content` | **Core** | Update or create script files | +| `run_script_function` | **Core** | Execute function with parameters | +| `list_deployments` | Extended | List all project deployments | +| `manage_deployment` | Extended | Create, update, or delete script deployments | +| `list_script_processes` | Extended | View recent executions and status | + +
+ + +**Tool Tier Legend:** +- **Core**: Essential tools for basic functionality • Minimal API usage • Getting started +- **Extended**: Core tools + additional features • Regular usage • Expanded capabilities +- **Complete**: All available tools including advanced features • Power users • Full API access + +--- + +### Connect to Claude Desktop + +The server supports two transport modes: + +#### Stdio Mode (Legacy - For Clients with Incomplete MCP Support) + +> **⚠️ Important**: Stdio mode is a **legacy fallback** for clients that don't properly implement the MCP specification with OAuth 2.1 and streamable HTTP support. **Claude Code and other modern MCP clients should use streamable HTTP mode** (`--transport streamable-http`) for proper OAuth flow and multi-user support. + +In general, you should use the one-click DXT installer package for Claude Desktop. +If you are unable to do so for some reason, you can configure it manually via `claude_desktop_config.json`. + +**Manual Claude Configuration (Alternative)** + +
+📝 Claude Desktop JSON Config ← Click for manual setup instructions + +1. Open Claude Desktop Settings → Developer → Edit Config + - **macOS**: `~/Library/Application Support/Claude/claude_desktop_config.json` + - **Windows**: `%APPDATA%\Claude\claude_desktop_config.json` + +2. Add the server configuration: +```json +{ + "mcpServers": { + "google_workspace": { + "command": "uvx", + "args": ["workspace-mcp"], + "env": { + "GOOGLE_OAUTH_CLIENT_ID": "your-client-id", + "GOOGLE_OAUTH_CLIENT_SECRET": "your-secret", + "OAUTHLIB_INSECURE_TRANSPORT": "1" + } + } + } +} +``` +
+ +### Connect to LM Studio + +Add a new MCP server in LM Studio (Settings → MCP Servers) using the same JSON format: + +```json +{ + "mcpServers": { + "google_workspace": { + "command": "uvx", + "args": ["workspace-mcp"], + "env": { + "GOOGLE_OAUTH_CLIENT_ID": "your-client-id", + "GOOGLE_OAUTH_CLIENT_SECRET": "your-secret", + "OAUTHLIB_INSECURE_TRANSPORT": "1" + } + } + } +} +``` + + +### 2. Advanced / Cross-Platform Installation + +If you’re developing, deploying to servers, or using another MCP-capable client, keep reading. + +#### Instant CLI (uvx) + +
+Quick Start with uvx ← No installation required! + +```bash +# Requires Python 3.10+ and uvx +# First, set credentials (see Credential Configuration above) +uvx workspace-mcp --tool-tier core # or --tools gmail drive calendar +``` + +> **Note**: Configure [OAuth credentials](#credential-configuration) before running. Supports environment variables, `.env` file, or `client_secret.json`. + +
+ +### Local Development Setup + +
+🛠️ Developer Workflow ← Install deps, lint, and test + +```bash +# Install everything needed for linting, tests, and release tooling +uv sync --group dev + +# Run the same linter that git hooks invoke automatically +uv run ruff check . + +# Execute the full test suite (async fixtures require pytest-asyncio) +uv run pytest +``` + +- `uv sync --group test` installs only the testing stack if you need a slimmer environment. +- `uv run main.py --transport streamable-http` launches the server with your checked-out code for manual verification. +- Ruff is part of the `dev` group because pre-push hooks call `ruff check` automatically—run it locally before committing to avoid hook failures. + +
+ +### OAuth 2.1 Support (Multi-User Bearer Token Authentication) + +The server includes OAuth 2.1 support for bearer token authentication, enabling multi-user session management. **OAuth 2.1 automatically reuses your existing `GOOGLE_OAUTH_CLIENT_ID` and `GOOGLE_OAUTH_CLIENT_SECRET` credentials** - no additional configuration needed! + +**When to use OAuth 2.1:** +- Multiple users accessing the same MCP server instance +- Need for bearer token authentication instead of passing user emails +- Building web applications or APIs on top of the MCP server +- Production environments requiring secure session management +- Browser-based clients requiring CORS support + +**⚠️ Important: OAuth 2.1 and Single-User Mode are mutually exclusive** + +OAuth 2.1 mode (`MCP_ENABLE_OAUTH21=true`) cannot be used together with the `--single-user` flag: +- **Single-user mode**: For legacy clients that pass user emails in tool calls +- **OAuth 2.1 mode**: For modern multi-user scenarios with bearer token authentication + +Choose one authentication method - using both will result in a startup error. + +**Enabling OAuth 2.1:** +To enable OAuth 2.1, set the `MCP_ENABLE_OAUTH21` environment variable to `true`. + +```bash +# OAuth 2.1 requires HTTP transport mode +export MCP_ENABLE_OAUTH21=true +uv run main.py --transport streamable-http +``` + +If `MCP_ENABLE_OAUTH21` is not set to `true`, the server will use legacy authentication, which is suitable for clients that do not support OAuth 2.1. + +
+🔐 How the FastMCP GoogleProvider handles OAuth ← Advanced OAuth 2.1 details + +FastMCP ships a native `GoogleProvider` that we now rely on directly. It solves the two tricky parts of using Google OAuth with MCP clients: + +1. **Dynamic Client Registration**: Google still doesn't support OAuth 2.1 DCR, but the FastMCP provider exposes the full DCR surface and forwards registrations to Google using your fixed credentials. MCP clients register as usual and the provider hands them your Google client ID/secret under the hood. + +2. **CORS & Browser Compatibility**: The provider includes an OAuth proxy that serves all discovery, authorization, and token endpoints with proper CORS headers. We no longer maintain custom `/oauth2/*` routes—the provider handles the upstream exchanges securely and advertises the correct metadata to clients. + +The result is a leaner server that still enables any OAuth 2.1 compliant client (including browser-based ones) to authenticate through Google without bespoke code. + +
+ +### Stateless Mode (Container-Friendly) + +The server supports a stateless mode designed for containerized environments where file system writes should be avoided: + +**Enabling Stateless Mode:** +```bash +# Stateless mode requires OAuth 2.1 to be enabled +export MCP_ENABLE_OAUTH21=true +export WORKSPACE_MCP_STATELESS_MODE=true +uv run main.py --transport streamable-http +``` + +**Key Features:** +- **No file system writes**: Credentials are never written to disk +- **No debug logs**: File-based logging is completely disabled +- **Memory-only sessions**: All tokens stored in memory via OAuth 2.1 session store +- **Container-ready**: Perfect for Docker, Kubernetes, and serverless deployments +- **Token per request**: Each request must include a valid Bearer token + +**Requirements:** +- Must be used with `MCP_ENABLE_OAUTH21=true` +- Incompatible with single-user mode +- Clients must handle OAuth flow and send valid tokens with each request + +This mode is ideal for: +- Cloud deployments where persistent storage is unavailable +- Multi-tenant environments requiring strict isolation +- Containerized applications with read-only filesystems +- Serverless functions and ephemeral compute environments + +**MCP Inspector**: No additional configuration needed with desktop OAuth client. + +**Claude Code**: No additional configuration needed with desktop OAuth client. + +### OAuth Proxy Storage Backends + +The server supports pluggable storage backends for OAuth proxy state management via FastMCP 2.13.0+. Choose a backend based on your deployment needs. 
+ +**Available Backends:** + +| Backend | Best For | Persistence | Multi-Server | +|---------|----------|-------------|--------------| +| Memory | Development, testing | ❌ | ❌ | +| Disk | Single-server production | ✅ | ❌ | +| Valkey/Redis | Distributed production | ✅ | ✅ | + +**Configuration:** + +```bash +# Memory storage (fast, no persistence) +export WORKSPACE_MCP_OAUTH_PROXY_STORAGE_BACKEND=memory + +# Disk storage (persists across restarts) +export WORKSPACE_MCP_OAUTH_PROXY_STORAGE_BACKEND=disk +export WORKSPACE_MCP_OAUTH_PROXY_DISK_DIRECTORY=~/.fastmcp/oauth-proxy + +# Valkey/Redis storage (distributed, multi-server) +export WORKSPACE_MCP_OAUTH_PROXY_STORAGE_BACKEND=valkey +export WORKSPACE_MCP_OAUTH_PROXY_VALKEY_HOST=redis.example.com +export WORKSPACE_MCP_OAUTH_PROXY_VALKEY_PORT=6379 +``` + +> Disk support requires `workspace-mcp[disk]` (or `py-key-value-aio[disk]`) when installing from source. +> The official Docker image includes the `disk` extra by default. +> Valkey support is optional. Install `workspace-mcp[valkey]` (or `py-key-value-aio[valkey]`) only if you enable the Valkey backend. +> Windows: building `valkey-glide` from source requires MSVC C++ build tools with C11 support. If you see `aws-lc-sys` C11 errors, set `CFLAGS=/std:c11`. + +
+🔐 Valkey/Redis Configuration Options + +| Variable | Default | Description | +|----------|---------|-------------| +| `WORKSPACE_MCP_OAUTH_PROXY_VALKEY_HOST` | localhost | Valkey/Redis host | +| `WORKSPACE_MCP_OAUTH_PROXY_VALKEY_PORT` | 6379 | Port (6380 auto-enables TLS) | +| `WORKSPACE_MCP_OAUTH_PROXY_VALKEY_DB` | 0 | Database number | +| `WORKSPACE_MCP_OAUTH_PROXY_VALKEY_USE_TLS` | auto | Enable TLS (auto if port 6380) | +| `WORKSPACE_MCP_OAUTH_PROXY_VALKEY_USERNAME` | - | Authentication username | +| `WORKSPACE_MCP_OAUTH_PROXY_VALKEY_PASSWORD` | - | Authentication password | +| `WORKSPACE_MCP_OAUTH_PROXY_VALKEY_REQUEST_TIMEOUT_MS` | 5000 | Request timeout for remote hosts | +| `WORKSPACE_MCP_OAUTH_PROXY_VALKEY_CONNECTION_TIMEOUT_MS` | 10000 | Connection timeout for remote hosts | + +**Encryption:** Disk and Valkey storage are encrypted with Fernet. The encryption key is derived from `FASTMCP_SERVER_AUTH_GOOGLE_JWT_SIGNING_KEY` if set, otherwise from `GOOGLE_OAUTH_CLIENT_SECRET`. + +
+ +### External OAuth 2.1 Provider Mode + +The server supports an external OAuth 2.1 provider mode for scenarios where authentication is handled by an external system. In this mode, the MCP server does not manage the OAuth flow itself but expects valid bearer tokens in the Authorization header of tool calls. + +**Enabling External OAuth 2.1 Provider Mode:** +```bash +# External OAuth provider mode requires OAuth 2.1 to be enabled +export MCP_ENABLE_OAUTH21=true +export EXTERNAL_OAUTH21_PROVIDER=true +uv run main.py --transport streamable-http +``` + +**How It Works:** +- **Protocol-level auth disabled**: MCP handshake (`initialize`) and `tools/list` do not require authentication +- **Tool-level auth required**: All tool calls must include an `Authorization: Bearer <token>` header +- **External OAuth flow**: Your external system handles the OAuth flow and obtains Google access tokens +- **Token validation**: Server validates bearer tokens via Google's tokeninfo API +- **Multi-user support**: Each request is authenticated independently based on its bearer token + +**Key Features:** +- **No local OAuth flow**: Server does not provide OAuth callback endpoints or manage OAuth state +- **Bearer token only**: All authentication via Authorization headers +- **Stateless by design**: Works seamlessly with `WORKSPACE_MCP_STATELESS_MODE=true` +- **External identity providers**: Integrate with your existing authentication infrastructure +- **Tool discovery**: Clients can list available tools without authentication + +**Requirements:** +- Must be used with `MCP_ENABLE_OAUTH21=true` +- OAuth credentials still required for token validation (`GOOGLE_OAUTH_CLIENT_ID`, `GOOGLE_OAUTH_CLIENT_SECRET`) +- External system must obtain valid Google OAuth access tokens (ya29.*) +- Each tool call request must include a valid bearer token + +**Use Cases:** +- Integrating with existing authentication systems +- Custom OAuth flows managed by your application +- API gateways that handle authentication
upstream +- Multi-tenant SaaS applications with centralized auth +- Mobile or web apps with their own OAuth implementation + + +### VS Code MCP Client Support + +> **✅ Recommended**: VS Code MCP extension properly supports the full MCP specification. **Always use HTTP transport mode** for proper OAuth 2.1 authentication. + +
+🆚 VS Code Configuration ← Setup for VS Code MCP extension + +```json +{ + "servers": { + "google-workspace": { + "url": "http://localhost:8000/mcp/", + "type": "http" + } + } +} +``` + +*Note: Make sure to start the server with `--transport streamable-http` when using VS Code MCP.* +
+ +### Claude Code MCP Client Support + +> **✅ Recommended**: Claude Code is a modern MCP client that properly supports the full MCP specification. **Always use HTTP transport mode** with Claude Code for proper OAuth 2.1 authentication and multi-user support. + +
+🆚 Claude Code Configuration ← Setup for Claude Code MCP support + +```bash +# Start the server in HTTP mode first +uv run main.py --transport streamable-http + +# Then add to Claude Code +claude mcp add --transport http workspace-mcp http://localhost:8000/mcp +``` +
+ +#### Reverse Proxy Setup + +If you're running the MCP server behind a reverse proxy (nginx, Apache, Cloudflare, etc.), you have two configuration options: + +**Problem**: When behind a reverse proxy, the server constructs OAuth URLs using internal ports (e.g., `http://localhost:8000`) but external clients need the public URL (e.g., `https://your-domain.com`). + +**Solution 1**: Set `WORKSPACE_EXTERNAL_URL` for all OAuth endpoints: +```bash +# This configures all OAuth endpoints to use your external URL +export WORKSPACE_EXTERNAL_URL="https://your-domain.com" +``` + +**Solution 2**: Set `GOOGLE_OAUTH_REDIRECT_URI` for just the callback: +```bash +# This only overrides the OAuth callback URL +export GOOGLE_OAUTH_REDIRECT_URI="https://your-domain.com/oauth2callback" +``` + +The following optional variables are also available: + +| Variable | Description | +|----------|-------------| +| `OAUTH_CUSTOM_REDIRECT_URIS` *(optional)* | Comma-separated list of additional redirect URIs | +| `OAUTH_ALLOWED_ORIGINS` *(optional)* | Comma-separated list of additional CORS origins | + +**Important**: +- Use `WORKSPACE_EXTERNAL_URL` when all OAuth endpoints should use the external URL (recommended for reverse proxy setups) +- Use `GOOGLE_OAUTH_REDIRECT_URI` when you only need to override the callback URL +- The redirect URI must exactly match what's configured in your Google Cloud Console +- Your reverse proxy must forward OAuth-related requests (`/oauth2callback`, `/oauth2/*`, `/.well-known/*`) to the MCP server + +
+🚀 Advanced uvx Commands ← More startup options + +```bash +# Configure credentials first (see Credential Configuration section) + +# Start with specific tools only +uvx workspace-mcp --tools gmail drive calendar tasks + +# Start with tool tiers (recommended for most users) +uvx workspace-mcp --tool-tier core # Essential tools +uvx workspace-mcp --tool-tier extended # Core + additional features +uvx workspace-mcp --tool-tier complete # All tools + +# Start in HTTP mode for debugging +uvx workspace-mcp --transport streamable-http +``` +
+ +*Requires Python 3.10+ and [uvx](https://github.com/astral-sh/uv). The package is available on [PyPI](https://pypi.org/project/workspace-mcp).* + +### Development Installation + +For development or customization: + +```bash +git clone https://github.com/taylorwilsdon/google_workspace_mcp.git +cd google_workspace_mcp +uv run main.py +``` + +**Development Installation (For Contributors)**: + +
+🔧 Developer Setup JSON ← For contributors & customization + +```json +{ + "mcpServers": { + "google_workspace": { + "command": "uv", + "args": [ + "run", + "--directory", + "/path/to/repo/google_workspace_mcp", + "main.py" + ], + "env": { + "GOOGLE_OAUTH_CLIENT_ID": "your-client-id", + "GOOGLE_OAUTH_CLIENT_SECRET": "your-secret", + "OAUTHLIB_INSECURE_TRANSPORT": "1" + } + } + } +} +``` +
+ +#### HTTP Mode (For debugging or web interfaces) +If you need to use HTTP mode with Claude Desktop: + +```json +{ + "mcpServers": { + "google_workspace": { + "command": "npx", + "args": ["mcp-remote", "http://localhost:8000/mcp"] + } + } +} +``` + +*Note: Make sure to start the server with `--transport streamable-http` when using HTTP mode.* + +### First-Time Authentication + +The server uses **Google Desktop OAuth** for simplified authentication: + +- **No redirect URIs needed**: Desktop OAuth clients handle authentication without complex callback URLs +- **Automatic flow**: The server manages the entire OAuth process transparently +- **Transport-agnostic**: Works seamlessly in both stdio and HTTP modes + +When calling a tool: +1. Server returns authorization URL +2. Open URL in browser and authorize +3. Google provides an authorization code +4. Paste the code when prompted (or it's handled automatically) +5. Server completes authentication and retries your request + +--- + +## ◆ Development + +### Project Structure + +``` +google_workspace_mcp/ +├── auth/ # Authentication system with decorators +├── core/ # MCP server and utilities +├── g{service}/ # Service-specific tools +├── main.py # Server entry point +├── client_secret.json # OAuth credentials (not committed) +└── pyproject.toml # Dependencies +``` + +### Adding New Tools + +```python +from auth.service_decorator import require_google_service + +@require_google_service("drive", "drive_read") # Service + scope group +async def your_new_tool(service, param1: str, param2: int = 10): + """Tool description""" + # service is automatically injected and cached + result = service.files().list().execute() + return result # Return native Python objects +``` + +### Architecture Highlights + +- **Service Caching**: 30-minute TTL reduces authentication overhead +- **Scope Management**: Centralized in `SCOPE_GROUPS` for easy maintenance +- **Error Handling**: Native exceptions instead of manual error construction +- 
**Multi-Service Support**: `@require_multiple_services()` for complex tools + +### Credential Store System + +The server includes an abstract credential store API and a default backend for managing Google OAuth +credentials with support for multiple storage backends: + +**Features:** +- **Abstract Interface**: `CredentialStore` base class defines standard operations (get, store, delete, list users) +- **Local File Storage**: `LocalDirectoryCredentialStore` implementation stores credentials as JSON files +- **Configurable Storage**: Environment variable `GOOGLE_MCP_CREDENTIALS_DIR` sets storage location +- **Multi-User Support**: Store and manage credentials for multiple Google accounts +- **Automatic Directory Creation**: Storage directory is created automatically if it doesn't exist + +**Configuration:** +```bash +# Optional: Set custom credentials directory +export GOOGLE_MCP_CREDENTIALS_DIR="/path/to/credentials" + +# Default locations (if GOOGLE_MCP_CREDENTIALS_DIR not set): +# - ~/.google_workspace_mcp/credentials (if home directory accessible) +# - ./.credentials (fallback) +``` + +**Usage Example:** +```python +from auth.credential_store import get_credential_store + +# Get the global credential store instance +store = get_credential_store() + +# Store credentials for a user +store.store_credential("user@example.com", credentials) + +# Retrieve credentials +creds = store.get_credential("user@example.com") + +# List all users with stored credentials +users = store.list_users() +``` + +The credential store automatically handles credential serialization, expiry parsing, and provides error handling for storage operations. + +--- + +## ⊠ Security + +- **Credentials**: Never commit `.env`, `client_secret.json` or the `.credentials/` directory to source control! 
+- **OAuth Callback**: Uses `http://localhost:8000/oauth2callback` for development (requires `OAUTHLIB_INSECURE_TRANSPORT=1`) +- **Transport-Aware Callbacks**: Stdio mode starts a minimal HTTP server only for OAuth, ensuring callbacks work in all modes +- **Production**: Use HTTPS & OAuth 2.1 and configure accordingly +- **Scope Minimization**: Tools request only necessary permissions +- **Local File Access Control**: Tools that read local files (e.g., attachments, `file://` uploads) are restricted to the user's home directory by default. Override this with the `ALLOWED_FILE_DIRS` environment variable: + ```bash + # Colon-separated list of directories (semicolon on Windows) from which local file reads are permitted + export ALLOWED_FILE_DIRS="/home/user/documents:/data/shared" + ``` + Regardless of the allowlist, access to sensitive paths (`.env`, `.ssh/`, `.aws/`, `/etc/shadow`, credential files, etc.) is always blocked. + +--- + + +--- + +## ≡ License + +MIT License - see `LICENSE` file for details. + +--- + +Validations: +[![MCP Badge](https://lobehub.com/badge/mcp/taylorwilsdon-google_workspace_mcp)](https://lobehub.com/mcp/taylorwilsdon-google_workspace_mcp) + +[![Verified on MseeP](https://mseep.ai/badge.svg)](https://mseep.ai/app/eebbc4a6-0f8c-41b2-ace8-038e5516dba0) + + +
+Batch Emails +
diff --git a/README_NEW.md b/README_NEW.md new file mode 100644 index 0000000..9e01ba0 --- /dev/null +++ b/README_NEW.md @@ -0,0 +1,473 @@ +
+ +# Google Workspace MCP Server + +[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) +[![Python 3.10+](https://img.shields.io/badge/Python-3.10%2B-blue.svg)](https://www.python.org/downloads/) +[![PyPI](https://img.shields.io/pypi/v/workspace-mcp.svg)](https://pypi.org/project/workspace-mcp/) + +**Complete Google Workspace control through natural language.** Gmail, Calendar, Drive, Docs, Sheets, Slides, Forms, Tasks, Chat, Apps Script, and Custom Search—all via MCP. + +[Quick Start](#-quick-start) • [Tools Reference](#-tools-reference) • [Configuration](#-configuration) • [OAuth Setup](#-oauth-setup) + +
+ +--- + +## ⚡ Quick Start + +### One-Click Install (Claude Desktop) + +1. Download `google_workspace_mcp.dxt` from [Releases](https://github.com/taylorwilsdon/google_workspace_mcp/releases) +2. Double-click → Claude Desktop installs automatically +3. Add your Google OAuth credentials in Settings → Extensions + +### CLI Install + +```bash +# Instant run (no install) +uvx workspace-mcp + +# With specific tools only +uvx workspace-mcp --tools gmail drive calendar + +# With tool tier +uvx workspace-mcp --tool-tier core +``` + +### Environment Variables + +```bash +export GOOGLE_OAUTH_CLIENT_ID="your-client-id" +export GOOGLE_OAUTH_CLIENT_SECRET="your-client-secret" +export OAUTHLIB_INSECURE_TRANSPORT=1 # Development only +``` + +--- + +## 🛠 Tools Reference + +### Gmail (10 tools) + +| Tool | Tier | Description | +|------|------|-------------| +| `search_gmail_messages` | Core | Search with Gmail operators, returns message/thread IDs with web links | +| `get_gmail_message_content` | Core | Get full message: subject, sender, body, attachments | +| `get_gmail_messages_content_batch` | Core | Batch retrieve up to 25 messages | +| `send_gmail_message` | Core | Send emails with HTML support, CC/BCC, threading | +| `get_gmail_thread_content` | Extended | Get complete conversation thread | +| `draft_gmail_message` | Extended | Create drafts with threading support | +| `list_gmail_labels` | Extended | List all system and user labels | +| `manage_gmail_label` | Extended | Create, update, delete labels | +| `modify_gmail_message_labels` | Extended | Add/remove labels (archive, trash, etc.) 
| +| `manage_gmail_filter` | Extended | Create or delete Gmail filters | +| `get_gmail_threads_content_batch` | Complete | Batch retrieve threads | +| `batch_modify_gmail_message_labels` | Complete | Bulk label operations | + +**Also includes:** `get_gmail_attachment_content`, `list_gmail_filters` + +### Google Drive (10 tools) + +| Tool | Tier | Description | +|------|------|-------------| +| `search_drive_files` | Core | Search files with Drive query syntax or free text | +| `get_drive_file_content` | Core | Read content from Docs, Sheets, Office files (.docx, .xlsx, .pptx) | +| `get_drive_file_download_url` | Core | Download Drive files to local disk | +| `create_drive_file` | Core | Create files from content or URL (supports file://, http://, https://) | +| `create_drive_folder` | Core | Create empty folders in Drive or shared drives | +| `import_to_google_doc` | Core | Import files (MD, DOCX, HTML, etc.) as Google Docs | +| `get_drive_shareable_link` | Core | Get shareable links for a file | +| `list_drive_items` | Extended | List folder contents with shared drive support | +| `copy_drive_file` | Extended | Copy existing files (templates) with optional renaming | +| `update_drive_file` | Extended | Update metadata, move between folders, star, trash | +| `manage_drive_access` | Extended | Grant, update, revoke permissions, and transfer ownership | +| `set_drive_file_permissions` | Extended | Set link sharing and file-level sharing settings | +| `get_drive_file_permissions` | Complete | Get detailed file permissions | +| `check_drive_file_public_access` | Complete | Verify public link sharing for Docs image insertion | + +### Google Calendar (3 tools) + +| Tool | Tier | Description | +|------|------|-------------| +| `list_calendars` | Core | List all accessible calendars | +| `get_events` | Core | Query events by time range, search, or specific ID | +| `manage_event` | Core | Create, update, or delete calendar events | + +**Event features:** Timezone support, 
transparency (busy/free), visibility settings, up to 5 custom reminders, Google Meet integration, attendees, attachments + +### Google Docs (14 tools) + +| Tool | Tier | Description | +|------|------|-------------| +| `get_doc_content` | Core | Extract text from Docs or .docx files (supports tabs) | +| `create_doc` | Core | Create new documents with optional initial content | +| `modify_doc_text` | Core | Insert, replace, format text (bold, italic, colors, fonts, links) | +| `search_docs` | Extended | Find documents by name | +| `find_and_replace_doc` | Extended | Global find/replace with case matching | +| `list_docs_in_folder` | Extended | List Docs in a specific folder | +| `insert_doc_elements` | Extended | Add tables, lists, page breaks | +| `update_paragraph_style` | Extended | Apply heading styles, lists (bulleted/numbered with nesting), and paragraph formatting | +| `get_doc_as_markdown` | Extended | Export document as formatted Markdown with optional comments | +| `export_doc_to_pdf` | Extended | Export to PDF and save to Drive | +| `insert_doc_image` | Complete | Insert images from Drive or URLs | +| `update_doc_headers_footers` | Complete | Modify headers/footers | +| `batch_update_doc` | Complete | Execute multiple operations atomically | +| `inspect_doc_structure` | Complete | Analyze document structure for safe insertion points | +| `create_table_with_data` | Complete | Create and populate tables in one operation | +| `debug_table_structure` | Complete | Debug table cell positions and content | +| `list_document_comments` | Complete | List all document comments | +| `manage_document_comment` | Complete | Create, reply to, or resolve comments | + +### Google Sheets (9 tools) + +| Tool | Tier | Description | +|------|------|-------------| +| `read_sheet_values` | Core | Read cell ranges with formatted output | +| `modify_sheet_values` | Core | Write, update, or clear cell values | +| `create_spreadsheet` | Core | Create new spreadsheets with multiple 
sheets | +| `list_spreadsheets` | Extended | List accessible spreadsheets | +| `get_spreadsheet_info` | Extended | Get metadata, sheets, conditional formats | +| `format_sheet_range` | Extended | Apply colors, number formats, text wrapping, alignment, bold/italic, font size | +| `create_sheet` | Complete | Add sheets to existing spreadsheets | +| `list_spreadsheet_comments` | Complete | List all spreadsheet comments | +| `manage_spreadsheet_comment` | Complete | Create, reply to, or resolve comments | +| `manage_conditional_formatting` | Complete | Add, update, or delete conditional formatting rules | + +### Google Slides (7 tools) + +| Tool | Tier | Description | +|------|------|-------------| +| `create_presentation` | Core | Create new presentations | +| `get_presentation` | Core | Get presentation details with slide text extraction | +| `batch_update_presentation` | Extended | Apply multiple updates (create slides, shapes, etc.) | +| `get_page` | Extended | Get specific slide details and elements | +| `get_page_thumbnail` | Extended | Generate PNG thumbnails | +| `list_presentation_comments` | Complete | List all presentation comments | +| `manage_presentation_comment` | Complete | Create, reply to, or resolve comments | + +### Google Forms (6 tools) + +| Tool | Tier | Description | +|------|------|-------------| +| `create_form` | Core | Create forms with title and description | +| `get_form` | Core | Get form details, questions, and URLs | +| `list_form_responses` | Extended | List responses with pagination | +| `set_publish_settings` | Complete | Configure template and authentication settings | +| `get_form_response` | Complete | Get individual response details | +| `batch_update_form` | Complete | Execute batch updates to forms (questions, items, settings) | + +### Google Tasks (5 tools) + +| Tool | Tier | Description | +|------|------|-------------| +| `list_tasks` | Core | List tasks with filtering, subtask hierarchy preserved | +| `get_task` | Core | Get 
task details | +| `manage_task` | Core | Create, update, delete, or move tasks | +| `list_task_lists` | Complete | List all task lists | +| `get_task_list` | Complete | Get task list details | +| `manage_task_list` | Complete | Create, update, delete task lists, or clear completed tasks | + +### Google Apps Script (9 tools) + +| Tool | Tier | Description | +|------|------|-------------| +| `list_script_projects` | Core | List accessible Apps Script projects | +| `get_script_project` | Core | Get complete project with all files | +| `get_script_content` | Core | Retrieve specific file content | +| `create_script_project` | Core | Create new standalone or bound project | +| `update_script_content` | Core | Update or create script files | +| `run_script_function` | Core | Execute function with parameters | +| `list_deployments` | Extended | List all project deployments | +| `manage_deployment` | Extended | Create, update, or delete script deployments | +| `list_script_processes` | Extended | View recent executions and status | + +**Enables:** Cross-app automation, persistent workflows, custom business logic execution, script development and debugging + +**Note:** Trigger management is not currently supported via MCP tools. 
+ +### Google Contacts (7 tools) + +| Tool | Tier | Description | +|------|------|-------------| +| `search_contacts` | Core | Search contacts by name, email, phone | +| `get_contact` | Core | Retrieve detailed contact info | +| `list_contacts` | Core | List contacts with pagination | +| `manage_contact` | Core | Create, update, or delete contacts | +| `list_contact_groups` | Extended | List contact groups/labels | +| `get_contact_group` | Extended | Get group details with members | +| `manage_contacts_batch` | Complete | Batch create, update, or delete contacts | +| `manage_contact_group` | Complete | Create, update, delete groups, or modify membership | + +### Google Chat (4 tools) + +| Tool | Tier | Description | +|------|------|-------------| +| `get_messages` | Core | Retrieve messages from a space | +| `send_message` | Core | Send messages with optional threading | +| `search_messages` | Core | Search across chat history | +| `list_spaces` | Extended | List rooms and DMs | + +### Google Custom Search (2 tools) + +| Tool | Tier | Description | +|------|------|-------------| +| `search_custom` | Core | Web search with filters (date, file type, language, safe search, site restrictions via sites parameter) | +| `get_search_engine_info` | Complete | Get search engine metadata | + +**Requires:** `GOOGLE_PSE_API_KEY` and `GOOGLE_PSE_ENGINE_ID` environment variables + +--- + +## 📊 Tool Tiers + +Choose a tier based on your needs: + +| Tier | Tools | Use Case | +|------|-------|----------| +| **Core** | ~30 | Essential operations: search, read, create, send | +| **Extended** | ~50 | Core + management: labels, folders, batch ops | +| **Complete** | 111 | Full API: comments, headers, admin functions | + +```bash +uvx workspace-mcp --tool-tier core # Start minimal +uvx workspace-mcp --tool-tier extended # Add management +uvx workspace-mcp --tool-tier complete # Everything +``` + +Mix tiers with specific services: +```bash +uvx workspace-mcp --tools gmail drive --tool-tier 
extended +``` + +--- + +## ⚙ Configuration + +### Required + +| Variable | Description | +|----------|-------------| +| `GOOGLE_OAUTH_CLIENT_ID` | OAuth client ID from Google Cloud | +| `GOOGLE_OAUTH_CLIENT_SECRET` | OAuth client secret | + +### Optional + +| Variable | Description | +|----------|-------------| +| `USER_GOOGLE_EMAIL` | Default email for single-user mode | +| `GOOGLE_PSE_API_KEY` | Custom Search API key | +| `GOOGLE_PSE_ENGINE_ID` | Programmable Search Engine ID | +| `MCP_ENABLE_OAUTH21` | Enable OAuth 2.1 multi-user support | +| `WORKSPACE_MCP_STATELESS_MODE` | No file writes (container-friendly) | +| `EXTERNAL_OAUTH21_PROVIDER` | External OAuth flow with bearer tokens | +| `WORKSPACE_MCP_BASE_URI` | Server base URL (default: `http://localhost`) | +| `WORKSPACE_MCP_PORT` | Server port (default: `8000`) | +| `WORKSPACE_EXTERNAL_URL` | External URL for reverse proxy setups | +| `GOOGLE_MCP_CREDENTIALS_DIR` | Custom credentials storage path | + +--- + +## 🔐 OAuth Setup + +### 1. Create Google Cloud Project + +1. Go to [Google Cloud Console](https://console.cloud.google.com/) +2. Create a new project +3. Navigate to **APIs & Services → Credentials** +4. Click **Create Credentials → OAuth Client ID** +5. Select **Desktop Application** +6. Download credentials + +### 2. 
Enable APIs + +Click to enable each API: + +- [Calendar](https://console.cloud.google.com/flows/enableapi?apiid=calendar-json.googleapis.com) +- [Drive](https://console.cloud.google.com/flows/enableapi?apiid=drive.googleapis.com) +- [Gmail](https://console.cloud.google.com/flows/enableapi?apiid=gmail.googleapis.com) +- [Docs](https://console.cloud.google.com/flows/enableapi?apiid=docs.googleapis.com) +- [Sheets](https://console.cloud.google.com/flows/enableapi?apiid=sheets.googleapis.com) +- [Slides](https://console.cloud.google.com/flows/enableapi?apiid=slides.googleapis.com) +- [Forms](https://console.cloud.google.com/flows/enableapi?apiid=forms.googleapis.com) +- [Tasks](https://console.cloud.google.com/flows/enableapi?apiid=tasks.googleapis.com) +- [Chat](https://console.cloud.google.com/flows/enableapi?apiid=chat.googleapis.com) +- [Custom Search](https://console.cloud.google.com/flows/enableapi?apiid=customsearch.googleapis.com) + +### 3. First Authentication + +When you first call a tool: +1. Server returns an authorization URL +2. Open URL in browser, authorize access +3. Paste the authorization code when prompted +4. Credentials are cached for future use + +--- + +## 🚀 Transport Modes + +### Stdio (Default) + +Best for Claude Desktop and local MCP clients: + +```bash +uvx workspace-mcp +``` + +### HTTP (Streamable) + +For web interfaces, debugging, or multi-client setups: + +```bash +uvx workspace-mcp --transport streamable-http +``` + +Access at `http://localhost:8000/mcp/` + +### Docker + +```bash +docker build -t workspace-mcp . +docker run -p 8000:8000 \ + -e GOOGLE_OAUTH_CLIENT_ID="..." \ + -e GOOGLE_OAUTH_CLIENT_SECRET="..." 
\ + workspace-mcp --transport streamable-http +``` + +--- + +## 🔧 Client Configuration + +### Claude Desktop + +```json +{ + "mcpServers": { + "google_workspace": { + "command": "uvx", + "args": ["workspace-mcp", "--tool-tier", "core"], + "env": { + "GOOGLE_OAUTH_CLIENT_ID": "your-client-id", + "GOOGLE_OAUTH_CLIENT_SECRET": "your-secret", + "OAUTHLIB_INSECURE_TRANSPORT": "1" + } + } + } +} +``` + +### LM Studio + +```json +{ + "mcpServers": { + "google_workspace": { + "command": "uvx", + "args": ["workspace-mcp"], + "env": { + "GOOGLE_OAUTH_CLIENT_ID": "your-client-id", + "GOOGLE_OAUTH_CLIENT_SECRET": "your-secret", + "OAUTHLIB_INSECURE_TRANSPORT": "1", + "USER_GOOGLE_EMAIL": "you@example.com" + } + } + } +} +``` + +### VS Code + +```json +{ + "servers": { + "google-workspace": { + "url": "http://localhost:8000/mcp/", + "type": "http" + } + } +} +``` + +### Claude Code + +```bash +claude mcp add --transport http workspace-mcp http://localhost:8000/mcp +``` + +--- + +## 🏗 Architecture + +``` +google_workspace_mcp/ +├── auth/ # OAuth 2.0/2.1, credential storage, decorators +├── core/ # MCP server, tool registry, utilities +├── gcalendar/ # Calendar tools +├── gchat/ # Chat tools +├── gdocs/ # Docs tools + managers (tables, headers, batch) +├── gdrive/ # Drive tools + helpers +├── gforms/ # Forms tools +├── gmail/ # Gmail tools +├── gsearch/ # Custom Search tools +├── gsheets/ # Sheets tools + helpers +├── gslides/ # Slides tools +├── gtasks/ # Tasks tools +└── main.py # Entry point +``` + +### Key Patterns + +**Service Decorator:** All tools use `@require_google_service()` for automatic authentication with 30-minute service caching. + +```python +@server.tool() +@require_google_service("gmail", "gmail_read") +async def search_gmail_messages(service, user_google_email: str, query: str): + # service is injected automatically + ... 
+``` + +**Multi-Service Tools:** Some tools need multiple APIs: + +```python +@require_multiple_services([ + {"service_type": "drive", "scopes": "drive_read", "param_name": "drive_service"}, + {"service_type": "docs", "scopes": "docs_read", "param_name": "docs_service"}, +]) +async def get_doc_content(drive_service, docs_service, ...): + ... +``` + +--- + +## 🧪 Development + +```bash +git clone https://github.com/taylorwilsdon/google_workspace_mcp.git +cd google_workspace_mcp + +# Install with dev dependencies +uv sync --group dev + +# Run locally +uv run main.py + +# Run tests +uv run pytest + +# Lint +uv run ruff check . +``` + +--- + +## 📄 License + +MIT License - see [LICENSE](LICENSE) for details. + +--- + +
+ +**[Documentation](https://workspacemcp.com)** • **[Issues](https://github.com/taylorwilsdon/google_workspace_mcp/issues)** • **[PyPI](https://pypi.org/project/workspace-mcp/)** + +
diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 0000000..5fb6f32 --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,48 @@ +# Security Policy + +## Reporting Security Issues + +**Please do not report security vulnerabilities through public GitHub issues, discussions, or pull requests.** + +Instead, please email us at **taylor@workspacemcp.com** + +Please include as much of the following information as you can to help us better understand and resolve the issue: + +- The type of issue (e.g., authentication bypass, credential exposure, command injection, etc.) +- Full paths of source file(s) related to the manifestation of the issue +- The location of the affected source code (tag/branch/commit or direct URL) +- Any special configuration required to reproduce the issue +- Step-by-step instructions to reproduce the issue +- Proof-of-concept or exploit code (if possible) +- Impact of the issue, including how an attacker might exploit the issue + +This information will help us triage your report more quickly. + +## Supported Versions + +We release patches for security vulnerabilities. Which versions are eligible for receiving such patches depends on the CVSS v3.0 Rating: + +| Version | Supported | +| ------- | ------------------ | +| 1.4.x | :white_check_mark: | +| < 1.4 | :x: | + +## Security Considerations + +When using this MCP server, please ensure: + +1. Store Google OAuth credentials securely +2. Never commit credentials to version control +3. Use environment variables for sensitive configuration +4. Regularly rotate OAuth refresh tokens +5. Limit OAuth scopes to only what's necessary + +For more information on securing your use of the project, see https://workspacemcp.com/privacy + +## Preferred Languages + +We prefer all communications to be in English. + +## Policy + +We follow the principle of responsible disclosure. 
class AuthInfoMiddleware(Middleware):
    """
    Middleware that resolves the caller's identity and records it in the
    FastMCP context state for use in tools and prompts.

    Identity sources are tried in order until one yields a user email:

    1. An access token that FastMCP has already validated.
    2. A Google OAuth bearer token (``ya29.*``) from the ``Authorization``
       header, verified through the configured auth provider.
    3. In stdio transport only: an existing OAuth 2.1 session for the
       requested user, or the single session present in the store.
    4. A user bound to the current MCP session id.

    Bearer tokens that cannot be verified are never trusted.
    """

    def __init__(self):
        super().__init__()
        # Written to context state as "auth_provider_type" for bearer-token auth.
        self.auth_provider_type = "GoogleProvider"

    # ------------------------------------------------------------------ helpers

    @staticmethod
    def _email_from_token(token):
        """Return the email on *token*: its ``email`` attribute, else ``claims["email"]``."""
        email = getattr(token, "email", None)
        if not email and hasattr(token, "claims"):
            email = token.claims.get("email")
        return email

    @staticmethod
    def _bearer_session_id(token_str):
        """Derive a per-token session id without embedding token characters.

        Google access tokens all start with ``ya29.``, so a plain prefix slice
        is nearly identical for every user; a digest keeps ids distinct and
        keeps token material out of state and logs.
        """
        import hashlib

        digest = hashlib.sha256(token_str.encode("utf-8")).hexdigest()
        return f"google_oauth_{digest[:16]}"

    @staticmethod
    async def _set_state_values(ctx, values):
        """Write each ``key -> value`` pair of *values* to the context state, in order."""
        for key, value in values.items():
            await ctx.set_state(key, value)

    @staticmethod
    def _log_handler_error(exc, handler_name, auth_failure_prefix):
        """Log a handler failure; expected authentication errors get no traceback."""
        is_auth_error = "GoogleAuthenticationError" in str(
            type(exc)
        ) or "Access denied: Cannot retrieve credentials" in str(exc)
        if is_auth_error:
            logger.info(f"{auth_failure_prefix}: {exc}")
        else:
            logger.error(
                f"Error in {handler_name} middleware: {exc}", exc_info=exc
            )

    async def _auth_from_fastmcp_token(self, ctx):
        """Strategy 1: use the access token FastMCP already validated.

        Returns the user email, or None when there is no token or it carries
        no email. Exceptions propagate to the caller.
        """
        access_token = get_access_token()
        if not access_token:
            return None

        logger.info("[AuthInfoMiddleware] FastMCP access_token found")
        user_email = self._email_from_token(access_token)
        if not user_email:
            logger.warning(
                f"FastMCP access_token found but no email. Type: {type(access_token).__name__}"
            )
            return None

        logger.info(f"✓ Using FastMCP validated token for user: {user_email}")
        await self._set_state_values(
            ctx,
            {
                "authenticated_user_email": user_email,
                "authenticated_via": "fastmcp_oauth",
            },
        )
        await ctx.set_state("access_token", access_token, serializable=False)
        return user_email

    async def _auth_from_bearer_header(self, ctx):
        """Strategy 2: verify a Google bearer token from the Authorization header.

        Returns the verified user email (which may be None if the verified
        token carries no email), or None when no usable token is present or
        verification fails. Errors before verification propagate to the caller;
        errors during verification are logged and yield None.
        """
        headers = get_http_headers()
        logger.info(
            f"[AuthInfoMiddleware] get_http_headers() returned: {headers is not None}, keys: {list(headers.keys()) if headers else 'None'}"
        )
        if not headers:
            logger.debug("No HTTP headers available (might be using stdio transport)")
            return None

        logger.debug("Processing HTTP headers for authentication")
        auth_header = headers.get("authorization", "")
        # The auth scheme is case-insensitive per the HTTP spec.
        if auth_header[:7].lower() != "bearer ":
            logger.debug("No Bearer token in Authorization header")
            return None

        token_str = auth_header[7:]  # Remove "Bearer " prefix
        logger.info("Found Bearer token in request")

        if not token_str.startswith("ya29."):
            # SECURITY: Never set authenticated_user_email from unverified tokens
            logger.debug(
                "Unverified JWT token rejected - only verified tokens accepted"
            )
            return None

        logger.debug("Detected Google OAuth access token format")
        from core.server import get_auth_provider

        auth_provider = get_auth_provider()
        if not auth_provider:
            logger.warning("No auth provider available to verify Google token")
            return None

        try:
            verified_auth = await auth_provider.verify_token(token_str)
            if not verified_auth:
                logger.error("Failed to verify Google OAuth token")
                return None

            user_email = self._email_from_token(verified_auth)

            if isinstance(verified_auth, WorkspaceAccessToken):
                # ExternalOAuthProvider returns a fully-formed WorkspaceAccessToken
                access_token = verified_auth
            else:
                # Standard GoogleProvider returns a base AccessToken;
                # wrap it in WorkspaceAccessToken for identical downstream handling
                verified_expires = getattr(verified_auth, "expires_at", None)
                access_token = WorkspaceAccessToken(
                    token=token_str,
                    client_id=getattr(verified_auth, "client_id", None) or "google",
                    scopes=getattr(verified_auth, "scopes", []) or [],
                    session_id=self._bearer_session_id(token_str),
                    expires_at=verified_expires
                    if verified_expires is not None
                    else int(time.time()) + get_session_time(),
                    claims=getattr(verified_auth, "claims", {}) or {},
                    sub=getattr(verified_auth, "sub", None) or user_email,
                    email=user_email,
                )

            # Store in context state - this is the authoritative authentication state
            await ctx.set_state("access_token", access_token, serializable=False)
            mcp_session_id = getattr(ctx, "session_id", None)
            ensure_session_from_access_token(access_token, user_email, mcp_session_id)
            await self._set_state_values(
                ctx,
                {
                    "auth_provider_type": self.auth_provider_type,
                    "token_type": "google_oauth",
                    "user_email": user_email,
                    "username": user_email,
                    # Set the definitive authentication state last
                    "authenticated_user_email": user_email,
                    "authenticated_via": "bearer_token",
                },
            )
            return user_email
        except Exception as e:
            logger.error(f"Error verifying Google OAuth token: {e}")
            return None

    async def _auth_from_stdio_session(self, context, ctx):
        """Strategy 3 (stdio only): reuse an existing OAuth 2.1 session.

        This is ONLY safe in stdio mode because it is single-user.
        Returns ``(user_email, auth_via)`` or ``(None, None)``.
        """
        logger.debug("Checking for stdio mode authentication")

        # Get the requested user from the context if available
        requested_user = None
        if hasattr(context, "request") and hasattr(context.request, "params"):
            requested_user = context.request.params.get("user_google_email")
        elif hasattr(context, "arguments"):
            # FastMCP may store arguments differently
            requested_user = context.arguments.get("user_google_email")

        if requested_user:
            try:
                from auth.oauth21_session_store import get_oauth21_session_store

                store = get_oauth21_session_store()
                if store.has_session(requested_user):
                    logger.debug(f"Using recent stdio session for {requested_user}")
                    await self._set_state_values(
                        ctx,
                        {
                            "authenticated_user_email": requested_user,
                            "authenticated_via": "stdio_session",
                            "auth_provider_type": "oauth21_stdio",
                        },
                    )
                    return requested_user, "stdio_session"
            except Exception as e:
                logger.debug(f"Error checking stdio session: {e}")

        # No usable requested user: if exactly one session exists, assume it.
        try:
            from auth.oauth21_session_store import get_oauth21_session_store

            store = get_oauth21_session_store()
            single_user = store.get_single_user_email()
            if single_user:
                logger.debug(
                    f"Defaulting to single stdio OAuth session for {single_user}"
                )
                await self._set_state_values(
                    ctx,
                    {
                        "authenticated_user_email": single_user,
                        "authenticated_via": "stdio_single_session",
                        "auth_provider_type": "oauth21_stdio",
                        "user_email": single_user,
                        "username": single_user,
                    },
                )
                return single_user, "stdio_single_session"
        except Exception as e:
            logger.debug(f"Error determining stdio single-user session: {e}")

        return None, None

    async def _auth_from_session_binding(self, ctx):
        """Strategy 4: look up the user bound to the current MCP session id.

        Returns the bound user email or None.
        """
        mcp_session_id = getattr(ctx, "session_id", None)
        if not mcp_session_id:
            return None

        try:
            from auth.oauth21_session_store import get_oauth21_session_store

            store = get_oauth21_session_store()
            bound_user = store.get_user_by_mcp_session(mcp_session_id)
            if bound_user:
                logger.debug(f"MCP session bound to {bound_user}")
                await self._set_state_values(
                    ctx,
                    {
                        "authenticated_user_email": bound_user,
                        "authenticated_via": "mcp_session_binding",
                        "auth_provider_type": "oauth21_session",
                    },
                )
                return bound_user
        except Exception as e:
            logger.debug(f"Error checking MCP session binding: {e}")
        return None

    # ------------------------------------------------------------- entry points

    async def _process_request_for_auth(self, context: MiddlewareContext):
        """Helper to extract, verify, and store auth info from a request.

        Runs the strategies described on the class in order and stops at the
        first one that yields a user. Does nothing when the middleware context
        has no ``fastmcp_context``.
        """
        ctx = context.fastmcp_context
        if not ctx:
            logger.warning("No fastmcp_context available")
            return

        authenticated_user = None
        auth_via = None

        # First check if FastMCP has already validated an access token
        try:
            authenticated_user = await self._auth_from_fastmcp_token(ctx)
            if authenticated_user:
                auth_via = "fastmcp_oauth"
        except Exception as e:
            logger.debug(f"Could not get FastMCP access_token: {e}")

        # Try the Authorization header of the HTTP request
        if not authenticated_user:
            try:
                authenticated_user = await self._auth_from_bearer_header(ctx)
                if authenticated_user:
                    auth_via = "bearer_token"
            except Exception as e:
                logger.debug(f"Could not get HTTP request: {e}")

        # After trying HTTP headers, check for other authentication methods
        if not authenticated_user:
            logger.debug(
                "No authentication found via bearer token, checking other methods"
            )

            from core.config import get_transport_mode

            if get_transport_mode() == "stdio":
                authenticated_user, auth_via = await self._auth_from_stdio_session(
                    context, ctx
                )

            if not authenticated_user:
                authenticated_user = await self._auth_from_session_binding(ctx)
                if authenticated_user:
                    auth_via = "mcp_session_binding"

        # Single exit point with logging
        if authenticated_user:
            logger.info(f"✓ Authenticated via {auth_via}: {authenticated_user}")
            auth_email = await ctx.get_state("authenticated_user_email")
            logger.debug(
                f"Context state after auth: authenticated_user_email={auth_email}"
            )

    async def on_call_tool(self, context: MiddlewareContext, call_next):
        """Extract auth info from token and set in context state"""
        logger.debug("Processing tool call authentication")

        try:
            await self._process_request_for_auth(context)

            logger.debug("Passing to next handler")
            result = await call_next(context)
            logger.debug("Handler completed")
            return result

        except Exception as e:
            self._log_handler_error(e, "on_call_tool", "Authentication check failed")
            raise

    async def on_get_prompt(self, context: MiddlewareContext, call_next):
        """Extract auth info for prompt requests too"""
        logger.debug("Processing prompt authentication")

        try:
            await self._process_request_for_auth(context)

            logger.debug("Passing prompt to next handler")
            result = await call_next(context)
            logger.debug("Prompt handler completed")
            return result

        except Exception as e:
            self._log_handler_error(
                e, "on_get_prompt", "Authentication check failed in prompt"
            )
            raise
class LocalDirectoryCredentialStore(CredentialStore):
    """Credential store that keeps one ``<email>.json`` file per user in a local directory.

    The files hold OAuth access/refresh tokens and the client secret, so the
    directory is created owner-only (0o700) and files are written owner-only
    (0o600) where the platform honours POSIX modes.
    """

    def __init__(self, base_dir: Optional[str] = None):
        """
        Initialize the local JSON credential store.

        Args:
            base_dir: Base directory for credential files. If None, uses the directory
                     configured by environment variables in this order:
                     1. WORKSPACE_MCP_CREDENTIALS_DIR (preferred)
                     2. GOOGLE_MCP_CREDENTIALS_DIR (backward compatibility)
                     3. ~/.google_workspace_mcp/credentials (default)
        """
        if base_dir is None:
            # Check WORKSPACE_MCP_CREDENTIALS_DIR first (preferred)
            workspace_creds_dir = os.getenv("WORKSPACE_MCP_CREDENTIALS_DIR")
            google_creds_dir = os.getenv("GOOGLE_MCP_CREDENTIALS_DIR")

            if workspace_creds_dir:
                base_dir = os.path.expanduser(workspace_creds_dir)
                logger.info(
                    f"Using credentials directory from WORKSPACE_MCP_CREDENTIALS_DIR: {base_dir}"
                )
            # Fall back to GOOGLE_MCP_CREDENTIALS_DIR for backward compatibility
            elif google_creds_dir:
                base_dir = os.path.expanduser(google_creds_dir)
                logger.info(
                    f"Using credentials directory from GOOGLE_MCP_CREDENTIALS_DIR: {base_dir}"
                )
            else:
                home_dir = os.path.expanduser("~")
                # expanduser returns "~" unchanged when no home can be resolved
                if home_dir and home_dir != "~":
                    base_dir = os.path.join(
                        home_dir, ".google_workspace_mcp", "credentials"
                    )
                else:
                    base_dir = os.path.join(os.getcwd(), ".credentials")
                logger.info(f"Using default credentials directory: {base_dir}")

        self.base_dir = base_dir
        logger.info(
            f"LocalDirectoryCredentialStore initialized with base_dir: {base_dir}"
        )

    def _get_credential_path(self, user_email: str) -> str:
        """Get the file path for a user's credentials, creating the directory if needed.

        Raises:
            ValueError: If ``user_email`` is empty or contains a path separator
                or NUL byte (it would resolve outside ``base_dir``).
            OSError: If the credentials directory cannot be created.
        """
        # The email becomes a file name: refuse anything that could leave base_dir.
        if (
            not isinstance(user_email, str)
            or not user_email
            or "\x00" in user_email
            or "/" in user_email
            or "\\" in user_email
        ):
            raise ValueError(f"Invalid user email for credential file: {user_email!r}")

        if not os.path.exists(self.base_dir):
            # exist_ok avoids a crash when another caller creates it concurrently.
            os.makedirs(self.base_dir, mode=0o700, exist_ok=True)
            logger.info(f"Created credentials directory: {self.base_dir}")
        return os.path.join(self.base_dir, f"{user_email}.json")

    def get_credential(self, user_email: str) -> Optional[Credentials]:
        """Get credentials from local JSON file.

        Returns None when the file is missing, unreadable, not a JSON object,
        or the email cannot be mapped to a safe file name.
        """
        try:
            creds_path = self._get_credential_path(user_email)
        except (ValueError, OSError) as e:
            logger.error(f"Cannot resolve credential file for {user_email!r}: {e}")
            return None

        try:
            with open(creds_path, "r", encoding="utf-8") as f:
                creds_data = json.load(f)
        except FileNotFoundError:
            logger.debug(f"No credential file found for {user_email} at {creds_path}")
            return None
        except (OSError, ValueError) as e:
            # json.JSONDecodeError and UnicodeDecodeError are both ValueErrors
            logger.error(
                f"Error loading credentials for {user_email} from {creds_path}: {e}"
            )
            return None

        if not isinstance(creds_data, dict):
            logger.error(
                f"Error loading credentials for {user_email} from {creds_path}: expected a JSON object"
            )
            return None

        # Parse expiry if present
        expiry = None
        if creds_data.get("expiry"):
            try:
                expiry = datetime.fromisoformat(creds_data["expiry"])
                # Google auth library compares against naive UTC datetimes, so
                # convert aware values to UTC before dropping the tzinfo.
                if expiry.tzinfo is not None:
                    from datetime import timezone

                    expiry = expiry.astimezone(timezone.utc).replace(tzinfo=None)
            except (ValueError, TypeError) as e:
                logger.warning(f"Could not parse expiry time for {user_email}: {e}")

        credentials = Credentials(
            token=creds_data.get("token"),
            refresh_token=creds_data.get("refresh_token"),
            token_uri=creds_data.get("token_uri"),
            client_id=creds_data.get("client_id"),
            client_secret=creds_data.get("client_secret"),
            scopes=creds_data.get("scopes"),
            expiry=expiry,
        )

        logger.debug(f"Loaded credentials for {user_email} from {creds_path}")
        return credentials

    def store_credential(self, user_email: str, credentials: Credentials) -> bool:
        """Store credentials to local JSON file.

        Returns True on success, False if the path is invalid or the write fails.
        """
        try:
            creds_path = self._get_credential_path(user_email)
        except (ValueError, OSError) as e:
            logger.error(f"Cannot resolve credential file for {user_email!r}: {e}")
            return False

        creds_data = {
            "token": credentials.token,
            "refresh_token": credentials.refresh_token,
            "token_uri": credentials.token_uri,
            "client_id": credentials.client_id,
            "client_secret": credentials.client_secret,
            "scopes": credentials.scopes,
            "expiry": credentials.expiry.isoformat() if credentials.expiry else None,
        }

        try:
            # Create the file owner-only from the start; O_BINARY (Windows only)
            # stops the C runtime from translating newlines a second time.
            flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC | getattr(os, "O_BINARY", 0)
            fd = os.open(creds_path, flags, 0o600)
            with os.fdopen(fd, "w", encoding="utf-8") as f:
                json.dump(creds_data, f, indent=2)
        except OSError as e:
            logger.error(
                f"Error storing credentials for {user_email} to {creds_path}: {e}"
            )
            return False

        try:
            # Tighten files that already existed with looser permissions.
            os.chmod(creds_path, 0o600)
        except OSError as e:
            logger.debug(f"Could not restrict permissions on {creds_path}: {e}")

        logger.info(f"Stored credentials for {user_email} to {creds_path}")
        return True

    def delete_credential(self, user_email: str) -> bool:
        """Delete credential file for a user.

        Returns True when the file was removed or did not exist, False on error.
        """
        try:
            creds_path = self._get_credential_path(user_email)
        except (ValueError, OSError) as e:
            logger.error(f"Cannot resolve credential file for {user_email!r}: {e}")
            return False

        try:
            os.remove(creds_path)
        except FileNotFoundError:
            logger.debug(
                f"No credential file to delete for {user_email} at {creds_path}"
            )
            return True  # Consider it a success if file doesn't exist
        except OSError as e:
            logger.error(
                f"Error deleting credentials for {user_email} from {creds_path}: {e}"
            )
            return False

        logger.info(f"Deleted credentials for {user_email} from {creds_path}")
        return True

    def list_users(self) -> List[str]:
        """List all users with credential files, sorted by email.

        Only ``*.json`` files whose stem contains ``@`` are reported.
        """
        if not os.path.exists(self.base_dir):
            return []

        users = []
        # JSON files kept in the same directory that are not user credentials.
        non_credential_files = {"oauth_states"}
        try:
            for filename in os.listdir(self.base_dir):
                if filename.endswith(".json"):
                    user_email = filename[:-5]  # Remove .json extension
                    if user_email in non_credential_files or "@" not in user_email:
                        continue
                    users.append(user_email)
            logger.debug(
                f"Found {len(users)} users with credentials in {self.base_dir}"
            )
        except OSError as e:
            logger.error(f"Error listing credential files in {self.base_dir}: {e}")

        return sorted(users)
+ + Args: + store: Credential store instance to use + """ + global _credential_store + _credential_store = store + logger.info(f"Set credential store: {type(store).__name__}") diff --git a/auth/external_oauth_provider.py b/auth/external_oauth_provider.py new file mode 100644 index 0000000..c4103ed --- /dev/null +++ b/auth/external_oauth_provider.py @@ -0,0 +1,188 @@ +""" +External OAuth Provider for Google Workspace MCP + +Extends FastMCP's GoogleProvider to support external OAuth flows where +access tokens (ya29.*) are issued by external systems and need validation. + +This provider acts as a Resource Server only - it validates tokens issued by +Google's Authorization Server but does not issue tokens itself. +""" + +import functools +import logging +import os +import time +from typing import Optional + +from starlette.routing import Route +from fastmcp.server.auth.providers.google import GoogleProvider +from fastmcp.server.auth import AccessToken +from google.oauth2.credentials import Credentials + +from auth.oauth_types import WorkspaceAccessToken + +logger = logging.getLogger(__name__) + +# Google's OAuth 2.0 Authorization Server +GOOGLE_ISSUER_URL = "https://accounts.google.com" + +# Configurable session time in seconds (default: 1 hour, max: 24 hours) +_DEFAULT_SESSION_TIME = 3600 +_MAX_SESSION_TIME = 86400 + + +@functools.lru_cache(maxsize=1) +def get_session_time() -> int: + """Parse SESSION_TIME from environment with fallback, min/max clamp. + + Result is cached; changes require a server restart. 
class ExternalOAuthProvider(GoogleProvider):
    """
    Extended GoogleProvider that supports validating external Google OAuth access tokens.

    This provider handles ya29.* access tokens by calling Google's userinfo API,
    while maintaining compatibility with standard JWT ID tokens.

    Unlike the standard GoogleProvider, this acts as a Resource Server only:
    - Does NOT create /authorize, /token, /register endpoints
    - Only advertises Google's authorization server in metadata
    - Only validates tokens, does not issue them
    """

    def __init__(
        self,
        client_id: str,
        client_secret: str,
        resource_server_url: Optional[str] = None,
        **kwargs,
    ):
        """Initialize and store client credentials for token validation.

        Args:
            client_id: Google OAuth client id.
            client_secret: Google OAuth client secret.
            resource_server_url: Public URL of this resource server; when unset,
                ``get_routes`` creates no routes.
            **kwargs: Forwarded unchanged to ``GoogleProvider``.
        """
        self._resource_server_url = resource_server_url
        super().__init__(client_id=client_id, client_secret=client_secret, **kwargs)
        # Store credentials as they're not exposed by parent class
        self._client_id = client_id
        self._client_secret = client_secret
        # Re-assigned after the parent constructor has run.
        # Store as string - Pydantic validates it when passed to models
        self.resource_server_url = self._resource_server_url

    async def verify_token(self, token: str) -> Optional[AccessToken]:
        """
        Verify a token - supports both JWT ID tokens and ya29.* access tokens.

        For ya29.* access tokens (issued externally), validates by calling
        Google's userinfo API. For JWT tokens, delegates to parent class.

        Args:
            token: Token string to verify (JWT or ya29.* access token)

        Returns:
            AccessToken object if valid, None otherwise
        """
        # Everything that is not a ya29.* access token goes to the parent class.
        if not token.startswith("ya29."):
            return await super().verify_token(token)

        logger.debug("Validating external Google OAuth access token")

        try:
            import asyncio

            from auth.google_auth import get_user_info

            # Create minimal Credentials object for userinfo API call
            credentials = Credentials(
                token=token,
                token_uri="https://oauth2.googleapis.com/token",
                client_id=self._client_id,
                client_secret=self._client_secret,
            )

            # get_user_info is synchronous; run it in a worker thread so the
            # userinfo round-trip does not stall the event loop.
            user_info = await asyncio.to_thread(
                get_user_info, credentials, skip_valid_check=True
            )

            if not (user_info and user_info.get("email")):
                logger.error("Could not get user info from access token")
                return None

            session_time = get_session_time()
            # Token is valid - create AccessToken object
            logger.info(f"Validated external access token for: {user_info['email']}")

            # NOTE(review): the token is reported with the provider's required
            # scopes; the scopes actually granted to it are not inspected here.
            scope_list = list(getattr(self, "required_scopes", []) or [])
            return WorkspaceAccessToken(
                token=token,
                scopes=scope_list,
                expires_at=int(time.time()) + session_time,
                claims={
                    "email": user_info["email"],
                    "sub": user_info.get("id"),
                },
                client_id=self._client_id,
                email=user_info["email"],
                sub=user_info.get("id"),
            )

        except Exception as e:
            logger.error(f"Error validating external access token: {e}")
            return None

    def get_routes(self, **kwargs) -> list[Route]:
        """
        Get OAuth routes for external provider mode.

        Returns only protected resource metadata routes that point to Google
        as the authorization server. Does not create authorization server routes
        (/authorize, /token, etc.) since tokens are issued by Google directly.

        Args:
            **kwargs: Additional arguments passed by FastMCP (e.g., mcp_path)

        Returns:
            List of routes - only protected resource metadata
        """
        from mcp.server.auth.routes import create_protected_resource_routes

        if not self.resource_server_url:
            logger.warning(
                "ExternalOAuthProvider: resource_server_url not set, no routes created"
            )
            return []

        # Create protected resource routes that point to Google as the authorization server
        # Pass strings directly - Pydantic validates them during model construction
        protected_routes = create_protected_resource_routes(
            resource_url=self.resource_server_url,
            authorization_servers=[GOOGLE_ISSUER_URL],
            # Read defensively, matching verify_token.
            scopes_supported=getattr(self, "required_scopes", None),
            resource_name="Google Workspace MCP",
            resource_documentation=None,
        )

        logger.info(
            f"ExternalOAuthProvider: Created protected resource routes pointing to {GOOGLE_ISSUER_URL}"
        )
        return protected_routes
def get_default_credentials_dir():
    """Resolve the directory where credential files are kept.

    Lookup order:
    1. WORKSPACE_MCP_CREDENTIALS_DIR (preferred)
    2. GOOGLE_MCP_CREDENTIALS_DIR (backward compatibility)
    3. ~/.google_workspace_mcp/credentials (default)
    4. ./.credentials under the current working directory, when no home
       directory can be resolved

    An environment value has a leading ``~`` expanded and is logged before
    being returned.
    """
    for env_name in ("WORKSPACE_MCP_CREDENTIALS_DIR", "GOOGLE_MCP_CREDENTIALS_DIR"):
        configured = os.getenv(env_name)
        if not configured:
            continue
        expanded = os.path.expanduser(configured)
        logger.info(f"Using credentials directory from {env_name}: {expanded}")
        return expanded

    # expanduser hands back "~" untouched when the home directory is unknown.
    home_dir = os.path.expanduser("~")
    if not home_dir or home_dir == "~":
        return os.path.join(os.getcwd(), ".credentials")
    return os.path.join(home_dir, ".google_workspace_mcp", "credentials")
def _find_any_credentials(
    base_dir: str = DEFAULT_CREDENTIALS_DIR,
) -> tuple[Optional[Credentials], Optional[str]]:
    """
    Find and load any valid credentials from the credential store.
    Used in single-user mode to bypass session-to-OAuth mapping.

    Users are tried in the order the store lists them; the first one whose
    credentials load successfully wins, so one unreadable entry does not hide
    the others.

    Args:
        base_dir: Unused; kept for backward compatibility. The location is
            decided by the configured credential store.

    Returns:
        Tuple of (Credentials, user_email) or (None, None) if none exist.
        Returns the user email to enable saving refreshed credentials.
    """
    try:
        store = get_credential_store()
        users = store.list_users()
        if not users:
            logger.info(
                "[single-user] No users found with credentials via credential store"
            )
            return None, None

        for user_email in users:
            try:
                credentials = store.get_credential(user_email)
            except Exception as e:
                # Keep looking: a single broken entry must not block the rest.
                logger.warning(
                    f"[single-user] Error loading credentials for {user_email} via credential store: {e}"
                )
                continue

            if credentials:
                logger.info(
                    f"[single-user] Found credentials for {user_email} via credential store"
                )
                return credentials, user_email

            logger.warning(
                f"[single-user] Could not load credentials for {user_email} via credential store"
            )

    except Exception as e:
        logger.error(
            f"[single-user] Error finding credentials via credential store: {e}"
        )

    logger.info("[single-user] No valid credentials found via credential store")
    return None, None
def load_credentials_from_session(session_id: str) -> Optional[Credentials]:
    """Look up the credentials bound to an MCP session in the OAuth21SessionStore.

    Args:
        session_id: MCP session id to look up.

    Returns:
        Whatever the store returns for that session (falsy when nothing is bound).
    """
    found = get_oauth21_session_store().get_credentials_by_mcp_session(session_id)
    if not found:
        logger.debug(
            f"No credentials found in OAuth21SessionStore for session_id: {session_id}"
        )
        return found

    logger.debug(
        f"Credentials loaded from OAuth21SessionStore for session_id: {session_id}"
    )
    return found
def load_client_secrets(client_secrets_path: str) -> Dict[str, Any]:
    """
    Return the OAuth client configuration, preferring environment variables
    over the client secrets file.

    Priority order:
    1. Environment variables (GOOGLE_OAUTH_CLIENT_ID, GOOGLE_OAUTH_CLIENT_SECRET)
    2. File-based credentials at the specified path

    Args:
        client_secrets_path: Path to the client secrets JSON file (used as fallback)

    Returns:
        The inner client configuration dict (the value under "web", or
        "installed" for file-based desktop credentials)

    Raises:
        ValueError: If the file has neither a "web" nor an "installed" section
            (also raised for malformed JSON)
        IOError: If file cannot be read and no environment variables are set
    """
    from_env = load_client_secrets_from_env()
    if from_env:
        # The environment loader wraps its result in a "web" section.
        return from_env["web"]

    # No environment configuration: read the JSON file instead.
    try:
        with open(client_secrets_path, "r") as secrets_file:
            parsed = json.load(secrets_file)
    except (IOError, json.JSONDecodeError) as e:
        logger.error(f"Error loading client secrets file {client_secrets_path}: {e}")
        raise

    # The file usually contains a top-level key like "web" or "installed"
    for section in ("web", "installed"):
        if section in parsed:
            logger.info(
                f"Loaded OAuth client credentials from file: {client_secrets_path}"
            )
            return parsed[section]

    logger.error(f"Client secrets file {client_secrets_path} has unexpected format.")
    raise ValueError("Invalid client secrets file format")
Please set GOOGLE_OAUTH_CLIENT_ID and GOOGLE_OAUTH_CLIENT_SECRET environment variables or provide a client secrets file at {CONFIG_CLIENT_SECRETS_PATH}." + return None + + +def create_oauth_flow( + scopes: List[str], + redirect_uri: str, + state: Optional[str] = None, + code_verifier: Optional[str] = None, + autogenerate_code_verifier: bool = True, +) -> Flow: + """Creates an OAuth flow using environment variables or client secrets file.""" + flow_kwargs = { + "scopes": scopes, + "redirect_uri": redirect_uri, + "state": state, + } + if code_verifier: + flow_kwargs["code_verifier"] = code_verifier + # Preserve the original verifier when re-creating the flow in callback. + flow_kwargs["autogenerate_code_verifier"] = False + else: + # Generate PKCE code verifier for the initial auth flow. + # google-auth-oauthlib's from_client_* helpers pass + # autogenerate_code_verifier=None unless explicitly provided, which + # prevents Flow from generating and storing a code_verifier. + flow_kwargs["autogenerate_code_verifier"] = autogenerate_code_verifier + + # Try environment variables first + env_config = load_client_secrets_from_env() + if env_config: + # Use client config directly + flow = Flow.from_client_config(env_config, **flow_kwargs) + logger.debug("Created OAuth flow from environment variables") + return flow + + # Fall back to file-based config + if not os.path.exists(CONFIG_CLIENT_SECRETS_PATH): + raise FileNotFoundError( + f"OAuth client secrets file not found at {CONFIG_CLIENT_SECRETS_PATH} and no environment variables set" + ) + + flow = Flow.from_client_secrets_file( + CONFIG_CLIENT_SECRETS_PATH, + **flow_kwargs, + ) + logger.debug( + f"Created OAuth flow from client secrets file: {CONFIG_CLIENT_SECRETS_PATH}" + ) + return flow + + +def _determine_oauth_prompt( + user_google_email: Optional[str], + required_scopes: List[str], + session_id: Optional[str] = None, +) -> str: + """ + Determine which OAuth prompt to use for a new authorization URL. 
+ + Uses `select_account` for re-auth when existing credentials already cover + required scopes. Uses `consent` for first-time auth and scope expansion. + """ + normalized_email = ( + user_google_email.strip() + if user_google_email + and user_google_email.strip() + and user_google_email.lower() != "default" + else None + ) + + # If no explicit email was provided, attempt to resolve it from session mapping. + if not normalized_email and session_id: + try: + session_user = get_oauth21_session_store().get_user_by_mcp_session( + session_id + ) + if session_user: + normalized_email = session_user + except Exception as e: + logger.debug(f"Could not resolve user from session for prompt choice: {e}") + + if not normalized_email: + logger.info( + "[start_auth_flow] Using prompt='consent' (no known user email for re-auth detection)." + ) + return "consent" + + existing_credentials: Optional[Credentials] = None + + # Prefer credentials bound to the current session when available. + if session_id: + try: + session_store = get_oauth21_session_store() + mapped_user = session_store.get_user_by_mcp_session(session_id) + if mapped_user == normalized_email: + existing_credentials = session_store.get_credentials_by_mcp_session( + session_id + ) + except Exception as e: + logger.debug( + f"Could not read OAuth 2.1 session store for prompt choice: {e}" + ) + + # Fall back to credential file store in stateful mode. + if not existing_credentials and not is_stateless_mode(): + try: + existing_credentials = get_credential_store().get_credential( + normalized_email + ) + except Exception as e: + logger.debug(f"Could not read credential store for prompt choice: {e}") + + if not existing_credentials: + logger.info( + f"[start_auth_flow] Using prompt='consent' (no existing credentials for {normalized_email})." 
+ ) + return "consent" + + if has_required_scopes(existing_credentials.scopes, required_scopes): + logger.info( + f"[start_auth_flow] Using prompt='select_account' for re-auth of {normalized_email}." + ) + return "select_account" + + logger.info( + f"[start_auth_flow] Using prompt='consent' (existing credentials for {normalized_email} are missing required scopes)." + ) + return "consent" + + +# --- Core OAuth Logic --- + + +async def start_auth_flow( + user_google_email: Optional[str], + service_name: str, # e.g., "Google Calendar", "Gmail" for user messages + redirect_uri: str, # Added redirect_uri as a required parameter +) -> str: + """ + Initiates the Google OAuth flow and returns an actionable message for the user. + + Args: + user_google_email: The user's specified Google email, if provided. + service_name: The name of the Google service requiring auth (for user messages). + redirect_uri: The URI Google will redirect to after authorization. + + Returns: + A formatted string containing guidance for the LLM/user. + + Raises: + Exception: If the OAuth flow cannot be initiated. + """ + initial_email_provided = bool( + user_google_email + and user_google_email.strip() + and user_google_email.lower() != "default" + ) + user_display_name = ( + f"{service_name} for '{user_google_email}'" + if initial_email_provided + else service_name + ) + + logger.info( + f"[start_auth_flow] Initiating auth for {user_display_name} with scopes for enabled tools." + ) + + # Note: Caller should ensure OAuth callback is available before calling this function + + try: + if "OAUTHLIB_INSECURE_TRANSPORT" not in os.environ and ( + "localhost" in redirect_uri or "127.0.0.1" in redirect_uri + ): # Use passed redirect_uri + logger.warning( + "OAUTHLIB_INSECURE_TRANSPORT not set. Setting it for localhost/local development." 
+ ) + os.environ["OAUTHLIB_INSECURE_TRANSPORT"] = "1" + + oauth_state = os.urandom(16).hex() + current_scopes = get_current_scopes() + + flow = create_oauth_flow( + scopes=current_scopes, # Use scopes for enabled tools only + redirect_uri=redirect_uri, # Use passed redirect_uri + state=oauth_state, + ) + + session_id = None + try: + session_id = get_fastmcp_session_id() + except Exception as e: + logger.debug( + f"Could not retrieve FastMCP session ID for state binding: {e}" + ) + + prompt_type = _determine_oauth_prompt( + user_google_email=user_google_email, + required_scopes=current_scopes, + session_id=session_id, + ) + auth_url, _ = flow.authorization_url(access_type="offline", prompt=prompt_type) + + store = get_oauth21_session_store() + store.store_oauth_state( + oauth_state, + session_id=session_id, + code_verifier=flow.code_verifier, + ) + + logger.info( + f"Auth flow started for {user_display_name}. Advise user to visit: {auth_url}" + ) + + message_lines = [ + f"**ACTION REQUIRED: Google Authentication Needed for {user_display_name}**\n", + f"To proceed, the user must authorize this application for {service_name} access using all required permissions.", + "**LLM, please present this exact authorization URL to the user as a clickable hyperlink:**", + f"Authorization URL: {auth_url}", + f"Markdown for hyperlink: [Click here to authorize {service_name} access]({auth_url})\n", + "**LLM, after presenting the link, instruct the user as follows:**", + "1. Click the link and complete the authorization in their browser.", + ] + session_info_for_llm = "" + + if not initial_email_provided: + message_lines.extend( + [ + f"2. After successful authorization{session_info_for_llm}, the browser page will display the authenticated email address.", + " **LLM: Instruct the user to provide you with this email address.**", + "3. 
Once you have the email, **retry their original command, ensuring you include this `user_google_email`.**", + ] + ) + else: + message_lines.append( + f"2. After successful authorization{session_info_for_llm}, **retry their original command**." + ) + + message_lines.append( + f"\nThe application will use the new credentials. If '{user_google_email}' was provided, it must match the authenticated account." + ) + return "\n".join(message_lines) + + except FileNotFoundError as e: + error_text = f"OAuth client credentials not found: {e}. Please either:\n1. Set environment variables: GOOGLE_OAUTH_CLIENT_ID and GOOGLE_OAUTH_CLIENT_SECRET\n2. Ensure '{CONFIG_CLIENT_SECRETS_PATH}' file exists" + logger.error(error_text, exc_info=True) + raise Exception(error_text) + except Exception as e: + error_text = f"Could not initiate authentication for {user_display_name} due to an unexpected error: {str(e)}" + logger.error( + f"Failed to start the OAuth flow for {user_display_name}: {e}", + exc_info=True, + ) + raise Exception(error_text) + + +def handle_auth_callback( + scopes: List[str], + authorization_response: str, + redirect_uri: str, + credentials_base_dir: str = DEFAULT_CREDENTIALS_DIR, + session_id: Optional[str] = None, + client_secrets_path: Optional[ + str + ] = None, # Deprecated: kept for backward compatibility +) -> Tuple[str, Credentials]: + """ + Handles the callback from Google, exchanges the code for credentials, + fetches user info, determines user_google_email, saves credentials (file & session), + and returns them. + + Args: + scopes: List of OAuth scopes requested. + authorization_response: The full callback URL from Google. + redirect_uri: The redirect URI. + credentials_base_dir: Base directory for credential files. + session_id: Optional MCP session ID to associate with the credentials. + client_secrets_path: (Deprecated) Path to client secrets file. Ignored if environment variables are set. 
+ + Returns: + A tuple containing the user_google_email and the obtained Credentials object. + + Raises: + ValueError: If the state is missing or doesn't match. + FlowExchangeError: If the code exchange fails. + HttpError: If fetching user info fails. + """ + try: + # Log deprecation warning if old parameter is used + if client_secrets_path: + logger.warning( + "The 'client_secrets_path' parameter is deprecated. Use GOOGLE_OAUTH_CLIENT_ID and GOOGLE_OAUTH_CLIENT_SECRET environment variables instead." + ) + + # Allow HTTP for localhost in development + if "OAUTHLIB_INSECURE_TRANSPORT" not in os.environ: + logger.warning( + "OAUTHLIB_INSECURE_TRANSPORT not set. Setting it for localhost development." + ) + os.environ["OAUTHLIB_INSECURE_TRANSPORT"] = "1" + + # Allow partial scope grants without raising an exception. + # When users decline some scopes on Google's consent screen, + # oauthlib raises because the granted scopes differ from requested. + if "OAUTHLIB_RELAX_TOKEN_SCOPE" not in os.environ: + os.environ["OAUTHLIB_RELAX_TOKEN_SCOPE"] = "1" + + store = get_oauth21_session_store() + parsed_response = urlparse(authorization_response) + state_values = parse_qs(parsed_response.query).get("state") + state = state_values[0] if state_values else None + + state_info = store.validate_and_consume_oauth_state( + state, session_id=session_id + ) + logger.debug( + "Validated OAuth callback state %s for session %s", + (state[:8] if state else ""), + state_info.get("session_id") or "", + ) + + flow = create_oauth_flow( + scopes=scopes, + redirect_uri=redirect_uri, + state=state, + code_verifier=state_info.get("code_verifier"), + autogenerate_code_verifier=False, + ) + + # Exchange the authorization code for credentials + # Note: fetch_token will use the redirect_uri configured in the flow + flow.fetch_token(authorization_response=authorization_response) + credentials = flow.credentials + logger.info("Successfully exchanged authorization code for tokens.") + + # Handle partial 
OAuth grants: if the user declined some scopes on + # Google's consent screen, credentials.granted_scopes contains only + # what was actually authorized. Store those instead of the inflated + # requested scopes so that refresh() sends the correct scope set. + granted = getattr(credentials, "granted_scopes", None) + if granted and set(granted) != set(credentials.scopes or []): + logger.warning( + "Partial OAuth grant detected. Requested: %s, Granted: %s", + credentials.scopes, + granted, + ) + credentials = Credentials( + token=credentials.token, + refresh_token=credentials.refresh_token, + id_token=getattr(credentials, "id_token", None), + token_uri=credentials.token_uri, + client_id=credentials.client_id, + client_secret=credentials.client_secret, + scopes=list(granted), + expiry=credentials.expiry, + quota_project_id=getattr(credentials, "quota_project_id", None), + ) + + # Get user info to determine user_id (using email here) + user_info = get_user_info(credentials) + if not user_info or "email" not in user_info: + logger.error("Could not retrieve user email from Google.") + raise ValueError("Failed to get user email for identification.") + + user_google_email = user_info["email"] + logger.info(f"Identified user_google_email: {user_google_email}") + + credential_store = get_credential_store() + if not credentials.refresh_token: + fallback_refresh_token = None + + if session_id: + try: + session_credentials = store.get_credentials_by_mcp_session( + session_id + ) + if session_credentials and session_credentials.refresh_token: + fallback_refresh_token = session_credentials.refresh_token + logger.info( + "OAuth callback response omitted refresh token; preserving existing refresh token from session store." 
+ ) + except Exception as e: + logger.debug( + f"Could not check session store for existing refresh token: {e}" + ) + + if not fallback_refresh_token and not is_stateless_mode(): + try: + existing_credentials = credential_store.get_credential( + user_google_email + ) + if existing_credentials and existing_credentials.refresh_token: + fallback_refresh_token = existing_credentials.refresh_token + logger.info( + "OAuth callback response omitted refresh token; preserving existing refresh token from credential store." + ) + except Exception as e: + logger.debug( + f"Could not check credential store for existing refresh token: {e}" + ) + + if fallback_refresh_token: + credentials = Credentials( + token=credentials.token, + refresh_token=fallback_refresh_token, + id_token=getattr(credentials, "id_token", None), + token_uri=credentials.token_uri, + client_id=credentials.client_id, + client_secret=credentials.client_secret, + scopes=credentials.scopes, + expiry=credentials.expiry, + quota_project_id=getattr(credentials, "quota_project_id", None), + ) + else: + logger.warning( + "OAuth callback did not include a refresh token and no previous refresh token was available to preserve." 
+ ) + + # Save the credentials + credential_store.store_credential(user_google_email, credentials) + + # Always save to OAuth21SessionStore for centralized management + store.store_session( + user_email=user_google_email, + access_token=credentials.token, + refresh_token=credentials.refresh_token, + token_uri=credentials.token_uri, + client_id=credentials.client_id, + client_secret=credentials.client_secret, + scopes=credentials.scopes, + expiry=credentials.expiry, + mcp_session_id=session_id, + issuer="https://accounts.google.com", # Add issuer for Google tokens + ) + + # If session_id is provided, also save to session cache for compatibility + if session_id: + save_credentials_to_session(session_id, credentials) + + return user_google_email, credentials + + except Exception as e: # Catch specific exceptions like FlowExchangeError if needed + logger.error(f"Error handling auth callback: {e}") + raise # Re-raise for the caller + + +def get_credentials( + user_google_email: Optional[str], # Can be None if relying on session_id + required_scopes: List[str], + client_secrets_path: Optional[str] = None, + credentials_base_dir: str = DEFAULT_CREDENTIALS_DIR, + session_id: Optional[str] = None, +) -> Optional[Credentials]: + """ + Retrieves stored credentials, prioritizing OAuth 2.1 store, then session, then file. Refreshes if necessary. + If credentials are loaded from file and a session_id is present, they are cached in the session. + In single-user mode, bypasses session mapping and uses any available credentials. + + Args: + user_google_email: Optional user's Google email. + required_scopes: List of scopes the credentials must have. + client_secrets_path: Optional path to client secrets (legacy; refresh uses embedded client info). + credentials_base_dir: Base directory for credential files. + session_id: Optional MCP session ID. + + Returns: + Valid Credentials object or None. 
+ """ + skip_session_cache = False + # First, try OAuth 2.1 session store if we have a session_id (FastMCP session) + if session_id: + try: + store = get_oauth21_session_store() + + session_user = store.get_user_by_mcp_session(session_id) + if user_google_email and session_user and session_user != user_google_email: + logger.info( + f"[get_credentials] Session user {session_user} doesn't match requested {user_google_email}; " + "skipping session store" + ) + skip_session_cache = True + else: + # Try to get credentials by MCP session + credentials = store.get_credentials_by_mcp_session(session_id) + if credentials: + logger.info( + f"[get_credentials] Found OAuth 2.1 credentials for MCP session {session_id}" + ) + + # Refresh invalid credentials before checking scopes + if (not credentials.valid) and credentials.refresh_token: + try: + credentials.refresh(Request()) + logger.info( + f"[get_credentials] Refreshed OAuth 2.1 credentials for session {session_id}" + ) + # Update stored credentials + user_email = store.get_user_by_mcp_session(session_id) + if user_email: + store.store_session( + user_email=user_email, + access_token=credentials.token, + refresh_token=credentials.refresh_token, + token_uri=credentials.token_uri, + client_id=credentials.client_id, + client_secret=credentials.client_secret, + scopes=credentials.scopes, + expiry=credentials.expiry, + mcp_session_id=session_id, + issuer="https://accounts.google.com", + ) + # Persist to file so rotated refresh tokens survive restarts + if not is_stateless_mode(): + try: + credential_store = get_credential_store() + credential_store.store_credential( + user_email, credentials + ) + except Exception as persist_error: + logger.warning( + f"[get_credentials] Failed to persist refreshed OAuth 2.1 credentials for user {user_email}: {persist_error}" + ) + except Exception as e: + logger.error( + f"[get_credentials] Failed to refresh OAuth 2.1 credentials: {e}" + ) + return None + + # Check scopes after refresh so 
stale metadata doesn't block valid tokens + if not has_required_scopes(credentials.scopes, required_scopes): + logger.warning( + f"[get_credentials] OAuth 2.1 credentials lack required scopes. Need: {required_scopes}, Have: {credentials.scopes}" + ) + return None + + if credentials.valid: + return credentials + + return None + except ImportError: + pass # OAuth 2.1 store not available + except Exception as e: + logger.debug(f"[get_credentials] Error checking OAuth 2.1 store: {e}") + + # Check for single-user mode + if os.getenv("MCP_SINGLE_USER_MODE") == "1": + logger.info( + "[get_credentials] Single-user mode: bypassing session mapping, finding any credentials" + ) + credentials, found_user_email = _find_any_credentials(credentials_base_dir) + if not credentials: + logger.info( + f"[get_credentials] Single-user mode: No credentials found in {credentials_base_dir}" + ) + return None + + # Use the email from the credential file if not provided + # This ensures we can save refreshed credentials even when the token is expired + if not user_google_email and found_user_email: + user_google_email = found_user_email + logger.debug( + f"[get_credentials] Single-user mode: using email {user_google_email} from credential file" + ) + else: + credentials: Optional[Credentials] = None + + # Session ID should be provided by the caller + if not session_id: + logger.debug("[get_credentials] No session_id provided") + + logger.debug( + f"[get_credentials] Called for user_google_email: '{user_google_email}', session_id: '{session_id}', required_scopes: {required_scopes}" + ) + + if session_id and not skip_session_cache: + credentials = load_credentials_from_session(session_id) + if credentials: + logger.debug( + f"[get_credentials] Loaded credentials from session for session_id '{session_id}'." 
+ ) + + if not credentials and user_google_email: + if not is_stateless_mode(): + logger.debug( + f"[get_credentials] No session credentials, trying credential store for user_google_email '{user_google_email}'." + ) + store = get_credential_store() + credentials = store.get_credential(user_google_email) + else: + logger.debug( + f"[get_credentials] No session credentials, skipping file store in stateless mode for user_google_email '{user_google_email}'." + ) + + if credentials and session_id: + logger.debug( + f"[get_credentials] Loaded from file for user '{user_google_email}', caching to session '{session_id}'." + ) + if not skip_session_cache: + save_credentials_to_session( + session_id, credentials + ) # Cache for current session + + if not credentials: + logger.info( + f"[get_credentials] No credentials found for user '{user_google_email}' or session '{session_id}'." + ) + return None + + logger.debug( + f"[get_credentials] Credentials found. Scopes: {credentials.scopes}, Valid: {credentials.valid}, Expired: {credentials.expired}" + ) + + # Attempt refresh before checking scopes — the scope check validates against + # credentials.scopes which is set at authorization time and not updated by the + # google-auth library on refresh. Checking scopes first would block a valid + # refresh attempt when stored scope metadata is stale. + if credentials.valid: + logger.debug( + f"[get_credentials] Credentials are valid. User: '{user_google_email}', Session: '{session_id}'" + ) + elif credentials.refresh_token: + logger.info( + f"[get_credentials] Credentials not valid. Attempting refresh. User: '{user_google_email}', Session: '{session_id}'" + ) + try: + logger.debug( + "[get_credentials] Refreshing token using embedded client credentials" + ) + credentials.refresh(Request()) + logger.info( + f"[get_credentials] Credentials refreshed successfully. 
User: '{user_google_email}', Session: '{session_id}'" + ) + + # Save refreshed credentials (skip file save in stateless mode) + if user_google_email: # Always save to credential store if email is known + if not is_stateless_mode(): + credential_store = get_credential_store() + credential_store.store_credential(user_google_email, credentials) + else: + logger.info( + f"Skipping credential file save in stateless mode for {user_google_email}" + ) + + # Also update OAuth21SessionStore + store = get_oauth21_session_store() + store.store_session( + user_email=user_google_email, + access_token=credentials.token, + refresh_token=credentials.refresh_token, + token_uri=credentials.token_uri, + client_id=credentials.client_id, + client_secret=credentials.client_secret, + scopes=credentials.scopes, + expiry=credentials.expiry, + mcp_session_id=session_id, + issuer="https://accounts.google.com", # Add issuer for Google tokens + ) + + if session_id: # Update session cache if it was the source or is active + save_credentials_to_session(session_id, credentials) + except RefreshError as e: + logger.warning( + f"[get_credentials] RefreshError - token expired/revoked: {e}. User: '{user_google_email}', Session: '{session_id}'" + ) + # For RefreshError, we should return None to trigger reauthentication + return None + except Exception as e: + logger.error( + f"[get_credentials] Error refreshing credentials: {e}. User: '{user_google_email}', Session: '{session_id}'", + exc_info=True, + ) + return None # Failed to refresh + else: + logger.warning( + f"[get_credentials] Credentials invalid/cannot refresh. Valid: {credentials.valid}, Refresh Token: {credentials.refresh_token is not None}. User: '{user_google_email}', Session: '{session_id}'" + ) + return None + + # Check scopes after refresh so stale scope metadata doesn't block valid tokens. + # Uses hierarchy-aware check (e.g. gmail.modify satisfies gmail.readonly). 
+ if not has_required_scopes(credentials.scopes, required_scopes): + logger.warning( + f"[get_credentials] Credentials lack required scopes. Need: {required_scopes}, Have: {credentials.scopes}. User: '{user_google_email}', Session: '{session_id}'" + ) + return None # Re-authentication needed for scopes + + logger.debug( + f"[get_credentials] Credentials have sufficient scopes. User: '{user_google_email}', Session: '{session_id}'" + ) + return credentials + + +def get_user_info( + credentials: Credentials, *, skip_valid_check: bool = False +) -> Optional[Dict[str, Any]]: + """Fetches basic user profile information (requires userinfo.email scope).""" + if not credentials: + logger.error("Cannot get user info: Missing credentials.") + return None + if not skip_valid_check and not credentials.valid: + logger.error("Cannot get user info: Invalid credentials.") + return None + service = None + try: + # Using googleapiclient discovery to get user info + # Requires 'google-api-python-client' library + service = build("oauth2", "v2", credentials=credentials) + user_info = service.userinfo().get().execute() + logger.info(f"Successfully fetched user info: {user_info.get('email')}") + return user_info + except HttpError as e: + logger.error(f"HttpError fetching user info: {e.status_code} {e.reason}") + # Handle specific errors, e.g., 401 Unauthorized might mean token issue + return None + except Exception as e: + logger.error(f"Unexpected error fetching user info: {e}") + return None + finally: + if service: + service.close() + + +# --- Centralized Google Service Authentication --- + + +class GoogleAuthenticationError(Exception): + """Exception raised when Google authentication is required or fails.""" + + def __init__(self, message: str, auth_url: Optional[str] = None): + super().__init__(message) + self.auth_url = auth_url + + +async def get_authenticated_google_service( + service_name: str, # "gmail", "calendar", "drive", "docs" + version: str, # "v1", "v3" + tool_name: 
str, # For logging/debugging + user_google_email: str, # Required - no more Optional + required_scopes: List[str], + session_id: Optional[str] = None, # Session context for logging +) -> tuple[Any, str]: + """ + Centralized Google service authentication for all MCP tools. + Returns (service, user_email) on success or raises GoogleAuthenticationError. + + Args: + service_name: The Google service name ("gmail", "calendar", "drive", "docs") + version: The API version ("v1", "v3", etc.) + tool_name: The name of the calling tool (for logging/debugging) + user_google_email: The user's Google email address (required) + required_scopes: List of required OAuth scopes + + Returns: + tuple[service, user_email] on success + + Raises: + GoogleAuthenticationError: When authentication is required or fails + """ + + # Try to get FastMCP session ID if not provided + if not session_id: + try: + # First try context variable (works in async context) + session_id = get_fastmcp_session_id() + if session_id: + logger.debug( + f"[{tool_name}] Got FastMCP session ID from context: {session_id}" + ) + else: + logger.debug( + f"[{tool_name}] Context variable returned None/empty session ID" + ) + except Exception as e: + logger.debug( + f"[{tool_name}] Could not get FastMCP session from context: {e}" + ) + + # Fallback to direct FastMCP context if context variable not set + if not session_id and get_fastmcp_context: + try: + fastmcp_ctx = get_fastmcp_context() + if fastmcp_ctx and hasattr(fastmcp_ctx, "session_id"): + session_id = fastmcp_ctx.session_id + logger.debug( + f"[{tool_name}] Got FastMCP session ID directly: {session_id}" + ) + else: + logger.debug( + f"[{tool_name}] FastMCP context exists but no session_id attribute" + ) + except Exception as e: + logger.debug( + f"[{tool_name}] Could not get FastMCP context directly: {e}" + ) + + # Final fallback: log if we still don't have session_id + if not session_id: + logger.warning( + f"[{tool_name}] Unable to obtain FastMCP session ID from 
any source" + ) + + logger.info( + f"[{tool_name}] Attempting to get authenticated {service_name} service. Email: '{user_google_email}', Session: '{session_id}'" + ) + + # Validate email format + if not user_google_email or "@" not in user_google_email: + error_msg = f"Authentication required for {tool_name}. No valid 'user_google_email' provided. Please provide a valid Google email address." + logger.info(f"[{tool_name}] {error_msg}") + raise GoogleAuthenticationError(error_msg) + + credentials = await asyncio.to_thread( + get_credentials, + user_google_email=user_google_email, + required_scopes=required_scopes, + client_secrets_path=CONFIG_CLIENT_SECRETS_PATH, + session_id=session_id, # Pass through session context + ) + + if not credentials or not credentials.valid: + logger.warning( + f"[{tool_name}] No valid credentials. Email: '{user_google_email}'." + ) + logger.info( + f"[{tool_name}] Valid email '{user_google_email}' provided, initiating auth flow." + ) + + # Ensure OAuth callback is available + from auth.oauth_callback_server import ensure_oauth_callback_available + + redirect_uri = get_oauth_redirect_uri() + config = get_oauth_config() + success, error_msg = ensure_oauth_callback_available( + get_transport_mode(), config.port, config.base_uri + ) + if not success: + error_detail = f" ({error_msg})" if error_msg else "" + raise GoogleAuthenticationError( + f"Cannot initiate OAuth flow - callback server unavailable{error_detail}" + ) + + # Generate auth URL and raise exception with it + auth_response = await start_auth_flow( + user_google_email=user_google_email, + service_name=f"Google {service_name.title()}", + redirect_uri=redirect_uri, + ) + + # Extract the auth URL from the response and raise with it + raise GoogleAuthenticationError(auth_response) + + try: + service = build(service_name, version, credentials=credentials) + log_user_email = user_google_email + + # Try to get email from credentials if needed for validation + if credentials and 
credentials.id_token: + try: + # Decode without verification (just to get email for logging) + decoded_token = jwt.decode( + credentials.id_token, options={"verify_signature": False} + ) + token_email = decoded_token.get("email") + if token_email: + log_user_email = token_email + logger.info(f"[{tool_name}] Token email: {token_email}") + except Exception as e: + logger.debug(f"[{tool_name}] Could not decode id_token: {e}") + + logger.info( + f"[{tool_name}] Successfully authenticated {service_name} service for user: {log_user_email}" + ) + return service, log_user_email + + except Exception as e: + error_msg = f"[{tool_name}] Failed to build {service_name} service: {str(e)}" + logger.error(error_msg, exc_info=True) + raise GoogleAuthenticationError(error_msg) diff --git a/auth/mcp_session_middleware.py b/auth/mcp_session_middleware.py new file mode 100644 index 0000000..1e84308 --- /dev/null +++ b/auth/mcp_session_middleware.py @@ -0,0 +1,104 @@ +""" +MCP Session Middleware + +This middleware intercepts MCP requests and sets the session context +for use by tool functions. +""" + +import logging +from typing import Callable, Any + +from starlette.middleware.base import BaseHTTPMiddleware +from starlette.requests import Request + +from auth.oauth21_session_store import ( + SessionContext, + SessionContextManager, + extract_session_from_headers, +) +# OAuth 2.1 is now handled by FastMCP auth + +logger = logging.getLogger(__name__) + + +class MCPSessionMiddleware(BaseHTTPMiddleware): + """ + Middleware that extracts session information from requests and makes it + available to MCP tool functions via context variables. 
+ """ + + async def dispatch(self, request: Request, call_next: Callable) -> Any: + """Process request and set session context.""" + + logger.debug( + f"MCPSessionMiddleware processing request: {request.method} {request.url.path}" + ) + + # Skip non-MCP paths + if not request.url.path.startswith("/mcp"): + logger.debug(f"Skipping non-MCP path: {request.url.path}") + return await call_next(request) + + session_context = None + + try: + # Extract session information + headers = dict(request.headers) + session_id = extract_session_from_headers(headers) + + # Try to get OAuth 2.1 auth context from FastMCP + auth_context = None + user_email = None + mcp_session_id = None + # Check for FastMCP auth context + if hasattr(request.state, "auth"): + auth_context = request.state.auth + # Extract user email from auth claims if available + if hasattr(auth_context, "claims") and auth_context.claims: + user_email = auth_context.claims.get("email") + + # Check for FastMCP session ID (from streamable HTTP transport) + if hasattr(request.state, "session_id"): + mcp_session_id = request.state.session_id + logger.debug(f"Found FastMCP session ID: {mcp_session_id}") + + # SECURITY: Do not decode JWT without verification + # User email must come from verified sources only (FastMCP auth context) + + # Build session context + if session_id or auth_context or user_email or mcp_session_id: + # Create session ID hierarchy: explicit session_id > Google user session > FastMCP session + effective_session_id = session_id + if not effective_session_id and user_email: + effective_session_id = f"google_{user_email}" + elif not effective_session_id and mcp_session_id: + effective_session_id = mcp_session_id + + session_context = SessionContext( + session_id=effective_session_id, + user_id=user_email + or (auth_context.user_id if auth_context else None), + auth_context=auth_context, + request=request, + metadata={ + "path": request.url.path, + "method": request.method, + "user_email": user_email, + 
"mcp_session_id": mcp_session_id, + }, + ) + + logger.debug( + f"MCP request with session: session_id={session_context.session_id}, " + f"user_id={session_context.user_id}, path={request.url.path}" + ) + + # Process request with session context + with SessionContextManager(session_context): + response = await call_next(request) + return response + + except Exception as e: + logger.error(f"Error in MCP session middleware: {e}") + # Continue without session context + return await call_next(request) diff --git a/auth/oauth21_session_store.py b/auth/oauth21_session_store.py new file mode 100644 index 0000000..f659de2 --- /dev/null +++ b/auth/oauth21_session_store.py @@ -0,0 +1,989 @@ +""" +OAuth 2.1 Session Store for Google Services + +This module provides a global store for OAuth 2.1 authenticated sessions +that can be accessed by Google service decorators. It also includes +session context management and credential conversion functionality. +""" + +import contextvars +import logging +from typing import Dict, Optional, Any, Tuple +from threading import RLock +from datetime import datetime, timedelta, timezone +from dataclasses import dataclass + +from fastmcp.server.auth import AccessToken +from google.oauth2.credentials import Credentials +from auth.oauth_config import is_external_oauth21_provider + +logger = logging.getLogger(__name__) + + +def _normalize_expiry_to_naive_utc(expiry: Optional[Any]) -> Optional[datetime]: + """ + Convert expiry values to timezone-naive UTC datetimes for google-auth compatibility. + + Naive datetime inputs are assumed to already represent UTC and are returned unchanged so that + google-auth Credentials receive naive UTC datetimes for expiry comparison. 
+ """ + if expiry is None: + return None + + if isinstance(expiry, datetime): + if expiry.tzinfo is not None: + try: + return expiry.astimezone(timezone.utc).replace(tzinfo=None) + except Exception: # pragma: no cover - defensive + logger.debug( + "Failed to normalize aware expiry; returning without tzinfo" + ) + return expiry.replace(tzinfo=None) + return expiry # Already naive; assumed to represent UTC + + if isinstance(expiry, str): + try: + parsed = datetime.fromisoformat(expiry.replace("Z", "+00:00")) + except ValueError: + logger.debug("Failed to parse expiry string '%s'", expiry) + return None + return _normalize_expiry_to_naive_utc(parsed) + + logger.debug("Unsupported expiry type '%s' (%s)", expiry, type(expiry)) + return None + + +# Context variable to store the current session information +_current_session_context: contextvars.ContextVar[Optional["SessionContext"]] = ( + contextvars.ContextVar("current_session_context", default=None) +) + + +@dataclass +class SessionContext: + """Container for session-related information.""" + + session_id: Optional[str] = None + user_id: Optional[str] = None + auth_context: Optional[Any] = None + request: Optional[Any] = None + metadata: Dict[str, Any] = None + issuer: Optional[str] = None + + def __post_init__(self): + if self.metadata is None: + self.metadata = {} + + +def set_session_context(context: Optional[SessionContext]): + """ + Set the current session context. + + Args: + context: The session context to set + """ + _current_session_context.set(context) + if context: + logger.debug( + f"Set session context: session_id={context.session_id}, user_id={context.user_id}" + ) + else: + logger.debug("Cleared session context") + + +def get_session_context() -> Optional[SessionContext]: + """ + Get the current session context. 
+ + Returns: + The current session context or None + """ + return _current_session_context.get() + + +def clear_session_context(): + """Clear the current session context.""" + set_session_context(None) + + +class SessionContextManager: + """ + Context manager for temporarily setting session context. + + Usage: + with SessionContextManager(session_context): + # Code that needs access to session context + pass + """ + + def __init__(self, context: Optional[SessionContext]): + self.context = context + self.token = None + + def __enter__(self): + """Set the session context.""" + self.token = _current_session_context.set(self.context) + return self.context + + def __exit__(self, exc_type, exc_val, exc_tb): + """Reset the session context.""" + if self.token: + _current_session_context.reset(self.token) + + +def extract_session_from_headers(headers: Dict[str, str]) -> Optional[str]: + """ + Extract session ID from request headers. + + Args: + headers: Request headers + + Returns: + Session ID if found + """ + # Try different header names + session_id = headers.get("mcp-session-id") or headers.get("Mcp-Session-Id") + if session_id: + return session_id + + session_id = headers.get("x-session-id") or headers.get("X-Session-ID") + if session_id: + return session_id + + # Try Authorization header for Bearer token + auth_header = headers.get("authorization") or headers.get("Authorization") + if auth_header and auth_header.lower().startswith("bearer "): + token = auth_header[7:] # Remove "Bearer " prefix + # Intentionally ignore empty tokens - "Bearer " with no token should not + # create a session context (avoids hash collisions on empty string) + if token: + # Use thread-safe lookup to find session by access token + store = get_oauth21_session_store() + session_id = store.find_session_id_for_access_token(token) + if session_id: + return session_id + + # If no session found, create a temporary session ID from token hash + # This allows header-based authentication to work with 
session context + import hashlib + + token_hash = hashlib.sha256(token.encode()).hexdigest()[:8] + return f"bearer_token_{token_hash}" + + return None + + +# ============================================================================= +# OAuth21SessionStore - Main Session Management +# ============================================================================= + + +class OAuth21SessionStore: + """ + Global store for OAuth 2.1 authenticated sessions. + + This store maintains a mapping of user emails to their OAuth 2.1 + authenticated credentials, allowing Google services to access them. + It also maintains a mapping from FastMCP session IDs to user emails. + + Security: Sessions are bound to specific users and can only access + their own credentials. + """ + + def __init__(self): + self._sessions: Dict[str, Dict[str, Any]] = {} + self._mcp_session_mapping: Dict[ + str, str + ] = {} # Maps FastMCP session ID -> user email + self._session_auth_binding: Dict[ + str, str + ] = {} # Maps session ID -> authenticated user email (immutable) + self._oauth_states: Dict[str, Dict[str, Any]] = {} + self._lock = RLock() + + def _cleanup_expired_oauth_states_locked(self): + """Remove expired OAuth state entries. 
Caller must hold lock.""" + now = datetime.now(timezone.utc) + expired_states = [ + state + for state, data in self._oauth_states.items() + if data.get("expires_at") and data["expires_at"] <= now + ] + for state in expired_states: + del self._oauth_states[state] + logger.debug( + "Removed expired OAuth state: %s", + state[:8] if len(state) > 8 else state, + ) + + def store_oauth_state( + self, + state: str, + session_id: Optional[str] = None, + expires_in_seconds: int = 600, + code_verifier: Optional[str] = None, + ) -> None: + """Persist an OAuth state value for later validation.""" + if not state: + raise ValueError("OAuth state must be provided") + if expires_in_seconds < 0: + raise ValueError("expires_in_seconds must be non-negative") + + with self._lock: + self._cleanup_expired_oauth_states_locked() + now = datetime.now(timezone.utc) + expiry = now + timedelta(seconds=expires_in_seconds) + self._oauth_states[state] = { + "session_id": session_id, + "expires_at": expiry, + "created_at": now, + "code_verifier": code_verifier, + } + logger.debug( + "Stored OAuth state %s (expires at %s)", + state[:8] if len(state) > 8 else state, + expiry.isoformat(), + ) + + def validate_and_consume_oauth_state( + self, + state: str, + session_id: Optional[str] = None, + ) -> Dict[str, Any]: + """ + Validate that a state value exists and consume it. + + Args: + state: The OAuth state returned by Google. + session_id: Optional session identifier that initiated the flow. + + Returns: + Metadata associated with the state. + + Raises: + ValueError: If the state is missing, expired, or does not match the session. 
+ """ + if not state: + raise ValueError("Missing OAuth state parameter") + + with self._lock: + self._cleanup_expired_oauth_states_locked() + state_info = self._oauth_states.get(state) + + if not state_info: + logger.error( + "SECURITY: OAuth callback received unknown or expired state" + ) + raise ValueError("Invalid or expired OAuth state parameter") + + bound_session = state_info.get("session_id") + if bound_session and session_id and bound_session != session_id: + # Consume the state to prevent replay attempts + del self._oauth_states[state] + logger.error( + "SECURITY: OAuth state session mismatch (expected %s, got %s)", + bound_session, + session_id, + ) + raise ValueError("OAuth state does not match the initiating session") + + # State is valid – consume it to prevent reuse + del self._oauth_states[state] + logger.debug( + "Validated OAuth state %s", + state[:8] if len(state) > 8 else state, + ) + return state_info + + def store_session( + self, + user_email: str, + access_token: str, + refresh_token: Optional[str] = None, + token_uri: str = "https://oauth2.googleapis.com/token", + client_id: Optional[str] = None, + client_secret: Optional[str] = None, + scopes: Optional[list] = None, + expiry: Optional[Any] = None, + session_id: Optional[str] = None, + mcp_session_id: Optional[str] = None, + issuer: Optional[str] = None, + ): + """ + Store OAuth 2.1 session information. 
+ + Args: + user_email: User's email address + access_token: OAuth 2.1 access token + refresh_token: OAuth 2.1 refresh token + token_uri: Token endpoint URI + client_id: OAuth client ID + client_secret: OAuth client secret + scopes: List of granted scopes + expiry: Token expiry time + session_id: OAuth 2.1 session ID + mcp_session_id: FastMCP session ID to map to this user + issuer: Token issuer (e.g., "https://accounts.google.com") + """ + with self._lock: + normalized_expiry = _normalize_expiry_to_naive_utc(expiry) + + # Clean up previous session mappings for this user before storing new one + old_session = self._sessions.get(user_email) + if old_session: + old_mcp_session_id = old_session.get("mcp_session_id") + old_session_id = old_session.get("session_id") + # Remove old MCP session mapping if it differs from new one + if old_mcp_session_id and old_mcp_session_id != mcp_session_id: + if old_mcp_session_id in self._mcp_session_mapping: + del self._mcp_session_mapping[old_mcp_session_id] + logger.debug( + f"Removed stale MCP session mapping: {old_mcp_session_id}" + ) + if old_mcp_session_id in self._session_auth_binding: + del self._session_auth_binding[old_mcp_session_id] + logger.debug( + f"Removed stale auth binding: {old_mcp_session_id}" + ) + # Remove old OAuth session binding if it differs from new one + if old_session_id and old_session_id != session_id: + if old_session_id in self._session_auth_binding: + del self._session_auth_binding[old_session_id] + logger.debug( + f"Removed stale OAuth session binding: {old_session_id}" + ) + + session_info = { + "access_token": access_token, + "refresh_token": refresh_token, + "token_uri": token_uri, + "client_id": client_id, + "client_secret": client_secret, + "scopes": scopes or [], + "expiry": normalized_expiry, + "session_id": session_id, + "mcp_session_id": mcp_session_id, + "issuer": issuer, + } + + self._sessions[user_email] = session_info + + # Store MCP session mapping if provided + if mcp_session_id: + # 
Create immutable session binding (first binding wins, cannot be changed) + if mcp_session_id not in self._session_auth_binding: + self._session_auth_binding[mcp_session_id] = user_email + logger.info( + f"Created immutable session binding: {mcp_session_id} -> {user_email}" + ) + elif self._session_auth_binding[mcp_session_id] != user_email: + # Security: Attempt to bind session to different user + logger.error( + f"SECURITY: Attempt to rebind session {mcp_session_id} from {self._session_auth_binding[mcp_session_id]} to {user_email}" + ) + raise ValueError( + f"Session {mcp_session_id} is already bound to a different user" + ) + + self._mcp_session_mapping[mcp_session_id] = user_email + logger.info( + f"Stored OAuth 2.1 session for {user_email} (session_id: {session_id}, mcp_session_id: {mcp_session_id})" + ) + else: + logger.info( + f"Stored OAuth 2.1 session for {user_email} (session_id: {session_id})" + ) + + # Also create binding for the OAuth session ID + if session_id and session_id not in self._session_auth_binding: + self._session_auth_binding[session_id] = user_email + + def get_credentials(self, user_email: str) -> Optional[Credentials]: + """ + Get Google credentials for a user from OAuth 2.1 session. 
+ + Args: + user_email: User's email address + + Returns: + Google Credentials object or None + """ + with self._lock: + session_info = self._sessions.get(user_email) + if not session_info: + logger.debug(f"No OAuth 2.1 session found for {user_email}") + return None + + try: + # Create Google credentials from session info + credentials = Credentials( + token=session_info["access_token"], + refresh_token=session_info.get("refresh_token"), + token_uri=session_info["token_uri"], + client_id=session_info.get("client_id"), + client_secret=session_info.get("client_secret"), + scopes=session_info.get("scopes", []), + expiry=session_info.get("expiry"), + ) + + logger.debug(f"Retrieved OAuth 2.1 credentials for {user_email}") + return credentials + + except Exception as e: + logger.error(f"Failed to create credentials for {user_email}: {e}") + return None + + def get_credentials_by_mcp_session( + self, mcp_session_id: str + ) -> Optional[Credentials]: + """ + Get Google credentials using FastMCP session ID. + + Args: + mcp_session_id: FastMCP session ID + + Returns: + Google Credentials object or None + """ + with self._lock: + # Look up user email from MCP session mapping + user_email = self._mcp_session_mapping.get(mcp_session_id) + if not user_email: + logger.debug(f"No user mapping found for MCP session {mcp_session_id}") + return None + + logger.debug(f"Found user {user_email} for MCP session {mcp_session_id}") + return self.get_credentials(user_email) + + def get_credentials_with_validation( + self, + requested_user_email: str, + session_id: Optional[str] = None, + auth_token_email: Optional[str] = None, + allow_recent_auth: bool = False, + ) -> Optional[Credentials]: + """ + Get Google credentials with session validation. + + This method ensures that a session can only access credentials for its + authenticated user, preventing cross-account access. 
+ + Args: + requested_user_email: The email of the user whose credentials are requested + session_id: The current session ID (MCP or OAuth session) + auth_token_email: Email from the verified auth token (if available) + + Returns: + Google Credentials object if validation passes, None otherwise + """ + with self._lock: + # Priority 1: Check auth token email (most secure, from verified JWT) + if auth_token_email: + if auth_token_email != requested_user_email: + logger.error( + f"SECURITY VIOLATION: Token for {auth_token_email} attempted to access " + f"credentials for {requested_user_email}" + ) + return None + # Token email matches, allow access + return self.get_credentials(requested_user_email) + + # Priority 2: Check session binding + if session_id: + bound_user = self._session_auth_binding.get(session_id) + if bound_user: + if bound_user != requested_user_email: + logger.error( + f"SECURITY VIOLATION: Session {session_id} (bound to {bound_user}) " + f"attempted to access credentials for {requested_user_email}" + ) + return None + # Session binding matches, allow access + return self.get_credentials(requested_user_email) + + # Check if this is an MCP session + mcp_user = self._mcp_session_mapping.get(session_id) + if mcp_user: + if mcp_user != requested_user_email: + logger.error( + f"SECURITY VIOLATION: MCP session {session_id} (user {mcp_user}) " + f"attempted to access credentials for {requested_user_email}" + ) + return None + # MCP session matches, allow access + return self.get_credentials(requested_user_email) + + # Special case: Allow access if user has recently authenticated (for clients that don't send tokens) + # CRITICAL SECURITY: This is ONLY allowed in stdio mode, NEVER in OAuth 2.1 mode + if allow_recent_auth and requested_user_email in self._sessions: + # Check transport mode to ensure this is only used in stdio + try: + from core.config import get_transport_mode + + transport_mode = get_transport_mode() + if transport_mode != "stdio": + 
logger.error( + f"SECURITY: Attempted to use allow_recent_auth in {transport_mode} mode. " + f"This is only allowed in stdio mode!" + ) + return None + except Exception as e: + logger.error(f"Failed to check transport mode: {e}") + return None + + logger.info( + f"Allowing credential access for {requested_user_email} based on recent authentication " + f"(stdio mode only - client not sending bearer token)" + ) + return self.get_credentials(requested_user_email) + + # No session or token info available - deny access for security + logger.warning( + f"Credential access denied for {requested_user_email}: No valid session or token" + ) + return None + + def get_user_by_mcp_session(self, mcp_session_id: str) -> Optional[str]: + """ + Get user email by FastMCP session ID. + + Args: + mcp_session_id: FastMCP session ID + + Returns: + User email or None + """ + with self._lock: + return self._mcp_session_mapping.get(mcp_session_id) + + def get_session_info(self, user_email: str) -> Optional[Dict[str, Any]]: + """ + Get complete session information including issuer. 
+ + Args: + user_email: User's email address + + Returns: + Session information dictionary or None + """ + with self._lock: + return self._sessions.get(user_email) + + def remove_session(self, user_email: str): + """Remove session for a user.""" + with self._lock: + if user_email in self._sessions: + # Get session IDs to clean up mappings + session_info = self._sessions.get(user_email, {}) + mcp_session_id = session_info.get("mcp_session_id") + session_id = session_info.get("session_id") + + # Remove from sessions + del self._sessions[user_email] + + # Remove from MCP mapping if exists + if mcp_session_id and mcp_session_id in self._mcp_session_mapping: + del self._mcp_session_mapping[mcp_session_id] + # Also remove from auth binding + if mcp_session_id in self._session_auth_binding: + del self._session_auth_binding[mcp_session_id] + logger.info( + f"Removed OAuth 2.1 session for {user_email} and MCP mapping for {mcp_session_id}" + ) + + # Remove OAuth session binding if exists + if session_id and session_id in self._session_auth_binding: + del self._session_auth_binding[session_id] + + if not mcp_session_id: + logger.info(f"Removed OAuth 2.1 session for {user_email}") + + # Clean up any orphaned mappings that may have accumulated + self._cleanup_orphaned_mappings_locked() + + def has_session(self, user_email: str) -> bool: + """Check if a user has an active session.""" + with self._lock: + return user_email in self._sessions + + def has_mcp_session(self, mcp_session_id: str) -> bool: + """Check if an MCP session has an associated user session.""" + with self._lock: + return mcp_session_id in self._mcp_session_mapping + + def get_single_user_email(self) -> Optional[str]: + """Return the sole authenticated user email when exactly one session exists.""" + with self._lock: + if len(self._sessions) == 1: + return next(iter(self._sessions)) + return None + + def get_stats(self) -> Dict[str, Any]: + """Get store statistics.""" + with self._lock: + return { + 
"total_sessions": len(self._sessions), + "users": list(self._sessions.keys()), + "mcp_session_mappings": len(self._mcp_session_mapping), + "mcp_sessions": list(self._mcp_session_mapping.keys()), + } + + def find_session_id_for_access_token(self, token: str) -> Optional[str]: + """ + Thread-safe lookup of session ID by access token. + + Args: + token: The access token to search for + + Returns: + Session ID if found, None otherwise + """ + with self._lock: + for user_email, session_info in self._sessions.items(): + if session_info.get("access_token") == token: + return session_info.get("session_id") or f"bearer_{user_email}" + return None + + def _cleanup_orphaned_mappings_locked(self) -> int: + """Remove orphaned mappings. Caller must hold lock.""" + # Collect valid session IDs and mcp_session_ids from active sessions + valid_session_ids = set() + valid_mcp_session_ids = set() + for session_info in self._sessions.values(): + if session_info.get("session_id"): + valid_session_ids.add(session_info["session_id"]) + if session_info.get("mcp_session_id"): + valid_mcp_session_ids.add(session_info["mcp_session_id"]) + + removed = 0 + + # Remove orphaned MCP session mappings + orphaned_mcp = [ + sid for sid in self._mcp_session_mapping if sid not in valid_mcp_session_ids + ] + for sid in orphaned_mcp: + del self._mcp_session_mapping[sid] + removed += 1 + logger.debug(f"Removed orphaned MCP session mapping: {sid}") + + # Remove orphaned auth bindings + valid_bindings = valid_session_ids | valid_mcp_session_ids + orphaned_bindings = [ + sid for sid in self._session_auth_binding if sid not in valid_bindings + ] + for sid in orphaned_bindings: + del self._session_auth_binding[sid] + removed += 1 + logger.debug(f"Removed orphaned auth binding: {sid}") + + if removed > 0: + logger.info(f"Cleaned up {removed} orphaned session mappings/bindings") + + return removed + + def cleanup_orphaned_mappings(self) -> int: + """ + Remove orphaned entries from mcp_session_mapping and 
session_auth_binding. + + Returns: + Number of orphaned entries removed + """ + with self._lock: + return self._cleanup_orphaned_mappings_locked() + + +# Global instance +_global_store = OAuth21SessionStore() + + +def get_oauth21_session_store() -> OAuth21SessionStore: + """Get the global OAuth 2.1 session store.""" + return _global_store + + +# ============================================================================= +# Google Credentials Bridge (absorbed from oauth21_google_bridge.py) +# ============================================================================= + +# Global auth provider instance (set during server initialization) +_auth_provider = None + + +def set_auth_provider(provider): + """Set the global auth provider instance.""" + global _auth_provider + _auth_provider = provider + logger.debug("OAuth 2.1 session store configured") + + +def get_auth_provider(): + """Get the global auth provider instance.""" + return _auth_provider + + +def _resolve_client_credentials() -> Tuple[Optional[str], Optional[str]]: + """Resolve OAuth client credentials from the active provider or configuration.""" + client_id: Optional[str] = None + client_secret: Optional[str] = None + + if _auth_provider: + client_id = getattr(_auth_provider, "_upstream_client_id", None) + secret_obj = getattr(_auth_provider, "_upstream_client_secret", None) + if secret_obj is not None: + if hasattr(secret_obj, "get_secret_value"): + try: + client_secret = secret_obj.get_secret_value() # type: ignore[call-arg] + except Exception as exc: # pragma: no cover - defensive + logger.debug( + f"Failed to resolve client secret from provider: {exc}" + ) + elif isinstance(secret_obj, str): + client_secret = secret_obj + + if not client_id or not client_secret: + try: + from auth.oauth_config import get_oauth_config + + cfg = get_oauth_config() + client_id = client_id or cfg.client_id + client_secret = client_secret or cfg.client_secret + except Exception as exc: # pragma: no cover - defensive + 
logger.debug(f"Failed to resolve client credentials from config: {exc}") + + return client_id, client_secret + + +def _build_credentials_from_provider( + access_token: AccessToken, +) -> Optional[Credentials]: + """Construct Google credentials from the provider cache.""" + if not _auth_provider: + return None + + access_entry = getattr(_auth_provider, "_access_tokens", {}).get(access_token.token) + if not access_entry: + access_entry = access_token + + client_id, client_secret = _resolve_client_credentials() + + refresh_token_value = getattr(_auth_provider, "_access_to_refresh", {}).get( + access_token.token + ) + refresh_token_obj = None + if refresh_token_value: + refresh_token_obj = getattr(_auth_provider, "_refresh_tokens", {}).get( + refresh_token_value + ) + + expiry = None + expires_at = getattr(access_entry, "expires_at", None) + if expires_at: + try: + expiry_candidate = datetime.fromtimestamp(expires_at, tz=timezone.utc) + expiry = _normalize_expiry_to_naive_utc(expiry_candidate) + except Exception: # pragma: no cover - defensive + expiry = None + + scopes = getattr(access_entry, "scopes", None) + + return Credentials( + token=access_token.token, + refresh_token=refresh_token_obj.token if refresh_token_obj else None, + token_uri="https://oauth2.googleapis.com/token", + client_id=client_id, + client_secret=client_secret, + scopes=scopes, + expiry=expiry, + ) + + +def ensure_session_from_access_token( + access_token: AccessToken, + user_email: Optional[str], + mcp_session_id: Optional[str] = None, +) -> Optional[Credentials]: + """Ensure credentials derived from an access token are cached and returned.""" + + if not access_token: + return None + + email = user_email + if not email and getattr(access_token, "claims", None): + email = access_token.claims.get("email") + + credentials = _build_credentials_from_provider(access_token) + store_expiry: Optional[datetime] = None + + if credentials is None: + client_id, client_secret = _resolve_client_credentials() + 
expiry = None + expires_at = getattr(access_token, "expires_at", None) + if expires_at: + try: + expiry = datetime.fromtimestamp(expires_at, tz=timezone.utc) + except Exception: # pragma: no cover - defensive + expiry = None + + normalized_expiry = _normalize_expiry_to_naive_utc(expiry) + credentials = Credentials( + token=access_token.token, + refresh_token=None, + token_uri="https://oauth2.googleapis.com/token", + client_id=client_id, + client_secret=client_secret, + scopes=getattr(access_token, "scopes", None), + expiry=normalized_expiry, + ) + store_expiry = expiry + else: + store_expiry = credentials.expiry + + # Skip session storage for external OAuth 2.1 to prevent memory leak from ephemeral tokens + if email and not is_external_oauth21_provider(): + try: + store = get_oauth21_session_store() + store.store_session( + user_email=email, + access_token=credentials.token, + refresh_token=credentials.refresh_token, + token_uri=credentials.token_uri, + client_id=credentials.client_id, + client_secret=credentials.client_secret, + scopes=credentials.scopes, + expiry=store_expiry, + session_id=f"google_{email}", + mcp_session_id=mcp_session_id, + issuer="https://accounts.google.com", + ) + except Exception as exc: # pragma: no cover - defensive + logger.debug(f"Failed to cache credentials for {email}: {exc}") + + return credentials + + +def get_credentials_from_token( + access_token: str, user_email: Optional[str] = None +) -> Optional[Credentials]: + """ + Convert a bearer token to Google credentials. 
+ + Args: + access_token: The bearer token + user_email: Optional user email for session lookup + + Returns: + Google Credentials object or None + """ + try: + store = get_oauth21_session_store() + + # If we have user_email, try to get credentials from store + if user_email: + credentials = store.get_credentials(user_email) + if credentials and credentials.token == access_token: + logger.debug(f"Found matching credentials from store for {user_email}") + return credentials + + # If the FastMCP provider is managing tokens, sync from provider storage + if _auth_provider: + access_record = getattr(_auth_provider, "_access_tokens", {}).get( + access_token + ) + if access_record: + logger.debug("Building credentials from FastMCP provider cache") + return ensure_session_from_access_token(access_record, user_email) + + # Otherwise, create minimal credentials with just the access token + # Assume token is valid for 1 hour (typical for Google tokens) + expiry = _normalize_expiry_to_naive_utc( + datetime.now(timezone.utc) + timedelta(hours=1) + ) + client_id, client_secret = _resolve_client_credentials() + + credentials = Credentials( + token=access_token, + refresh_token=None, + token_uri="https://oauth2.googleapis.com/token", + client_id=client_id, + client_secret=client_secret, + scopes=None, + expiry=expiry, + ) + + logger.debug("Created fallback Google credentials from bearer token") + return credentials + + except Exception as e: + logger.error(f"Failed to create Google credentials from token: {e}") + return None + + +def store_token_session( + token_response: dict, user_email: str, mcp_session_id: Optional[str] = None +) -> str: + """ + Store a token response in the session store. 
+ + Args: + token_response: OAuth token response from Google + user_email: User's email address + mcp_session_id: Optional FastMCP session ID to map to this user + + Returns: + Session ID + """ + if not _auth_provider: + logger.error("Auth provider not configured") + return "" + + try: + # Try to get FastMCP session ID from context if not provided + if not mcp_session_id: + try: + from core.context import get_fastmcp_session_id + + mcp_session_id = get_fastmcp_session_id() + if mcp_session_id: + logger.debug( + f"Got FastMCP session ID from context: {mcp_session_id}" + ) + except Exception as e: + logger.debug(f"Could not get FastMCP session from context: {e}") + + # Store session in OAuth21SessionStore + store = get_oauth21_session_store() + + session_id = f"google_{user_email}" + client_id, client_secret = _resolve_client_credentials() + scopes = token_response.get("scope", "") + scopes_list = scopes.split() if scopes else None + expiry = datetime.now(timezone.utc) + timedelta( + seconds=token_response.get("expires_in", 3600) + ) + + store.store_session( + user_email=user_email, + access_token=token_response.get("access_token"), + refresh_token=token_response.get("refresh_token"), + token_uri="https://oauth2.googleapis.com/token", + client_id=client_id, + client_secret=client_secret, + scopes=scopes_list, + expiry=expiry, + session_id=session_id, + mcp_session_id=mcp_session_id, + issuer="https://accounts.google.com", + ) + + if mcp_session_id: + logger.info( + f"Stored token session for {user_email} with MCP session {mcp_session_id}" + ) + else: + logger.info(f"Stored token session for {user_email}") + + return session_id + + except Exception as e: + logger.error(f"Failed to store token session: {e}") + return "" diff --git a/auth/oauth_callback_server.py b/auth/oauth_callback_server.py new file mode 100644 index 0000000..a009d4c --- /dev/null +++ b/auth/oauth_callback_server.py @@ -0,0 +1,287 @@ +""" +Transport-aware OAuth callback handling. 
class MinimalOAuthServer:
    """
    Minimal HTTP server for OAuth callbacks in stdio mode.

    Wraps a private FastAPI app that exposes ``/oauth2callback`` and
    ``/attachments/{file_id}`` and runs it with uvicorn on a daemon thread.
    Only starts when needed and uses the same port (8000) as streamable-http
    mode by default.
    """

    def __init__(self, port: int = 8000, base_uri: str = "http://localhost"):
        """
        Build the FastAPI app and register its routes; does not start serving.

        Args:
            port: TCP port the callback server will listen on.
            base_uri: Base URI whose hostname is used as the bind address.
        """
        self.port = port
        self.base_uri = base_uri
        self.app = FastAPI()
        self.server = None
        self.server_thread = None
        self.is_running = False

        # Setup the callback route
        self._setup_callback_route()
        # Setup attachment serving route
        self._setup_attachment_route()

    def _setup_callback_route(self):
        """Setup the OAuth callback route."""
        # Function-scope import keeps this block self-contained.
        from html import escape

        @self.app.get("/oauth2callback")
        async def oauth_callback(request: Request):
            """Handle OAuth callback - same logic as in core/server.py"""
            code = request.query_params.get("code")
            error = request.query_params.get("error")

            if error:
                error_message = (
                    f"Authentication failed: Google returned an error: {error}."
                )
                logger.error(error_message)
                # `error` comes straight from the query string and the response
                # helper interpolates its argument into HTML, so escape it to
                # prevent reflected markup/script injection.
                return create_error_response(escape(error_message))

            if not code:
                error_message = (
                    "Authentication failed: No authorization code received from Google."
                )
                logger.error(error_message)
                return create_error_response(error_message)

            try:
                # Check if we have credentials available (environment variables or file)
                error_message = check_client_secrets()
                if error_message:
                    return create_server_error_response(error_message)

                logger.info(
                    "OAuth callback: Received authorization code. Attempting to exchange for tokens."
                )

                # Exchange code for credentials; the credentials object itself
                # is not needed here, only the verified user identity.
                redirect_uri = get_oauth_redirect_uri()
                verified_user_id, _ = handle_auth_callback(
                    scopes=get_current_scopes(),
                    authorization_response=str(request.url),
                    redirect_uri=redirect_uri,
                    session_id=None,
                )

                logger.info(
                    f"OAuth callback: Successfully authenticated user: {verified_user_id}."
                )

                # Return success page using shared template
                return create_success_response(verified_user_id)

            except Exception as e:
                error_message_detail = f"Error processing OAuth callback: {str(e)}"
                logger.error(error_message_detail, exc_info=True)
                # Exception text can echo request-derived values; escape it
                # before it is embedded in the HTML error page.
                return create_server_error_response(escape(str(e)))

    def _setup_attachment_route(self):
        """Setup the attachment serving route."""
        from core.attachment_storage import get_attachment_storage

        @self.app.get("/attachments/{file_id}")
        async def serve_attachment(file_id: str, request: Request):
            """Serve a stored attachment file, or a 404 JSON error if unavailable."""
            storage = get_attachment_storage()
            metadata = storage.get_attachment_metadata(file_id)

            if not metadata:
                return JSONResponse(
                    {"error": "Attachment not found or expired"}, status_code=404
                )

            file_path = storage.get_attachment_path(file_id)
            if not file_path:
                return JSONResponse(
                    {"error": "Attachment file not found"}, status_code=404
                )

            return FileResponse(
                path=str(file_path),
                filename=metadata["filename"],
                media_type=metadata["mime_type"],
            )

    def start(self) -> tuple[bool, str]:
        """
        Start the minimal OAuth server.

        Probes the port with a throwaway bind, launches uvicorn on a daemon
        thread, then polls for up to three seconds until the port accepts
        connections.

        Returns:
            Tuple of (success: bool, error_message: str)
        """
        if self.is_running:
            logger.info("Minimal OAuth server is already running")
            return True, ""

        # Extract hostname from base_uri (e.g., "http://localhost" -> "localhost")
        try:
            parsed_uri = urlparse(self.base_uri)
            hostname = parsed_uri.hostname or "localhost"
        except Exception:
            hostname = "localhost"

        # Check if port is available by binding to it briefly.
        try:
            with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
                s.bind((hostname, self.port))
        except OSError:
            error_msg = f"Port {self.port} is already in use on {hostname}. Cannot start minimal OAuth server."
            logger.error(error_msg)
            return False, error_msg

        def run_server():
            """Run the server in a separate thread."""
            try:
                config = uvicorn.Config(
                    self.app,
                    host=hostname,
                    port=self.port,
                    log_level="warning",
                    access_log=False,
                )
                self.server = uvicorn.Server(config)
                asyncio.run(self.server.serve())

            except Exception as e:
                logger.error(f"Minimal OAuth server error: {e}", exc_info=True)
                self.is_running = False

        # Start server in background thread
        self.server_thread = threading.Thread(target=run_server, daemon=True)
        self.server_thread.start()

        # Wait for server to start. A monotonic clock is used so that wall-clock
        # adjustments cannot shorten or extend the timeout.
        max_wait = 3.0
        deadline = time.monotonic() + max_wait
        while time.monotonic() < deadline:
            try:
                with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
                    result = s.connect_ex((hostname, self.port))
                    if result == 0:
                        self.is_running = True
                        logger.info(
                            f"Minimal OAuth server started on {hostname}:{self.port}"
                        )
                        return True, ""
            except Exception:
                # Best-effort probe: any socket error just means "not up yet".
                pass
            time.sleep(0.1)

        error_msg = f"Failed to start minimal OAuth server on {hostname}:{self.port} - server did not respond within {max_wait}s"
        logger.error(error_msg)
        return False, error_msg

    def stop(self):
        """Stop the minimal OAuth server, waiting up to three seconds for its thread."""
        if not self.is_running:
            return

        try:
            if self.server:
                if hasattr(self.server, "should_exit"):
                    # uvicorn polls this flag and shuts down gracefully.
                    self.server.should_exit = True

            if self.server_thread and self.server_thread.is_alive():
                self.server_thread.join(timeout=3.0)

            self.is_running = False
            logger.info("Minimal OAuth server stopped")

        except Exception as e:
            logger.error(f"Error stopping minimal OAuth server: {e}", exc_info=True)
class OAuthConfig:
    """
    Centralized OAuth configuration management.

    Reads all OAuth-related settings from environment variables once, at
    construction time, and exposes them as attributes and helper methods so
    the rest of the codebase has a single source of truth.
    """

    def __init__(self):
        """
        Load configuration from the environment.

        Raises:
            ValueError: If EXTERNAL_OAUTH21_PROVIDER or
                WORKSPACE_MCP_STATELESS_MODE is enabled without
                MCP_ENABLE_OAUTH21=true, or if the port is not an integer.
        """
        # Base server configuration
        self.base_uri = os.getenv("WORKSPACE_MCP_BASE_URI", "http://localhost")
        # `or` (not getenv defaults) so that an empty PORT falls through to the
        # next source instead of crashing on int("").
        self.port = int(
            os.getenv("PORT") or os.getenv("WORKSPACE_MCP_PORT") or "8000"
        )
        # Strip a trailing slash so we never build "http://host/:8000".
        self.base_url = f"{self.base_uri.rstrip('/')}:{self.port}"

        # External URL for reverse proxy scenarios
        self.external_url = os.getenv("WORKSPACE_EXTERNAL_URL")

        # OAuth client configuration
        self.client_id = os.getenv("GOOGLE_OAUTH_CLIENT_ID")
        self.client_secret = os.getenv("GOOGLE_OAUTH_CLIENT_SECRET")

        # OAuth 2.1 configuration
        self.oauth21_enabled = (
            os.getenv("MCP_ENABLE_OAUTH21", "false").lower() == "true"
        )
        self.pkce_required = self.oauth21_enabled  # PKCE is mandatory in OAuth 2.1
        self.supported_code_challenge_methods = (
            ["S256", "plain"] if not self.oauth21_enabled else ["S256"]
        )

        # External OAuth 2.1 provider configuration
        self.external_oauth21_provider = (
            os.getenv("EXTERNAL_OAUTH21_PROVIDER", "false").lower() == "true"
        )
        if self.external_oauth21_provider and not self.oauth21_enabled:
            raise ValueError(
                "EXTERNAL_OAUTH21_PROVIDER requires MCP_ENABLE_OAUTH21=true"
            )

        # Stateless mode configuration
        self.stateless_mode = (
            os.getenv("WORKSPACE_MCP_STATELESS_MODE", "false").lower() == "true"
        )
        if self.stateless_mode and not self.oauth21_enabled:
            raise ValueError(
                "WORKSPACE_MCP_STATELESS_MODE requires MCP_ENABLE_OAUTH21=true"
            )

        # Transport mode (will be set at runtime)
        self._transport_mode = "stdio"  # Default

        # Redirect URI configuration
        self.redirect_uri = self._get_redirect_uri()
        self.redirect_path = self._get_redirect_path(self.redirect_uri)

        # Ensure FastMCP's Google provider picks up our existing configuration
        self._apply_fastmcp_google_env()

    @staticmethod
    def _split_env_list(raw: Optional[str]) -> List[str]:
        """Split a comma-separated env value into stripped, non-empty items."""
        if not raw:
            return []
        return [item for item in (part.strip() for part in raw.split(",")) if item]

    def _get_redirect_uri(self) -> str:
        """
        Get the OAuth redirect URI, supporting reverse proxy configurations.

        Returns:
            GOOGLE_OAUTH_REDIRECT_URI if set, else ``<base_url>/oauth2callback``.
        """
        explicit_uri = os.getenv("GOOGLE_OAUTH_REDIRECT_URI")
        if explicit_uri:
            return explicit_uri
        return f"{self.base_url}/oauth2callback"

    @staticmethod
    def _get_redirect_path(uri: str) -> str:
        """Extract the redirect path from a full redirect URI."""
        parsed = urlparse(uri)
        if parsed.scheme or parsed.netloc:
            path = parsed.path or "/oauth2callback"
        else:
            # If the value was already a path, ensure it starts with '/'
            path = uri if uri.startswith("/") else f"/{uri}"
        return path or "/oauth2callback"

    def _apply_fastmcp_google_env(self) -> None:
        """Mirror legacy GOOGLE_* env vars into FastMCP Google provider settings.

        Only variables that are not already present in ``os.environ`` are set;
        nothing is done when no client ID is configured.
        """
        if not self.client_id:
            return

        def _set_if_absent(key: str, value: Optional[str]) -> None:
            if value and key not in os.environ:
                os.environ[key] = value

        # Don't set FASTMCP_SERVER_AUTH if using external OAuth provider
        # (external OAuth means protocol-level auth is disabled, only tool-level auth)
        if not self.external_oauth21_provider:
            _set_if_absent(
                "FASTMCP_SERVER_AUTH",
                "fastmcp.server.auth.providers.google.GoogleProvider"
                if self.oauth21_enabled
                else None,
            )

        _set_if_absent("FASTMCP_SERVER_AUTH_GOOGLE_CLIENT_ID", self.client_id)
        _set_if_absent("FASTMCP_SERVER_AUTH_GOOGLE_CLIENT_SECRET", self.client_secret)
        _set_if_absent("FASTMCP_SERVER_AUTH_GOOGLE_BASE_URL", self.get_oauth_base_url())
        _set_if_absent("FASTMCP_SERVER_AUTH_GOOGLE_REDIRECT_PATH", self.redirect_path)

    def get_redirect_uris(self) -> List[str]:
        """
        Get all valid OAuth redirect URIs.

        Returns:
            The primary redirect URI followed by any entries from
            OAUTH_CUSTOM_REDIRECT_URIS, de-duplicated in order. Blank entries
            are dropped so an empty string can never validate as a redirect URI.
        """
        uris = [self.redirect_uri]
        uris.extend(self._split_env_list(os.getenv("OAUTH_CUSTOM_REDIRECT_URIS")))

        # Remove duplicates while preserving order
        return list(dict.fromkeys(uris))

    def get_allowed_origins(self) -> List[str]:
        """
        Get allowed CORS origins for OAuth endpoints.

        Returns:
            The server's own origin, the built-in VS Code / development
            origins, and any non-blank entries from OAUTH_ALLOWED_ORIGINS,
            de-duplicated in order.
        """
        origins = [
            # Server's own origin
            self.base_url,
            # VS Code and development origins
            "vscode-webview://",
            "https://vscode.dev",
            "https://github.dev",
        ]

        # Custom origins from environment
        origins.extend(self._split_env_list(os.getenv("OAUTH_ALLOWED_ORIGINS")))

        return list(dict.fromkeys(origins))

    def is_configured(self) -> bool:
        """
        Check if OAuth is properly configured.

        Returns:
            True if OAuth client credentials are available
        """
        return bool(self.client_id and self.client_secret)

    def get_oauth_base_url(self) -> str:
        """
        Get OAuth base URL for constructing OAuth endpoints.

        Uses WORKSPACE_EXTERNAL_URL if set (for reverse proxy scenarios),
        otherwise falls back to constructed base_url with port.

        Returns:
            Base URL for OAuth endpoints, without a trailing slash
        """
        if self.external_url:
            # Endpoints are built as f"{base}/oauth2/...", so a trailing slash
            # here would produce "//oauth2/..." URLs.
            return self.external_url.rstrip("/")
        return self.base_url

    def validate_redirect_uri(self, uri: str) -> bool:
        """
        Validate if a redirect URI is allowed (exact string match).

        Args:
            uri: The redirect URI to validate

        Returns:
            True if the URI is allowed, False otherwise
        """
        return uri in self.get_redirect_uris()

    def get_environment_summary(self) -> dict:
        """
        Get a summary of the current OAuth configuration.

        Returns:
            Dictionary with configuration summary (excluding secrets)
        """
        return {
            "base_url": self.base_url,
            "external_url": self.external_url,
            "effective_oauth_url": self.get_oauth_base_url(),
            "redirect_uri": self.redirect_uri,
            "redirect_path": self.redirect_path,
            "client_configured": bool(self.client_id),
            "oauth21_enabled": self.oauth21_enabled,
            "external_oauth21_provider": self.external_oauth21_provider,
            "pkce_required": self.pkce_required,
            "transport_mode": self._transport_mode,
            "total_redirect_uris": len(self.get_redirect_uris()),
            "total_allowed_origins": len(self.get_allowed_origins()),
        }

    def set_transport_mode(self, mode: str) -> None:
        """
        Set the current transport mode for OAuth callback handling.

        Args:
            mode: Transport mode ("stdio", "streamable-http", etc.)
        """
        self._transport_mode = mode

    def get_transport_mode(self) -> str:
        """
        Get the current transport mode.

        Returns:
            Current transport mode
        """
        return self._transport_mode

    def is_oauth21_enabled(self) -> bool:
        """
        Check if OAuth 2.1 mode is enabled.

        Returns:
            True if OAuth 2.1 is enabled
        """
        return self.oauth21_enabled

    def is_external_oauth21_provider(self) -> bool:
        """
        Check if external OAuth 2.1 provider mode is enabled.

        When enabled, the server expects external OAuth flow with bearer tokens
        in Authorization headers for tool calls. Protocol-level auth is disabled.

        Returns:
            True if external OAuth 2.1 provider is enabled
        """
        return self.external_oauth21_provider

    def detect_oauth_version(self, request_params: Dict[str, Any]) -> str:
        """
        Detect OAuth version based on request parameters.

        This method implements a conservative detection strategy:
        - Only returns "oauth21" when we have clear indicators
        - Defaults to "oauth20" for backward compatibility
        - Respects the global oauth21_enabled flag

        Args:
            request_params: Request parameters from authorization or token request

        Returns:
            "oauth21" or "oauth20" based on detection
        """
        # If OAuth 2.1 is not enabled globally, always return OAuth 2.0
        if not self.oauth21_enabled:
            return "oauth20"

        # Use the structured type for cleaner detection logic
        from auth.oauth_types import OAuthVersionDetectionParams

        params = OAuthVersionDetectionParams.from_request(request_params)

        # Clear OAuth 2.1 indicator: PKCE is present
        if params.has_pkce:
            return "oauth21"

        # Additional detection: Check if we have an active OAuth 2.1 session
        # This is important for tool calls where PKCE params aren't available
        authenticated_user = request_params.get("authenticated_user")
        if authenticated_user:
            try:
                from auth.oauth21_session_store import get_oauth21_session_store

                store = get_oauth21_session_store()
                if store.has_session(authenticated_user):
                    return "oauth21"
            except (ImportError, AttributeError, RuntimeError):
                pass  # Fall back to OAuth 2.0 if session check fails

        # Clients that sent neither PKCE nor a known session are treated as
        # OAuth 2.0 for maximum compatibility.
        return "oauth20"

    def get_authorization_server_metadata(
        self, scopes: Optional[List[str]] = None
    ) -> Dict[str, Any]:
        """
        Get OAuth authorization server metadata per RFC 8414.

        Args:
            scopes: Optional list of supported scopes to include in metadata

        Returns:
            Authorization server metadata dictionary
        """
        oauth_base = self.get_oauth_base_url()
        metadata = {
            "issuer": "https://accounts.google.com",
            "authorization_endpoint": f"{oauth_base}/oauth2/authorize",
            "token_endpoint": f"{oauth_base}/oauth2/token",
            "registration_endpoint": f"{oauth_base}/oauth2/register",
            "jwks_uri": "https://www.googleapis.com/oauth2/v3/certs",
            "userinfo_endpoint": "https://openidconnect.googleapis.com/v1/userinfo",
            "response_types_supported": ["code", "token"],
            "grant_types_supported": ["authorization_code", "refresh_token"],
            "token_endpoint_auth_methods_supported": [
                "client_secret_post",
                "client_secret_basic",
            ],
            # Copy so callers mutating the metadata cannot alter the config.
            "code_challenge_methods_supported": list(
                self.supported_code_challenge_methods
            ),
        }

        # Include scopes if provided
        if scopes is not None:
            metadata["scopes_supported"] = scopes

        # Add OAuth 2.1 specific metadata
        if self.oauth21_enabled:
            metadata["pkce_required"] = True
            # OAuth 2.1 deprecates implicit flow
            metadata["response_types_supported"] = ["code"]
            # OAuth 2.1 requires exact redirect URI matching
            metadata["require_exact_redirect_uri"] = True

        return metadata
+ + Returns: + The reloaded OAuth configuration instance + """ + global _oauth_config + with _oauth_config_lock: + _oauth_config = OAuthConfig() + return _oauth_config + + +# Convenience functions for backward compatibility +def get_oauth_base_url() -> str: + """Get OAuth base URL.""" + return get_oauth_config().get_oauth_base_url() + + +def get_redirect_uris() -> List[str]: + """Get all valid OAuth redirect URIs.""" + return get_oauth_config().get_redirect_uris() + + +def get_allowed_origins() -> List[str]: + """Get allowed CORS origins.""" + return get_oauth_config().get_allowed_origins() + + +def is_oauth_configured() -> bool: + """Check if OAuth is properly configured.""" + return get_oauth_config().is_configured() + + +def set_transport_mode(mode: str) -> None: + """Set the current transport mode.""" + get_oauth_config().set_transport_mode(mode) + + +def get_transport_mode() -> str: + """Get the current transport mode.""" + return get_oauth_config().get_transport_mode() + + +def is_oauth21_enabled() -> bool: + """Check if OAuth 2.1 is enabled.""" + return get_oauth_config().is_oauth21_enabled() + + +def get_oauth_redirect_uri() -> str: + """Get the primary OAuth redirect URI.""" + return get_oauth_config().redirect_uri + + +def is_stateless_mode() -> bool: + """Check if stateless mode is enabled.""" + return get_oauth_config().stateless_mode + + +def is_external_oauth21_provider() -> bool: + """Check if external OAuth 2.1 provider mode is enabled.""" + return get_oauth_config().is_external_oauth21_provider() diff --git a/auth/oauth_responses.py b/auth/oauth_responses.py new file mode 100644 index 0000000..5c2a0a9 --- /dev/null +++ b/auth/oauth_responses.py @@ -0,0 +1,229 @@ +""" +Shared OAuth callback response templates. + +Provides reusable HTML response templates for OAuth authentication flows +to eliminate duplication between server.py and oauth_callback_server.py. 
+""" + +from fastapi.responses import HTMLResponse +from typing import Optional + + +def create_error_response(error_message: str, status_code: int = 400) -> HTMLResponse: + """ + Create a standardized error response for OAuth failures. + + Args: + error_message: The error message to display + status_code: HTTP status code (default 400) + + Returns: + HTMLResponse with error page + """ + content = f""" + + Authentication Error + +

Authentication Error

+

{error_message}

+

Please ensure you grant the requested permissions. You can close this tab and try again.

+ + + """ + return HTMLResponse(content=content, status_code=status_code) + + +def create_success_response(verified_user_id: Optional[str] = None) -> HTMLResponse: + """ + Create a standardized success response for OAuth authentication. + + Args: + verified_user_id: The authenticated user's email (optional) + + Returns: + HTMLResponse with success page + """ + # Handle the case where no user ID is provided + user_display = verified_user_id if verified_user_id else "Google User" + + content = f""" + + Authentication Successful + + + + +
+
+

Authentication Successful

+
+ You've been authenticated as {user_display} +
+
+ Your credentials have been securely saved. You can now close this tab and retry your original command. +
+ +
This tab will close automatically in 10 seconds
+
+ +""" + return HTMLResponse(content=content) + + +def create_server_error_response(error_detail: str) -> HTMLResponse: + """ + Create a standardized server error response for OAuth processing failures. + + Args: + error_detail: The detailed error message + + Returns: + HTMLResponse with server error page + """ + content = f""" + + Authentication Processing Error + +

Authentication Processing Error

+

An unexpected error occurred while processing your authentication: {error_detail}

+

Please try again. You can close this tab.

@dataclass
class OAuthVersionDetectionParams:
    """
    Signals consulted when deciding between OAuth 2.1 and OAuth 2.0.

    Every field is optional; a missing request parameter is stored as None.
    """

    client_id: Optional[str] = None
    client_secret: Optional[str] = None
    code_challenge: Optional[str] = None
    code_challenge_method: Optional[str] = None
    code_verifier: Optional[str] = None
    authenticated_user: Optional[str] = None
    session_id: Optional[str] = None

    @classmethod
    def from_request(
        cls, request_params: Dict[str, Any]
    ) -> "OAuthVersionDetectionParams":
        """Build an instance by picking the known keys out of raw request parameters."""
        wanted_keys = (
            "client_id",
            "client_secret",
            "code_challenge",
            "code_challenge_method",
            "code_verifier",
            "authenticated_user",
            "session_id",
        )
        # Unknown keys in request_params are ignored; absent ones become None.
        picked = {key: request_params.get(key) for key in wanted_keys}
        return cls(**picked)

    @property
    def has_pkce(self) -> bool:
        """True when a non-empty code challenge or code verifier is present."""
        return any((self.code_challenge, self.code_verifier))

    @property
    def is_public_client(self) -> bool:
        """True when a client ID is present but no client secret (public client)."""
        if not self.client_id:
            return False
        return not self.client_secret
+ +Usage: + --permissions gmail:organize drive:readonly + +Gmail levels: readonly, organize, drafts, send, full +Tasks levels: readonly, manage, full +Other services: readonly, full (extensible by adding entries to SERVICE_PERMISSION_LEVELS) +""" + +import logging +from typing import Dict, FrozenSet, List, Optional, Tuple + +from auth.scopes import ( + GMAIL_READONLY_SCOPE, + GMAIL_LABELS_SCOPE, + GMAIL_MODIFY_SCOPE, + GMAIL_COMPOSE_SCOPE, + GMAIL_SEND_SCOPE, + GMAIL_SETTINGS_BASIC_SCOPE, + DRIVE_READONLY_SCOPE, + DRIVE_FILE_SCOPE, + DRIVE_SCOPE, + CALENDAR_READONLY_SCOPE, + CALENDAR_EVENTS_SCOPE, + CALENDAR_SCOPE, + DOCS_READONLY_SCOPE, + DOCS_WRITE_SCOPE, + SHEETS_READONLY_SCOPE, + SHEETS_WRITE_SCOPE, + CHAT_READONLY_SCOPE, + CHAT_WRITE_SCOPE, + CHAT_SPACES_SCOPE, + CHAT_SPACES_READONLY_SCOPE, + FORMS_BODY_SCOPE, + FORMS_BODY_READONLY_SCOPE, + FORMS_RESPONSES_READONLY_SCOPE, + SLIDES_SCOPE, + SLIDES_READONLY_SCOPE, + TASKS_SCOPE, + TASKS_READONLY_SCOPE, + CONTACTS_SCOPE, + CONTACTS_READONLY_SCOPE, + CUSTOM_SEARCH_SCOPE, + SCRIPT_PROJECTS_SCOPE, + SCRIPT_PROJECTS_READONLY_SCOPE, + SCRIPT_DEPLOYMENTS_SCOPE, + SCRIPT_DEPLOYMENTS_READONLY_SCOPE, + SCRIPT_PROCESSES_READONLY_SCOPE, + SCRIPT_METRICS_SCOPE, +) + +logger = logging.getLogger(__name__) + +# Ordered permission levels per service. +# Each entry is (level_name, [additional_scopes_at_this_level]). +# Scopes are CUMULATIVE: level N includes all scopes from levels 0..N. 
+SERVICE_PERMISSION_LEVELS: Dict[str, List[Tuple[str, List[str]]]] = { + "gmail": [ + ("readonly", [GMAIL_READONLY_SCOPE]), + ("organize", [GMAIL_LABELS_SCOPE, GMAIL_MODIFY_SCOPE]), + ("drafts", [GMAIL_COMPOSE_SCOPE]), + ("send", [GMAIL_SEND_SCOPE]), + ("full", [GMAIL_SETTINGS_BASIC_SCOPE]), + ], + "drive": [ + ("readonly", [DRIVE_READONLY_SCOPE]), + ("full", [DRIVE_SCOPE, DRIVE_FILE_SCOPE]), + ], + "calendar": [ + ("readonly", [CALENDAR_READONLY_SCOPE]), + ("full", [CALENDAR_SCOPE, CALENDAR_EVENTS_SCOPE]), + ], + "docs": [ + ("readonly", [DOCS_READONLY_SCOPE, DRIVE_READONLY_SCOPE]), + ("full", [DOCS_WRITE_SCOPE, DRIVE_READONLY_SCOPE, DRIVE_FILE_SCOPE]), + ], + "sheets": [ + ("readonly", [SHEETS_READONLY_SCOPE, DRIVE_READONLY_SCOPE]), + ("full", [SHEETS_WRITE_SCOPE, DRIVE_READONLY_SCOPE]), + ], + "chat": [ + ("readonly", [CHAT_READONLY_SCOPE, CHAT_SPACES_READONLY_SCOPE]), + ("full", [CHAT_WRITE_SCOPE, CHAT_SPACES_SCOPE]), + ], + "forms": [ + ("readonly", [FORMS_BODY_READONLY_SCOPE, FORMS_RESPONSES_READONLY_SCOPE]), + ("full", [FORMS_BODY_SCOPE, FORMS_RESPONSES_READONLY_SCOPE]), + ], + "slides": [ + ("readonly", [SLIDES_READONLY_SCOPE]), + ("full", [SLIDES_SCOPE]), + ], + "tasks": [ + ("readonly", [TASKS_READONLY_SCOPE]), + ("manage", [TASKS_SCOPE]), + ("full", []), + ], + "contacts": [ + ("readonly", [CONTACTS_READONLY_SCOPE]), + ("full", [CONTACTS_SCOPE]), + ], + "search": [ + ("readonly", [CUSTOM_SEARCH_SCOPE]), + ("full", [CUSTOM_SEARCH_SCOPE]), + ], + "appscript": [ + ( + "readonly", + [ + SCRIPT_PROJECTS_READONLY_SCOPE, + SCRIPT_DEPLOYMENTS_READONLY_SCOPE, + SCRIPT_PROCESSES_READONLY_SCOPE, + SCRIPT_METRICS_SCOPE, + DRIVE_READONLY_SCOPE, + ], + ), + ( + "full", + [ + SCRIPT_PROJECTS_SCOPE, + SCRIPT_DEPLOYMENTS_SCOPE, + SCRIPT_PROCESSES_READONLY_SCOPE, + SCRIPT_METRICS_SCOPE, + DRIVE_FILE_SCOPE, + ], + ), + ], +} + +# Actions denied at specific permission levels. +# Maps service -> level -> frozenset of denied action names. 
def is_action_denied(service: str, action: str) -> bool:
    """Report whether *action* is blocked for *service* at its configured level.

    The answer is ``False`` in three cases: granular permissions mode is off,
    the service has no configured level, or the configured level has no
    deny-list entry containing the action.
    """
    active_permissions = _PERMISSIONS
    if active_permissions is None:
        # Granular permissions mode is not active: nothing is denied.
        return False

    configured_level = active_permissions.get(service)
    if configured_level is None:
        return False

    deny_lists_by_level = SERVICE_DENIED_ACTIONS.get(service, {})
    return action in deny_lists_by_level.get(configured_level, frozenset())
def get_all_permission_scopes() -> List[str]:
    """
    Get the combined scopes for all services at their configured permission levels.

    Only meaningful when is_permissions_mode() is True.

    Returns:
        De-duplicated scopes in sorted order, or an empty list when granular
        permissions mode is not active. Sorting makes the result (and any
        scope string built from it) stable across runs, matching the sorted
        output of get_scopes_for_permission().

    Raises:
        ValueError: Propagated from get_scopes_for_permission() if a configured
            service or level is unknown.
    """
    if _PERMISSIONS is None:
        return []

    combined: set = set()
    for service, level in _PERMISSIONS.items():
        combined.update(get_scopes_for_permission(service, level))
    return sorted(combined)
" + f"Expected 'service:level' (e.g., 'gmail:organize', 'drive:readonly')" + ) + service, level = entry.split(":", 1) + if service in result: + raise ValueError(f"Duplicate service in permissions: '{service}'") + if service not in SERVICE_PERMISSION_LEVELS: + raise ValueError( + f"Unknown service: '{service}'. " + f"Valid services: {sorted(SERVICE_PERMISSION_LEVELS.keys())}" + ) + valid = get_valid_levels(service) + if level not in valid: + raise ValueError( + f"Unknown level '{level}' for service '{service}'. " + f"Valid levels: {valid}" + ) + result[service] = level + return result diff --git a/auth/scopes.py b/auth/scopes.py new file mode 100644 index 0000000..aa610ac --- /dev/null +++ b/auth/scopes.py @@ -0,0 +1,336 @@ +""" +Google Workspace OAuth Scopes + +This module centralizes OAuth scope definitions for Google Workspace integration. +Separated from service_decorator.py to avoid circular imports. +""" + +import logging + +logger = logging.getLogger(__name__) + +# Global variable to store enabled tools (set by main.py) +_ENABLED_TOOLS = None + +# Individual OAuth Scope Constants +USERINFO_EMAIL_SCOPE = "https://www.googleapis.com/auth/userinfo.email" +USERINFO_PROFILE_SCOPE = "https://www.googleapis.com/auth/userinfo.profile" +OPENID_SCOPE = "openid" +CALENDAR_SCOPE = "https://www.googleapis.com/auth/calendar" +CALENDAR_READONLY_SCOPE = "https://www.googleapis.com/auth/calendar.readonly" +CALENDAR_EVENTS_SCOPE = "https://www.googleapis.com/auth/calendar.events" + +# Google Drive scopes +DRIVE_SCOPE = "https://www.googleapis.com/auth/drive" +DRIVE_READONLY_SCOPE = "https://www.googleapis.com/auth/drive.readonly" +DRIVE_FILE_SCOPE = "https://www.googleapis.com/auth/drive.file" + +# Google Docs scopes +DOCS_READONLY_SCOPE = "https://www.googleapis.com/auth/documents.readonly" +DOCS_WRITE_SCOPE = "https://www.googleapis.com/auth/documents" + +# Gmail API scopes +GMAIL_READONLY_SCOPE = "https://www.googleapis.com/auth/gmail.readonly" +GMAIL_SEND_SCOPE = 
"https://www.googleapis.com/auth/gmail.send" +GMAIL_COMPOSE_SCOPE = "https://www.googleapis.com/auth/gmail.compose" +GMAIL_MODIFY_SCOPE = "https://www.googleapis.com/auth/gmail.modify" +GMAIL_LABELS_SCOPE = "https://www.googleapis.com/auth/gmail.labels" +GMAIL_SETTINGS_BASIC_SCOPE = "https://www.googleapis.com/auth/gmail.settings.basic" + +# Google Chat API scopes +CHAT_READONLY_SCOPE = "https://www.googleapis.com/auth/chat.messages.readonly" +CHAT_WRITE_SCOPE = "https://www.googleapis.com/auth/chat.messages" +CHAT_SPACES_SCOPE = "https://www.googleapis.com/auth/chat.spaces" +CHAT_SPACES_READONLY_SCOPE = "https://www.googleapis.com/auth/chat.spaces.readonly" + +# Google Sheets API scopes +SHEETS_READONLY_SCOPE = "https://www.googleapis.com/auth/spreadsheets.readonly" +SHEETS_WRITE_SCOPE = "https://www.googleapis.com/auth/spreadsheets" + +# Google Forms API scopes +FORMS_BODY_SCOPE = "https://www.googleapis.com/auth/forms.body" +FORMS_BODY_READONLY_SCOPE = "https://www.googleapis.com/auth/forms.body.readonly" +FORMS_RESPONSES_READONLY_SCOPE = ( + "https://www.googleapis.com/auth/forms.responses.readonly" +) + +# Google Slides API scopes +SLIDES_SCOPE = "https://www.googleapis.com/auth/presentations" +SLIDES_READONLY_SCOPE = "https://www.googleapis.com/auth/presentations.readonly" + +# Google Tasks API scopes +TASKS_SCOPE = "https://www.googleapis.com/auth/tasks" +TASKS_READONLY_SCOPE = "https://www.googleapis.com/auth/tasks.readonly" + +# Google Contacts (People API) scopes +CONTACTS_SCOPE = "https://www.googleapis.com/auth/contacts" +CONTACTS_READONLY_SCOPE = "https://www.googleapis.com/auth/contacts.readonly" + +# Google Custom Search API scope +CUSTOM_SEARCH_SCOPE = "https://www.googleapis.com/auth/cse" + +# Google Apps Script API scopes +SCRIPT_PROJECTS_SCOPE = "https://www.googleapis.com/auth/script.projects" +SCRIPT_PROJECTS_READONLY_SCOPE = ( + "https://www.googleapis.com/auth/script.projects.readonly" +) +SCRIPT_DEPLOYMENTS_SCOPE = 
"https://www.googleapis.com/auth/script.deployments" +SCRIPT_DEPLOYMENTS_READONLY_SCOPE = ( + "https://www.googleapis.com/auth/script.deployments.readonly" +) +SCRIPT_PROCESSES_READONLY_SCOPE = "https://www.googleapis.com/auth/script.processes" +SCRIPT_METRICS_SCOPE = "https://www.googleapis.com/auth/script.metrics" + +# Google scope hierarchy: broader scopes that implicitly cover narrower ones. +# See https://developers.google.com/gmail/api/auth/scopes, +# https://developers.google.com/drive/api/guides/api-specific-auth, etc. +SCOPE_HIERARCHY = { + GMAIL_MODIFY_SCOPE: { + GMAIL_READONLY_SCOPE, + GMAIL_SEND_SCOPE, + GMAIL_COMPOSE_SCOPE, + GMAIL_LABELS_SCOPE, + }, + DRIVE_SCOPE: {DRIVE_READONLY_SCOPE, DRIVE_FILE_SCOPE}, + CALENDAR_SCOPE: {CALENDAR_READONLY_SCOPE, CALENDAR_EVENTS_SCOPE}, + DOCS_WRITE_SCOPE: {DOCS_READONLY_SCOPE}, + SHEETS_WRITE_SCOPE: {SHEETS_READONLY_SCOPE}, + SLIDES_SCOPE: {SLIDES_READONLY_SCOPE}, + TASKS_SCOPE: {TASKS_READONLY_SCOPE}, + CONTACTS_SCOPE: {CONTACTS_READONLY_SCOPE}, + CHAT_WRITE_SCOPE: {CHAT_READONLY_SCOPE}, + CHAT_SPACES_SCOPE: {CHAT_SPACES_READONLY_SCOPE}, + FORMS_BODY_SCOPE: {FORMS_BODY_READONLY_SCOPE}, + SCRIPT_PROJECTS_SCOPE: {SCRIPT_PROJECTS_READONLY_SCOPE}, + SCRIPT_DEPLOYMENTS_SCOPE: {SCRIPT_DEPLOYMENTS_READONLY_SCOPE}, +} + + +def has_required_scopes(available_scopes, required_scopes): + """ + Check if available scopes satisfy all required scopes, accounting for + Google's scope hierarchy (e.g., gmail.modify covers gmail.readonly). + + Args: + available_scopes: Scopes the credentials have (set, list, or frozenset). + required_scopes: Scopes that are required (set, list, or frozenset). + + Returns: + True if all required scopes are satisfied. 
+ """ + available = set(available_scopes or []) + required = set(required_scopes or []) + # Expand available scopes with implied narrower scopes + expanded = set(available) + for broad_scope, covered in SCOPE_HIERARCHY.items(): + if broad_scope in available: + expanded.update(covered) + return all(scope in expanded for scope in required) + + +# Base OAuth scopes required for user identification +BASE_SCOPES = [USERINFO_EMAIL_SCOPE, USERINFO_PROFILE_SCOPE, OPENID_SCOPE] + +# Service-specific scope groups +DOCS_SCOPES = [ + DOCS_READONLY_SCOPE, + DOCS_WRITE_SCOPE, + DRIVE_READONLY_SCOPE, + DRIVE_FILE_SCOPE, +] + +CALENDAR_SCOPES = [CALENDAR_SCOPE, CALENDAR_READONLY_SCOPE, CALENDAR_EVENTS_SCOPE] + +DRIVE_SCOPES = [DRIVE_SCOPE, DRIVE_READONLY_SCOPE, DRIVE_FILE_SCOPE] + +GMAIL_SCOPES = [ + GMAIL_READONLY_SCOPE, + GMAIL_SEND_SCOPE, + GMAIL_COMPOSE_SCOPE, + GMAIL_MODIFY_SCOPE, + GMAIL_LABELS_SCOPE, + GMAIL_SETTINGS_BASIC_SCOPE, +] + +CHAT_SCOPES = [ + CHAT_READONLY_SCOPE, + CHAT_WRITE_SCOPE, + CHAT_SPACES_SCOPE, + CHAT_SPACES_READONLY_SCOPE, +] + +SHEETS_SCOPES = [SHEETS_READONLY_SCOPE, SHEETS_WRITE_SCOPE, DRIVE_READONLY_SCOPE] + +FORMS_SCOPES = [ + FORMS_BODY_SCOPE, + FORMS_BODY_READONLY_SCOPE, + FORMS_RESPONSES_READONLY_SCOPE, +] + +SLIDES_SCOPES = [SLIDES_SCOPE, SLIDES_READONLY_SCOPE] + +TASKS_SCOPES = [TASKS_SCOPE, TASKS_READONLY_SCOPE] + +CONTACTS_SCOPES = [CONTACTS_SCOPE, CONTACTS_READONLY_SCOPE] + +CUSTOM_SEARCH_SCOPES = [CUSTOM_SEARCH_SCOPE] + +SCRIPT_SCOPES = [ + SCRIPT_PROJECTS_SCOPE, + SCRIPT_PROJECTS_READONLY_SCOPE, + SCRIPT_DEPLOYMENTS_SCOPE, + SCRIPT_DEPLOYMENTS_READONLY_SCOPE, + SCRIPT_PROCESSES_READONLY_SCOPE, # Required for list_script_processes + SCRIPT_METRICS_SCOPE, # Required for get_script_metrics + DRIVE_FILE_SCOPE, # Required for list/delete script projects (uses Drive API) +] + +# Tool-to-scopes mapping +TOOL_SCOPES_MAP = { + "gmail": GMAIL_SCOPES, + "drive": DRIVE_SCOPES, + "calendar": CALENDAR_SCOPES, + "docs": DOCS_SCOPES, + "sheets": 
SHEETS_SCOPES, + "chat": CHAT_SCOPES, + "forms": FORMS_SCOPES, + "slides": SLIDES_SCOPES, + "tasks": TASKS_SCOPES, + "contacts": CONTACTS_SCOPES, + "search": CUSTOM_SEARCH_SCOPES, + "appscript": SCRIPT_SCOPES, +} + +# Tool-to-read-only-scopes mapping +TOOL_READONLY_SCOPES_MAP = { + "gmail": [GMAIL_READONLY_SCOPE], + "drive": [DRIVE_READONLY_SCOPE], + "calendar": [CALENDAR_READONLY_SCOPE], + "docs": [DOCS_READONLY_SCOPE, DRIVE_READONLY_SCOPE], + "sheets": [SHEETS_READONLY_SCOPE, DRIVE_READONLY_SCOPE], + "chat": [CHAT_READONLY_SCOPE, CHAT_SPACES_READONLY_SCOPE], + "forms": [FORMS_BODY_READONLY_SCOPE, FORMS_RESPONSES_READONLY_SCOPE], + "slides": [SLIDES_READONLY_SCOPE], + "tasks": [TASKS_READONLY_SCOPE], + "contacts": [CONTACTS_READONLY_SCOPE], + "search": CUSTOM_SEARCH_SCOPES, + "appscript": [ + SCRIPT_PROJECTS_READONLY_SCOPE, + SCRIPT_DEPLOYMENTS_READONLY_SCOPE, + SCRIPT_PROCESSES_READONLY_SCOPE, + SCRIPT_METRICS_SCOPE, + DRIVE_READONLY_SCOPE, + ], +} + + +def set_enabled_tools(enabled_tools): + """ + Set the globally enabled tools list. + + Args: + enabled_tools: List of enabled tool names. + """ + global _ENABLED_TOOLS + _ENABLED_TOOLS = enabled_tools + logger.info(f"Enabled tools set for scope management: {enabled_tools}") + + +# Global variable to store read-only mode (set by main.py) +_READ_ONLY_MODE = False + + +def set_read_only(enabled: bool): + """ + Set the global read-only mode. + + Args: + enabled: Boolean indicating if read-only mode should be enabled. 
+ """ + global _READ_ONLY_MODE + _READ_ONLY_MODE = enabled + logger.info(f"Read-only mode set to: {enabled}") + + +def is_read_only_mode() -> bool: + """Check if read-only mode is enabled.""" + return _READ_ONLY_MODE + + +def get_all_read_only_scopes() -> list[str]: + """Get all possible read-only scopes across all tools.""" + all_scopes = set(BASE_SCOPES) + for scopes in TOOL_READONLY_SCOPES_MAP.values(): + all_scopes.update(scopes) + return list(all_scopes) + + +def get_current_scopes(): + """ + Returns scopes for currently enabled tools. + Uses globally set enabled tools or all tools if not set. + + .. deprecated:: + This function is a thin wrapper around get_scopes_for_tools() and exists + for backwards compatibility. Prefer using get_scopes_for_tools() directly + for new code, which allows explicit control over the tool list parameter. + + Returns: + List of unique scopes for the enabled tools plus base scopes. + """ + return get_scopes_for_tools(_ENABLED_TOOLS) + + +def get_scopes_for_tools(enabled_tools=None): + """ + Returns scopes for enabled tools only. + + Args: + enabled_tools: List of enabled tool names. If None, returns all scopes. + + Returns: + List of unique scopes for the enabled tools plus base scopes. + """ + # Granular permissions mode overrides both full and read-only scope maps. + # Lazy import with guard to avoid circular dependency during module init + # (SCOPES = get_scopes_for_tools() runs at import time before auth.permissions + # is fully loaded, but permissions mode is never active at that point). 
+ try: + from auth.permissions import is_permissions_mode, get_all_permission_scopes + + if is_permissions_mode(): + scopes = BASE_SCOPES.copy() + scopes.extend(get_all_permission_scopes()) + logger.debug( + "Generated scopes from granular permissions: %d unique scopes", + len(set(scopes)), + ) + return list(set(scopes)) + except ImportError: + pass + + if enabled_tools is None: + # Default behavior - return all scopes + enabled_tools = TOOL_SCOPES_MAP.keys() + + # Start with base scopes (always required) + scopes = BASE_SCOPES.copy() + + # Determine which map to use based on read-only mode + scope_map = TOOL_READONLY_SCOPES_MAP if _READ_ONLY_MODE else TOOL_SCOPES_MAP + mode_str = "read-only" if _READ_ONLY_MODE else "full" + + # Add scopes for each enabled tool + for tool in enabled_tools: + if tool in scope_map: + scopes.extend(scope_map[tool]) + + logger.debug( + f"Generated {mode_str} scopes for tools {list(enabled_tools)}: {len(set(scopes))} unique scopes" + ) + # Return unique scopes + return list(set(scopes)) + + +# Combined scopes for all supported Google Workspace operations (backwards compatibility) +SCOPES = get_scopes_for_tools() diff --git a/auth/service_decorator.py b/auth/service_decorator.py new file mode 100644 index 0000000..a045f84 --- /dev/null +++ b/auth/service_decorator.py @@ -0,0 +1,862 @@ +import inspect +import logging + +import re +from functools import wraps +from typing import Dict, List, Optional, Any, Callable, Union, Tuple +from contextlib import ExitStack + +from google.auth.exceptions import RefreshError +from googleapiclient.discovery import build +from fastmcp.server.dependencies import get_access_token, get_context +from auth.google_auth import get_authenticated_google_service, GoogleAuthenticationError +from auth.oauth21_session_store import ( + get_auth_provider, + get_oauth21_session_store, + ensure_session_from_access_token, +) +from auth.oauth_config import ( + is_oauth21_enabled, + get_oauth_config, + 
is_external_oauth21_provider, +) +from core.context import set_fastmcp_session_id +from auth.scopes import ( + GMAIL_READONLY_SCOPE, + GMAIL_SEND_SCOPE, + GMAIL_COMPOSE_SCOPE, + GMAIL_MODIFY_SCOPE, + GMAIL_LABELS_SCOPE, + GMAIL_SETTINGS_BASIC_SCOPE, + DRIVE_SCOPE, + DRIVE_READONLY_SCOPE, + DRIVE_FILE_SCOPE, + DOCS_READONLY_SCOPE, + DOCS_WRITE_SCOPE, + CALENDAR_READONLY_SCOPE, + CALENDAR_EVENTS_SCOPE, + SHEETS_READONLY_SCOPE, + SHEETS_WRITE_SCOPE, + CHAT_READONLY_SCOPE, + CHAT_WRITE_SCOPE, + CHAT_SPACES_SCOPE, + CHAT_SPACES_READONLY_SCOPE, + FORMS_BODY_SCOPE, + FORMS_BODY_READONLY_SCOPE, + FORMS_RESPONSES_READONLY_SCOPE, + SLIDES_SCOPE, + SLIDES_READONLY_SCOPE, + TASKS_SCOPE, + TASKS_READONLY_SCOPE, + CONTACTS_SCOPE, + CONTACTS_READONLY_SCOPE, + CUSTOM_SEARCH_SCOPE, + SCRIPT_PROJECTS_SCOPE, + SCRIPT_PROJECTS_READONLY_SCOPE, + SCRIPT_DEPLOYMENTS_SCOPE, + SCRIPT_DEPLOYMENTS_READONLY_SCOPE, + has_required_scopes, +) + +logger = logging.getLogger(__name__) + + +# Authentication helper functions +async def _get_auth_context( + tool_name: str, +) -> Tuple[Optional[str], Optional[str], Optional[str]]: + """ + Get authentication context from FastMCP. 
+ + Returns: + Tuple of (authenticated_user, auth_method, mcp_session_id) + """ + try: + ctx = get_context() + if not ctx: + return None, None, None + + authenticated_user = await ctx.get_state("authenticated_user_email") + auth_method = await ctx.get_state("authenticated_via") + mcp_session_id = ctx.session_id if hasattr(ctx, "session_id") else None + + if mcp_session_id: + set_fastmcp_session_id(mcp_session_id) + + logger.info( + f"[{tool_name}] Auth from middleware: authenticated_user={authenticated_user}, auth_method={auth_method}, session_id={mcp_session_id}" + ) + return authenticated_user, auth_method, mcp_session_id + + except Exception as e: + logger.debug(f"[{tool_name}] Could not get FastMCP context: {e}") + return None, None, None + + +def _detect_oauth_version( + authenticated_user: Optional[str], mcp_session_id: Optional[str], tool_name: str +) -> bool: + """ + Detect whether to use OAuth 2.1 based on configuration and context. + + Returns: + True if OAuth 2.1 should be used, False otherwise + """ + if not is_oauth21_enabled(): + return False + + # When OAuth 2.1 is enabled globally, ALWAYS use OAuth 2.1 for authenticated users + if authenticated_user: + logger.info( + f"[{tool_name}] OAuth 2.1 mode: Using OAuth 2.1 for authenticated user '{authenticated_user}'" + ) + return True + + # If FastMCP protocol-level auth is enabled, a validated access token should + # be available even if middleware state wasn't populated. 
+ try: + if get_access_token() is not None: + logger.info( + f"[{tool_name}] OAuth 2.1 mode: Using OAuth 2.1 based on validated access token" + ) + return True + except Exception as e: + logger.debug( + f"[{tool_name}] Could not inspect access token for OAuth mode: {e}" + ) + + # Only use version detection for unauthenticated requests + config = get_oauth_config() + request_params = {} + if mcp_session_id: + request_params["session_id"] = mcp_session_id + + oauth_version = config.detect_oauth_version(request_params) + use_oauth21 = oauth_version == "oauth21" + logger.info( + f"[{tool_name}] OAuth version detected: {oauth_version}, will use OAuth 2.1: {use_oauth21}" + ) + return use_oauth21 + + +def _update_email_in_args(args: tuple, index: int, new_email: str) -> tuple: + """Update email at specific index in args tuple.""" + if index < len(args): + args_list = list(args) + args_list[index] = new_email + return tuple(args_list) + return args + + +def _override_oauth21_user_email( + use_oauth21: bool, + authenticated_user: Optional[str], + current_user_email: str, + args: tuple, + kwargs: dict, + param_names: List[str], + tool_name: str, + service_type: str = "", +) -> Tuple[str, tuple]: + """ + Override user_google_email with authenticated user when using OAuth 2.1. 
+ + Returns: + Tuple of (updated_user_email, updated_args) + """ + if not ( + use_oauth21 and authenticated_user and current_user_email != authenticated_user + ): + return current_user_email, args + + service_suffix = f" for service '{service_type}'" if service_type else "" + logger.info( + f"[{tool_name}] OAuth 2.1: Overriding user_google_email from '{current_user_email}' to authenticated user '{authenticated_user}'{service_suffix}" + ) + + # Update in kwargs if present + if "user_google_email" in kwargs: + kwargs["user_google_email"] = authenticated_user + + # Update in args if user_google_email is passed positionally + try: + user_email_index = param_names.index("user_google_email") + args = _update_email_in_args(args, user_email_index, authenticated_user) + except ValueError: + pass # user_google_email not in positional parameters + + return authenticated_user, args + + +async def _authenticate_service( + use_oauth21: bool, + service_name: str, + service_version: str, + tool_name: str, + user_google_email: str, + resolved_scopes: List[str], + mcp_session_id: Optional[str], + authenticated_user: Optional[str], +) -> Tuple[Any, str]: + """ + Authenticate and get Google service using appropriate OAuth version. 
+ + Returns: + Tuple of (service, actual_user_email) + """ + if use_oauth21: + logger.debug(f"[{tool_name}] Using OAuth 2.1 flow") + return await get_authenticated_google_service_oauth21( + service_name=service_name, + version=service_version, + tool_name=tool_name, + user_google_email=user_google_email, + required_scopes=resolved_scopes, + session_id=mcp_session_id, + auth_token_email=authenticated_user, + allow_recent_auth=False, + ) + else: + logger.debug(f"[{tool_name}] Using legacy OAuth 2.0 flow") + return await get_authenticated_google_service( + service_name=service_name, + version=service_version, + tool_name=tool_name, + user_google_email=user_google_email, + required_scopes=resolved_scopes, + session_id=mcp_session_id, + ) + + +async def get_authenticated_google_service_oauth21( + service_name: str, + version: str, + tool_name: str, + user_google_email: str, + required_scopes: List[str], + session_id: Optional[str] = None, + auth_token_email: Optional[str] = None, + allow_recent_auth: bool = False, +) -> tuple[Any, str]: + """ + OAuth 2.1 authentication using the session store with security validation. + """ + provider = get_auth_provider() + access_token = get_access_token() + + if provider and access_token: + token_email = None + if getattr(access_token, "claims", None): + token_email = access_token.claims.get("email") + + resolved_email = token_email or auth_token_email or user_google_email + if not resolved_email: + raise GoogleAuthenticationError( + "Authenticated user email could not be determined from access token." + ) + + if auth_token_email and token_email and token_email != auth_token_email: + raise GoogleAuthenticationError( + "Access token email does not match authenticated session context." + ) + + if token_email and user_google_email and token_email != user_google_email: + raise GoogleAuthenticationError( + f"Authenticated account {token_email} does not match requested user {user_google_email}." 
+ ) + + credentials = ensure_session_from_access_token( + access_token, resolved_email, session_id + ) + if not credentials: + raise GoogleAuthenticationError( + "Unable to build Google credentials from authenticated access token." + ) + + scopes_available = set(credentials.scopes or []) + if not scopes_available and getattr(access_token, "scopes", None): + scopes_available = set(access_token.scopes) + + if not has_required_scopes(scopes_available, required_scopes): + raise GoogleAuthenticationError( + f"OAuth credentials lack required scopes. Need: {required_scopes}, Have: {sorted(scopes_available)}" + ) + + service = build(service_name, version, credentials=credentials) + logger.info(f"[{tool_name}] Authenticated {service_name} for {resolved_email}") + return service, resolved_email + + store = get_oauth21_session_store() + + # Use the validation method to ensure session can only access its own credentials + credentials = store.get_credentials_with_validation( + requested_user_email=user_google_email, + session_id=session_id, + auth_token_email=auth_token_email, + allow_recent_auth=allow_recent_auth, + ) + + if not credentials: + raise GoogleAuthenticationError( + f"Access denied: Cannot retrieve credentials for {user_google_email}. " + f"You can only access credentials for your authenticated account." + ) + + if not credentials.scopes: + scopes_available = set(required_scopes) + else: + scopes_available = set(credentials.scopes) + + if not has_required_scopes(scopes_available, required_scopes): + raise GoogleAuthenticationError( + f"OAuth 2.1 credentials lack required scopes. 
Need: {required_scopes}, Have: {sorted(scopes_available)}" + ) + + service = build(service_name, version, credentials=credentials) + logger.info(f"[{tool_name}] Authenticated {service_name} for {user_google_email}") + + return service, user_google_email + + +def _extract_oauth21_user_email( + authenticated_user: Optional[str], func_name: str +) -> str: + """ + Extract user email for OAuth 2.1 mode. + + Args: + authenticated_user: The authenticated user from context + func_name: Name of the function being decorated (for error messages) + + Returns: + User email string + + Raises: + Exception: If no authenticated user found in OAuth 2.1 mode + """ + if not authenticated_user: + raise Exception( + f"OAuth 2.1 mode requires an authenticated user for {func_name}, but none was found." + ) + return authenticated_user + + +def _extract_oauth20_user_email( + args: tuple, kwargs: dict, wrapper_sig: inspect.Signature +) -> str: + """ + Extract user email for OAuth 2.0 mode from function arguments. + + Args: + args: Positional arguments passed to wrapper + kwargs: Keyword arguments passed to wrapper + wrapper_sig: Function signature for parameter binding + + Returns: + User email string + + Raises: + Exception: If user_google_email parameter not found + """ + bound_args = wrapper_sig.bind(*args, **kwargs) + bound_args.apply_defaults() + + user_google_email = bound_args.arguments.get("user_google_email") + if not user_google_email: + raise Exception("'user_google_email' parameter is required but was not found.") + return user_google_email + + +def _remove_user_email_arg_from_docstring(docstring: str) -> str: + """ + Remove user_google_email parameter documentation from docstring. 
+ + Args: + docstring: The original function docstring + + Returns: + Modified docstring with user_google_email parameter removed + """ + if not docstring: + return docstring + + # Pattern to match user_google_email parameter documentation + # Handles various formats like: + # - user_google_email (str): The user's Google email address. Required. + # - user_google_email: Description + # - user_google_email (str) - Description + patterns = [ + r"^\s*user_google_email\s*\([^)]*\)\s*:\s*[^\n]*\.?\s*(?:Required\.?)?\s*\n", + r"^\s*user_google_email\s*:\s*[^\n]*\n", + r"^\s*user_google_email\s*\([^)]*\)\s*-\s*[^\n]*\n", + ] + + modified_docstring = docstring + for pattern in patterns: + modified_docstring = re.sub(pattern, "", modified_docstring, flags=re.MULTILINE) + + # Clean up any sequence of 3 or more newlines that might have been created + modified_docstring = re.sub(r"\n{3,}", "\n\n", modified_docstring) + return modified_docstring + + +# Service configuration mapping +SERVICE_CONFIGS = { + "gmail": {"service": "gmail", "version": "v1"}, + "drive": {"service": "drive", "version": "v3"}, + "calendar": {"service": "calendar", "version": "v3"}, + "docs": {"service": "docs", "version": "v1"}, + "sheets": {"service": "sheets", "version": "v4"}, + "chat": {"service": "chat", "version": "v1"}, + "forms": {"service": "forms", "version": "v1"}, + "slides": {"service": "slides", "version": "v1"}, + "tasks": {"service": "tasks", "version": "v1"}, + "people": {"service": "people", "version": "v1"}, + "customsearch": {"service": "customsearch", "version": "v1"}, + "script": {"service": "script", "version": "v1"}, +} + + +# Scope group definitions for easy reference +SCOPE_GROUPS = { + # Gmail scopes + "gmail_read": GMAIL_READONLY_SCOPE, + "gmail_send": GMAIL_SEND_SCOPE, + "gmail_compose": GMAIL_COMPOSE_SCOPE, + "gmail_modify": GMAIL_MODIFY_SCOPE, + "gmail_labels": GMAIL_LABELS_SCOPE, + "gmail_settings_basic": GMAIL_SETTINGS_BASIC_SCOPE, + # Drive scopes + "drive": DRIVE_SCOPE, 
+ "drive_read": DRIVE_READONLY_SCOPE, + "drive_file": DRIVE_FILE_SCOPE, + # Docs scopes + "docs_read": DOCS_READONLY_SCOPE, + "docs_write": DOCS_WRITE_SCOPE, + # Calendar scopes + "calendar_read": CALENDAR_READONLY_SCOPE, + "calendar_events": CALENDAR_EVENTS_SCOPE, + # Sheets scopes + "sheets_read": SHEETS_READONLY_SCOPE, + "sheets_write": SHEETS_WRITE_SCOPE, + # Chat scopes + "chat_read": CHAT_READONLY_SCOPE, + "chat_write": CHAT_WRITE_SCOPE, + "chat_spaces": CHAT_SPACES_SCOPE, + "chat_spaces_readonly": CHAT_SPACES_READONLY_SCOPE, + # Forms scopes + "forms": FORMS_BODY_SCOPE, + "forms_read": FORMS_BODY_READONLY_SCOPE, + "forms_responses_read": FORMS_RESPONSES_READONLY_SCOPE, + # Slides scopes + "slides": SLIDES_SCOPE, + "slides_read": SLIDES_READONLY_SCOPE, + # Tasks scopes + "tasks": TASKS_SCOPE, + "tasks_read": TASKS_READONLY_SCOPE, + # Contacts scopes + "contacts": CONTACTS_SCOPE, + "contacts_read": CONTACTS_READONLY_SCOPE, + # Custom Search scope + "customsearch": CUSTOM_SEARCH_SCOPE, + # Apps Script scopes + "script_readonly": SCRIPT_PROJECTS_READONLY_SCOPE, + "script_projects": SCRIPT_PROJECTS_SCOPE, + "script_deployments": SCRIPT_DEPLOYMENTS_SCOPE, + "script_deployments_readonly": SCRIPT_DEPLOYMENTS_READONLY_SCOPE, +} + + +def _resolve_scopes(scopes: Union[str, List[str]]) -> List[str]: + """Resolve scope names to actual scope URLs.""" + if isinstance(scopes, str): + if scopes in SCOPE_GROUPS: + return [SCOPE_GROUPS[scopes]] + else: + return [scopes] + + resolved = [] + for scope in scopes: + if scope in SCOPE_GROUPS: + resolved.append(SCOPE_GROUPS[scope]) + else: + resolved.append(scope) + return resolved + + +def _handle_token_refresh_error( + error: RefreshError, user_email: str, service_name: str +) -> str: + """ + Handle token refresh errors gracefully, particularly expired/revoked tokens. 
+ + Args: + error: The RefreshError that occurred + user_email: User's email address + service_name: Name of the Google service + + Returns: + A user-friendly error message with instructions for reauthentication + """ + error_str = str(error) + + if ( + "invalid_grant" in error_str.lower() + or "expired or revoked" in error_str.lower() + ): + logger.warning( + f"Token expired or revoked for user {user_email} accessing {service_name}" + ) + + service_display_name = f"Google {service_name.title()}" + if is_oauth21_enabled(): + if is_external_oauth21_provider(): + oauth21_step = ( + "Provide a valid OAuth 2.1 bearer token in the Authorization header" + ) + else: + oauth21_step = "Sign in through your MCP client's OAuth 2.1 flow" + + return ( + f"**Authentication Required: Token Expired/Revoked for {service_display_name}**\n\n" + f"Your Google authentication token for {user_email} has expired or been revoked. " + f"This commonly happens when:\n" + f"- The token has been unused for an extended period\n" + f"- You've changed your Google account password\n" + f"- You've revoked access to the application\n\n" + f"**To resolve this, please:**\n" + f"1. {oauth21_step}\n" + f"2. Retry your original command\n\n" + f"The application will automatically use the new credentials once authentication is complete." + ) + + return ( + f"**Authentication Required: Token Expired/Revoked for {service_display_name}**\n\n" + f"Your Google authentication token for {user_email} has expired or been revoked. " + f"This commonly happens when:\n" + f"- The token has been unused for an extended period\n" + f"- You've changed your Google account password\n" + f"- You've revoked access to the application\n\n" + f"**To resolve this, please:**\n" + f"1. Run `start_google_auth` with your email ({user_email}) and service_name='{service_display_name}'\n" + f"2. Complete the authentication flow in your browser\n" + f"3. 
def require_google_service(
    service_type: str,
    scopes: Union[str, List[str]],
    version: Optional[str] = None,
):
    """
    Decorator that authenticates a Google service and injects it into a tool.

    The decorated coroutine must declare ``service`` as its first parameter.
    The wrapper exposed to callers has a signature without ``service``; when
    OAuth 2.1 is enabled ``user_google_email`` is removed as well and supplied
    from the authenticated user instead.

    Args:
        service_type: Key into SERVICE_CONFIGS ("gmail", "drive", "calendar", etc.)
        scopes: Required scopes (can be scope group names or actual URLs)
        version: Service version (defaults to the version in SERVICE_CONFIGS)

    Raises:
        TypeError: At decoration time, if the first parameter is not ``service``.
        Exception: At call time, if ``service_type`` is not in SERVICE_CONFIGS.
        GoogleAuthenticationError: If authentication fails, or if the tool
            raises RefreshError (chained from the original error).

    Usage:
        @require_google_service("gmail", "gmail_read")
        async def search_messages(service, user_google_email: str, query: str):
            # service parameter is automatically injected
            # Original authentication logic is handled automatically
    """

    def decorator(func: Callable) -> Callable:
        original_sig = inspect.signature(func)
        params = list(original_sig.parameters.values())

        # The decorated function must have 'service' as its first parameter.
        if not params or params[0].name != "service":
            raise TypeError(
                f"Function '{func.__name__}' decorated with @require_google_service "
                "must have 'service' as its first parameter."
            )

        # Create a new signature for the wrapper that excludes the 'service' parameter.
        # In OAuth 2.1 mode, also exclude 'user_google_email' since it's automatically determined.
        if is_oauth21_enabled():
            # Remove both 'service' and 'user_google_email' parameters
            filtered_params = [p for p in params[1:] if p.name != "user_google_email"]
            wrapper_sig = original_sig.replace(parameters=filtered_params)
        else:
            # Only remove 'service' parameter for OAuth 2.0 mode
            wrapper_sig = original_sig.replace(parameters=params[1:])

        @wraps(func)
        async def wrapper(*args, **kwargs):
            # Note: `args` and `kwargs` are the arguments for the *wrapper*,
            # which does not include 'service'.
            tool_name = func.__name__

            # Get authentication context early to determine OAuth mode
            authenticated_user, auth_method, mcp_session_id = await _get_auth_context(
                tool_name
            )

            # Extract user_google_email based on OAuth mode
            if is_oauth21_enabled():
                user_google_email = _extract_oauth21_user_email(
                    authenticated_user, tool_name
                )
            else:
                user_google_email = _extract_oauth20_user_email(
                    args, kwargs, wrapper_sig
                )

            # Get service configuration from the decorator's arguments
            if service_type not in SERVICE_CONFIGS:
                raise Exception(f"Unknown service type: {service_type}")

            config = SERVICE_CONFIGS[service_type]
            service_name = config["service"]
            service_version = version or config["version"]

            # Resolve scopes
            resolved_scopes = _resolve_scopes(scopes)

            try:
                # Log authentication status
                logger.debug(
                    f"[{tool_name}] Auth: {authenticated_user or 'none'} via {auth_method or 'none'} (session: {mcp_session_id[:8] if mcp_session_id else 'none'})"
                )

                # Detect OAuth version
                use_oauth21 = _detect_oauth_version(
                    authenticated_user, mcp_session_id, tool_name
                )

                # In OAuth 2.1 mode, user_google_email is already set to authenticated_user
                # In OAuth 2.0 mode, we may need to override it
                if not is_oauth21_enabled():
                    wrapper_params = list(wrapper_sig.parameters.keys())
                    user_google_email, args = _override_oauth21_user_email(
                        use_oauth21,
                        authenticated_user,
                        user_google_email,
                        args,
                        kwargs,
                        wrapper_params,
                        tool_name,
                    )

                # Authenticate service
                service, actual_user_email = await _authenticate_service(
                    use_oauth21,
                    service_name,
                    service_version,
                    tool_name,
                    user_google_email,
                    resolved_scopes,
                    mcp_session_id,
                    authenticated_user,
                )
            except GoogleAuthenticationError as e:
                logger.error(
                    f"[{tool_name}] GoogleAuthenticationError during authentication. "
                    f"Method={auth_method or 'none'}, User={authenticated_user or 'none'}, "
                    f"Service={service_name} v{service_version}, MCPSessionID={mcp_session_id or 'none'}: {e}"
                )
                # Re-raise the original error without wrapping it
                raise

            try:
                # In OAuth 2.1 mode, we need to add user_google_email to kwargs since it was removed from signature
                if is_oauth21_enabled():
                    kwargs["user_google_email"] = user_google_email

                # Prepend the fetched service object to the original arguments
                return await func(service, *args, **kwargs)
            except RefreshError as e:
                error_message = _handle_token_refresh_error(
                    e, actual_user_email, service_name
                )
                # Chain explicitly so the underlying refresh failure stays
                # attached as __cause__ for anyone debugging the auth error.
                raise GoogleAuthenticationError(error_message) from e
            finally:
                # Always release the service, whether the tool succeeded or raised.
                if service:
                    service.close()

        # Set the wrapper's signature to the one without 'service'
        wrapper.__signature__ = wrapper_sig

        # Conditionally modify docstring to remove user_google_email parameter documentation
        if is_oauth21_enabled():
            logger.debug(
                "OAuth 2.1 mode enabled, removing user_google_email from docstring"
            )
            if func.__doc__:
                wrapper.__doc__ = _remove_user_email_arg_from_docstring(func.__doc__)

        # Attach required scopes to the wrapper for tool filtering
        wrapper._required_google_scopes = _resolve_scopes(scopes)

        return wrapper

    return decorator
+ + Args: + service_configs: List of service configurations, each containing: + - service_type: Type of service + - scopes: Required scopes + - param_name: Name to inject service as (e.g., 'drive_service', 'docs_service') + - version: Optional version override + + Usage: + @require_multiple_services([ + {"service_type": "drive", "scopes": "drive_read", "param_name": "drive_service"}, + {"service_type": "docs", "scopes": "docs_read", "param_name": "docs_service"} + ]) + async def get_doc_with_metadata(drive_service, docs_service, user_google_email: str, doc_id: str): + # Both services are automatically injected + """ + + def decorator(func: Callable) -> Callable: + original_sig = inspect.signature(func) + + service_param_names = {config["param_name"] for config in service_configs} + params = list(original_sig.parameters.values()) + + # Remove injected service params from the wrapper signature; drop user_google_email only for OAuth 2.1. + filtered_params = [p for p in params if p.name not in service_param_names] + if is_oauth21_enabled(): + filtered_params = [ + p for p in filtered_params if p.name != "user_google_email" + ] + + wrapper_sig = original_sig.replace(parameters=filtered_params) + wrapper_param_names = [p.name for p in filtered_params] + + @wraps(func) + async def wrapper(*args, **kwargs): + # Get authentication context early + tool_name = func.__name__ + authenticated_user, _, mcp_session_id = await _get_auth_context(tool_name) + + # Extract user_google_email based on OAuth mode + if is_oauth21_enabled(): + user_google_email = _extract_oauth21_user_email( + authenticated_user, tool_name + ) + else: + user_google_email = _extract_oauth20_user_email( + args, kwargs, wrapper_sig + ) + + # Authenticate all services + with ExitStack() as stack: + for config in service_configs: + service_type = config["service_type"] + scopes = config["scopes"] + param_name = config["param_name"] + version = config.get("version") + + if service_type not in SERVICE_CONFIGS: + 
raise Exception(f"Unknown service type: {service_type}") + + service_config = SERVICE_CONFIGS[service_type] + service_name = service_config["service"] + service_version = version or service_config["version"] + resolved_scopes = _resolve_scopes(scopes) + + try: + # Detect OAuth version (simplified for multiple services) + use_oauth21 = ( + is_oauth21_enabled() and authenticated_user is not None + ) + + # In OAuth 2.0 mode, we may need to override user_google_email + if not is_oauth21_enabled(): + user_google_email, args = _override_oauth21_user_email( + use_oauth21, + authenticated_user, + user_google_email, + args, + kwargs, + wrapper_param_names, + tool_name, + service_type, + ) + + # Authenticate service + service, _ = await _authenticate_service( + use_oauth21, + service_name, + service_version, + tool_name, + user_google_email, + resolved_scopes, + mcp_session_id, + authenticated_user, + ) + + # Inject service with specified parameter name + kwargs[param_name] = service + stack.callback(service.close) + + except GoogleAuthenticationError as e: + logger.error( + f"[{tool_name}] GoogleAuthenticationError for service '{service_type}' (user: {user_google_email}): {e}" + ) + # Re-raise the original error without wrapping it + raise + + # Call the original function with refresh error handling + try: + # In OAuth 2.1 mode, we need to add user_google_email to kwargs since it was removed from signature + if is_oauth21_enabled(): + kwargs["user_google_email"] = user_google_email + + return await func(*args, **kwargs) + except RefreshError as e: + # Handle token refresh errors gracefully + error_message = _handle_token_refresh_error( + e, user_google_email, "Multiple Services" + ) + raise GoogleAuthenticationError(error_message) + + # Set the wrapper's signature + wrapper.__signature__ = wrapper_sig + + # Conditionally modify docstring to remove user_google_email parameter documentation + if is_oauth21_enabled(): + logger.debug( + "OAuth 2.1 mode enabled, removing 
import re
from typing import Dict, Optional, Tuple


# Google API identifier -> Cloud Console page that enables that API.
API_ENABLEMENT_LINKS: Dict[str, str] = {
    "calendar-json.googleapis.com": "https://console.cloud.google.com/flows/enableapi?apiid=calendar-json.googleapis.com",
    "drive.googleapis.com": "https://console.cloud.google.com/flows/enableapi?apiid=drive.googleapis.com",
    "gmail.googleapis.com": "https://console.cloud.google.com/flows/enableapi?apiid=gmail.googleapis.com",
    "docs.googleapis.com": "https://console.cloud.google.com/flows/enableapi?apiid=docs.googleapis.com",
    "sheets.googleapis.com": "https://console.cloud.google.com/flows/enableapi?apiid=sheets.googleapis.com",
    "slides.googleapis.com": "https://console.cloud.google.com/flows/enableapi?apiid=slides.googleapis.com",
    "forms.googleapis.com": "https://console.cloud.google.com/flows/enableapi?apiid=forms.googleapis.com",
    "tasks.googleapis.com": "https://console.cloud.google.com/flows/enableapi?apiid=tasks.googleapis.com",
    "chat.googleapis.com": "https://console.cloud.google.com/flows/enableapi?apiid=chat.googleapis.com",
    "customsearch.googleapis.com": "https://console.cloud.google.com/flows/enableapi?apiid=customsearch.googleapis.com",
}


# Human-readable service name -> Google API identifier.
SERVICE_NAME_TO_API: Dict[str, str] = {
    "Google Calendar": "calendar-json.googleapis.com",
    "Google Drive": "drive.googleapis.com",
    "Gmail": "gmail.googleapis.com",
    "Google Docs": "docs.googleapis.com",
    "Google Sheets": "sheets.googleapis.com",
    "Google Slides": "slides.googleapis.com",
    "Google Forms": "forms.googleapis.com",
    "Google Tasks": "tasks.googleapis.com",
    "Google Chat": "chat.googleapis.com",
    "Google Custom Search": "customsearch.googleapis.com",
}


# Internal short service name -> Google API identifier.
INTERNAL_SERVICE_TO_API: Dict[str, str] = {
    "calendar": "calendar-json.googleapis.com",
    "drive": "drive.googleapis.com",
    "gmail": "gmail.googleapis.com",
    "docs": "docs.googleapis.com",
    "sheets": "sheets.googleapis.com",
    "slides": "slides.googleapis.com",
    "forms": "forms.googleapis.com",
    "tasks": "tasks.googleapis.com",
    "chat": "chat.googleapis.com",
    "customsearch": "customsearch.googleapis.com",
    "search": "customsearch.googleapis.com",
}


def extract_api_info_from_error(
    error_details: str,
) -> Tuple[Optional[str], Optional[str]]:
    """
    Extract API service and project ID from error details.

    The API identifier is taken from a
    ``https://console.developers.google.com/apis/api/<api>/overview`` URL, and
    the project ID from the first ``project=<id>`` or ``project <id>`` token.

    Args:
        error_details: Error text to scan.

    Returns:
        Tuple of (api_service, project_id); each element is None if not found.
    """
    api_pattern = r"https://console\.developers\.google\.com/apis/api/([^/]+)/overview"
    project_pattern = r"project[=\s]+([a-zA-Z0-9-]+)"

    api_match = re.search(api_pattern, error_details)
    project_match = re.search(project_pattern, error_details)

    api_service = api_match.group(1) if api_match else None
    project_id = project_match.group(1) if project_match else None

    return api_service, project_id


def get_api_enablement_message(
    error_details: str, service_type: Optional[str] = None
) -> str:
    """
    Generate a helpful error message with direct API enablement link.

    Args:
        error_details: The error details string from the HttpError
        service_type: Optional service type (e.g., "calendar", "gmail", or "Google Calendar"),
            used only when the API cannot be identified from ``error_details``

    Returns:
        Formatted error message with enablement link, or "" when no known API
        can be determined.
    """
    api_service, project_id = extract_api_info_from_error(error_details)

    if not api_service and service_type:
        # Internal names ("calendar") take precedence over display names
        # ("Google Calendar").
        api_service = INTERNAL_SERVICE_TO_API.get(
            service_type
        ) or SERVICE_NAME_TO_API.get(service_type)

    if not api_service or api_service not in API_ENABLEMENT_LINKS:
        return ""

    enable_link = API_ENABLEMENT_LINKS[api_service]
    # Fall back to the raw API identifier when there is no display name.
    service_display_name = next(
        (name for name, api in SERVICE_NAME_TO_API.items() if api == api_service),
        api_service,
    )
    project_suffix = f" ({project_id})" if project_id else ""

    # Each paragraph ends with an explicit blank line. Adjacent string
    # literals concatenate with no separator, so omitting it glues two
    # sentences together.
    return (
        f"{service_display_name} API is not enabled for your project{project_suffix}.\n\n"
        f"Enable it here: {enable_link}\n\n"
        "After enabling, wait 1-2 minutes for the change to propagate, then try again.\n\n"
        "IMPORTANT - LLM: share the link provided as a clickable hyperlink and instruct the user to enable the required API."
    )
import base64
import logging
import os
import uuid
from pathlib import Path
from typing import NamedTuple, Optional, Dict
from datetime import datetime, timedelta

logger = logging.getLogger(__name__)

# Default expiration: 1 hour
DEFAULT_EXPIRATION_SECONDS = 3600

# Storage directory - configurable via WORKSPACE_ATTACHMENT_DIR env var
# Uses absolute path to avoid creating tmp/ in arbitrary working directories (see #327)
_default_dir = str(Path.home() / ".workspace-mcp" / "attachments")
STORAGE_DIR = (
    Path(os.getenv("WORKSPACE_ATTACHMENT_DIR", _default_dir)).expanduser().resolve()
)


def _ensure_storage_dir() -> None:
    """Create the storage directory on first use, not at import time."""
    STORAGE_DIR.mkdir(parents=True, exist_ok=True, mode=0o700)


class SavedAttachment(NamedTuple):
    """Result of saving an attachment: provides both the UUID and the absolute file path."""

    file_id: str
    path: str


class AttachmentStorage:
    """
    Manages temporary storage of email attachments.

    Files are written under STORAGE_DIR and tracked in an in-memory metadata
    dict keyed by file ID. Expired entries are removed when they are looked up
    or when ``cleanup_expired`` is called.
    """

    # Basic mime type to extension mapping, used only when no filename is given.
    _MIME_EXTENSIONS = {
        "image/jpeg": ".jpg",
        "image/png": ".png",
        "image/gif": ".gif",
        "application/pdf": ".pdf",
        "application/zip": ".zip",
        "text/plain": ".txt",
        "text/html": ".html",
    }

    def __init__(self, expiration_seconds: int = DEFAULT_EXPIRATION_SECONDS):
        self.expiration_seconds = expiration_seconds
        self._metadata: Dict[str, Dict] = {}

    @staticmethod
    def _decode_base64url(data) -> bytes:
        """Decode URL-safe base64, tolerating missing '=' padding."""
        raw = data.encode("ascii") if isinstance(data, str) else bytes(memoryview(data))
        # base64url may legally omit padding; restore it so the decoder does
        # not fail with "Incorrect padding".
        raw += b"=" * (-len(raw) % 4)
        return base64.urlsafe_b64decode(raw)

    def save_attachment(
        self,
        base64_data: str,
        filename: Optional[str] = None,
        mime_type: Optional[str] = None,
    ) -> SavedAttachment:
        """
        Save an attachment to local disk.

        Args:
            base64_data: URL-safe base64-encoded attachment data (padding optional)
            filename: Original filename (optional)
            mime_type: MIME type (optional)

        Returns:
            SavedAttachment with file_id (UUID) and path (absolute file path)

        Raises:
            ValueError: If ``base64_data`` cannot be decoded.
            OSError: If the file cannot be written.
        """
        _ensure_storage_dir()

        # Generate unique file ID for metadata tracking
        file_id = str(uuid.uuid4())

        try:
            file_bytes = self._decode_base64url(base64_data)
        except (ValueError, TypeError) as e:
            logger.error(f"Failed to decode base64 attachment data: {e}")
            raise ValueError(f"Invalid base64 data: {e}") from e

        # Extension comes from the filename when present, else from the mime type.
        if filename:
            extension = Path(filename).suffix
            # Only the stem is kept, so directory components in the supplied
            # name are dropped; a UUID fragment keeps names unique.
            save_name = f"{Path(filename).stem}_{file_id[:8]}{extension}"
        else:
            extension = self._MIME_EXTENSIONS.get(mime_type, "") if mime_type else ""
            save_name = f"{file_id}{extension}"

        # Save file with restrictive permissions (sensitive email/drive content)
        file_path = STORAGE_DIR / save_name
        try:
            fd = os.open(
                file_path,
                os.O_WRONLY | os.O_CREAT | os.O_TRUNC | getattr(os, "O_BINARY", 0),
                0o600,
            )
            # The buffered file object takes ownership of fd, handles partial
            # writes, and closes the descriptor on exit.
            with os.fdopen(fd, "wb") as handle:
                handle.write(file_bytes)
            logger.info(
                f"Saved attachment file_id={file_id} filename={filename or save_name} "
                f"({len(file_bytes)} bytes) to {file_path}"
            )
        except Exception as e:
            logger.error(
                f"Failed to save attachment file_id={file_id} "
                f"filename={filename or save_name} to {file_path}: {e}"
            )
            raise

        # Use one timestamp so expires_at is exactly created_at + expiration.
        created_at = datetime.now()
        self._metadata[file_id] = {
            "file_path": str(file_path),
            "filename": filename or f"attachment{extension}",
            "mime_type": mime_type or "application/octet-stream",
            "size": len(file_bytes),
            "created_at": created_at,
            "expires_at": created_at + timedelta(seconds=self.expiration_seconds),
        }

        return SavedAttachment(file_id=file_id, path=str(file_path))

    def get_attachment_path(self, file_id: str) -> Optional[Path]:
        """
        Get the file path for an attachment ID.

        Args:
            file_id: Unique file ID

        Returns:
            Path object if file exists and not expired, None otherwise
        """
        metadata = self._metadata.get(file_id)
        if metadata is None:
            logger.warning(f"Attachment {file_id} not found in metadata")
            return None

        if datetime.now() > metadata["expires_at"]:
            logger.info(f"Attachment {file_id} has expired, cleaning up")
            self._cleanup_file(file_id)
            return None

        file_path = Path(metadata["file_path"])
        if not file_path.exists():
            # File was removed out from under us; drop the stale entry.
            logger.warning(f"Attachment file {file_path} does not exist")
            del self._metadata[file_id]
            return None

        return file_path

    def get_attachment_metadata(self, file_id: str) -> Optional[Dict]:
        """
        Get metadata for an attachment.

        Args:
            file_id: Unique file ID

        Returns:
            A copy of the metadata dict if it exists and is not expired, None otherwise
        """
        if file_id not in self._metadata:
            return None

        metadata = self._metadata[file_id].copy()

        if datetime.now() > metadata["expires_at"]:
            self._cleanup_file(file_id)
            return None

        return metadata

    def _cleanup_file(self, file_id: str) -> None:
        """Remove file and metadata; a failed delete is logged, not raised."""
        if file_id not in self._metadata:
            return
        file_path = Path(self._metadata[file_id]["file_path"])
        try:
            if file_path.exists():
                file_path.unlink()
                logger.debug(f"Deleted expired attachment file: {file_path}")
        except Exception as e:
            logger.warning(f"Failed to delete attachment file {file_path}: {e}")
        del self._metadata[file_id]

    def cleanup_expired(self) -> int:
        """
        Clean up expired attachments.

        Returns:
            Number of files cleaned up
        """
        now = datetime.now()
        # Snapshot the IDs first: _cleanup_file mutates self._metadata.
        expired_ids = [
            file_id
            for file_id, metadata in self._metadata.items()
            if now > metadata["expires_at"]
        ]

        for file_id in expired_ids:
            self._cleanup_file(file_id)

        return len(expired_ids)


# Global instance
_attachment_storage: Optional[AttachmentStorage] = None


def get_attachment_storage() -> AttachmentStorage:
    """Get the global attachment storage instance, creating it on first use."""
    global _attachment_storage
    if _attachment_storage is None:
        _attachment_storage = AttachmentStorage()
    return _attachment_storage
+ + Args: + file_id: Unique file ID + + Returns: + Full URL to access the attachment + """ + from core.config import WORKSPACE_MCP_PORT, WORKSPACE_MCP_BASE_URI + + # Use external URL if set (for reverse proxy scenarios) + external_url = os.getenv("WORKSPACE_EXTERNAL_URL") + if external_url: + base_url = external_url.rstrip("/") + else: + base_url = f"{WORKSPACE_MCP_BASE_URI}:{WORKSPACE_MCP_PORT}" + + return f"{base_url}/attachments/{file_id}" diff --git a/core/cli_handler.py b/core/cli_handler.py new file mode 100644 index 0000000..3627463 --- /dev/null +++ b/core/cli_handler.py @@ -0,0 +1,410 @@ +""" +CLI Handler for Google Workspace MCP + +This module provides a command-line interface mode for directly invoking +MCP tools without running the full server. Designed for use by coding agents +(Codex, Claude Code) and command-line users. + +Usage: + workspace-mcp --cli # List available tools + workspace-mcp --cli list # List available tools + workspace-mcp --cli # Run tool (reads JSON args from stdin) + workspace-mcp --cli --args '{"key": "value"}' # Run with inline args + workspace-mcp --cli --help # Show tool details +""" + +import asyncio +import json +import logging +import sys +from typing import Any, Dict, List, Optional + +from auth.oauth_config import set_transport_mode +from core.tool_registry import get_tool_components + +logger = logging.getLogger(__name__) + + +def get_registered_tools(server) -> Dict[str, Any]: + """ + Get all registered tools from the FastMCP server. 
def _extract_docstring(tool) -> Optional[str]:
    """Return the first docstring line of a tool that is not blank or a section header.

    The docstring is read from ``tool.fn`` when that attribute is set and
    truthy, otherwise from ``tool`` itself. Returns None when there is no
    docstring or no qualifying line.
    """
    target = getattr(tool, "fn", None) or tool
    doc = target.__doc__ if target else None
    if not doc:
        return None

    # Lines that only introduce a docstring section are not descriptions.
    section_headers = ("Args:", "Returns:", "Raises:", "Example", "Note:")
    stripped_lines = (raw.strip() for raw in doc.strip().split("\n"))
    return next(
        (
            text
            for text in stripped_lines
            if text and not text.startswith(section_headers)
        ),
        None,
    )


def _extract_parameters(tool) -> Dict[str, Any]:
    """Build a per-parameter summary from a tool's ``parameters`` schema dict.

    Each schema property maps to a dict with ``type`` (default "any"),
    ``description`` (default ""), ``required`` and ``default``. Returns an
    empty dict when the tool has no ``parameters`` attribute or it is not a dict.
    """
    schema = getattr(tool, "parameters", None)
    if not isinstance(schema, dict):
        return {}

    properties = schema.get("properties", {})
    required_names = set(schema.get("required", []))
    return {
        param_name: {
            "type": spec.get("type", "any"),
            "description": spec.get("description", ""),
            "required": param_name in required_names,
            "default": spec.get("default"),
        }
        for param_name, spec in properties.items()
    }
def show_tool_help(server, tool_name: str) -> str:
    """
    Render the detailed help text for one registered tool.

    Args:
        server: The FastMCP server instance
        tool_name: Name of the tool

    Returns:
        Help text with the tool's docstring, its parameters and example
        invocations, or an error message naming up to ten available tools
        when ``tool_name`` is not registered.
    """
    registry = get_registered_tools(server)

    try:
        entry = registry[tool_name]
    except KeyError:
        sample = ", ".join(sorted(registry)[:10])
        return f"Error: Tool '{tool_name}' not found.\n\nAvailable tools include: {sample}..."

    # Prefer the wrapped function's docstring; fall back to the tool object.
    tool_obj = entry["tool_obj"]
    target = getattr(tool_obj, "fn", None) or tool_obj
    has_doc = bool(target and target.__doc__)
    documentation = target.__doc__ if has_doc else "(no documentation)"

    header = [
        f"Tool: {tool_name}",
        "=" * (len(tool_name) + 6),
        "",
        documentation,
        "",
        "Parameters:",
    ]

    param_lines = []
    for param_name, details in entry["parameters"].items():
        requirement = "(required)" if details.get("required") else "(optional)"
        type_name = details.get("type", "any")
        param_lines.append(f" {param_name}: {type_name} {requirement}")

        description = details.get("description", "")
        if description:
            param_lines.append(f" {description}")

        default_value = details.get("default")
        if default_value is not None:
            param_lines.append(f" Default: {default_value}")

    if not entry["parameters"]:
        param_lines.append(" (no parameters)")

    footer = [
        "",
        "Example usage:",
        f' workspace-mcp --cli {tool_name} --args \'{{"param": "value"}}\'',
        "",
        "Or pipe JSON from stdin:",
        f' echo \'{{"param": "value"}}\' | workspace-mcp --cli {tool_name}',
    ]

    return "\n".join(header + param_lines + footer)
def parse_cli_args(args: List[str]) -> Dict[str, Any]:
    """
    Parse CLI arguments for tool execution.

    The first argument that does not start with "-" and is not a recognised
    keyword becomes the tool name. Unrecognised flags and extra positional
    arguments are ignored.

    Args:
        args: List of arguments after --cli

    Returns:
        Dictionary with parsed values:
        - command: "list", "help", or "run"
        - tool_name: Name of tool (if applicable)
        - tool_args: Arguments for the tool (if applicable)
        - output_format: "text" or "json"

    Raises:
        ValueError: If --args/-a has no value, is not valid JSON, or does not
            decode to a JSON object.
    """
    result = {
        "command": "list",
        "tool_name": None,
        "tool_args": {},
        "output_format": "text",
    }

    i = 0
    while i < len(args):
        arg = args[i]

        if arg in ("list", "-l", "--list"):
            result["command"] = "list"
        elif arg in ("--json", "-j"):
            result["output_format"] = "json"
        elif arg in ("help", "--help", "-h"):
            if result["tool_name"]:
                # Tool already named: show help for it.
                result["command"] = "help"
            elif i + 1 < len(args) and not args[i + 1].startswith("-"):
                # "help <tool>": the next argument is the tool name.
                result["tool_name"] = args[i + 1]
                result["command"] = "help"
                i += 1
            else:
                # No tool specified, show general help
                result["command"] = "list"
        elif arg in ("--args", "-a"):
            # A dangling flag is a user error; report it rather than silently
            # running the tool with no arguments.
            if i + 1 >= len(args):
                raise ValueError(
                    f'{arg} requires a JSON object value, e.g. {arg} \'{{"key": "value"}}\''
                )
            json_str = args[i + 1]
            try:
                tool_args = json.loads(json_str)
            except json.JSONDecodeError as e:
                tool_hint = result["tool_name"] or "<tool_name>"
                raise ValueError(
                    f"Invalid JSON in --args: {e}\n"
                    f"Received: {repr(json_str)}\n"
                    f"Tip: Try using stdin instead: echo '<json>' | workspace-mcp --cli {tool_hint}"
                ) from e
            # Tool arguments are passed as keyword arguments downstream, so
            # only a JSON object makes sense here.
            if not isinstance(tool_args, dict):
                raise ValueError(
                    f"--args must be a JSON object, got {type(tool_args).__name__}: {repr(json_str)}"
                )
            result["tool_args"] = tool_args
            i += 1
        elif not arg.startswith("-") and not result["tool_name"]:
            # First non-flag argument is the tool name
            result["tool_name"] = arg
            result["command"] = "run"

        i += 1

    return result
async def handle_cli_mode(server, cli_args: List[str]) -> int:
    """
    Main entry point for CLI mode.

    Dispatches to listing tools, showing help for a tool, or running a tool.
    When running, JSON arguments from stdin are merged with inline ``--args``,
    with inline values taking precedence.

    Args:
        server: The FastMCP server instance
        cli_args: Arguments passed after --cli

    Returns:
        Exit code (0 for success, 1 for error)
    """
    # Set transport mode to "stdio" so OAuth callback server starts when needed
    # This is required for authentication flow when no cached credentials exist
    set_transport_mode("stdio")

    try:
        parsed = parse_cli_args(cli_args)
        command = parsed["command"]

        if command == "list":
            print(list_tools(server, parsed["output_format"]))
            return 0

        if command == "help":
            print(show_tool_help(server, parsed["tool_name"]))
            return 0

        if command == "run":
            stdin_args = read_stdin_args()
            inline_args = parsed["tool_args"]

            # Both sources must be JSON objects. Without this check a JSON
            # array or scalar fails below with an opaque AttributeError.
            for origin, candidate in (("stdin", stdin_args), ("--args", inline_args)):
                if not isinstance(candidate, dict):
                    raise ValueError(
                        f"Tool arguments from {origin} must be a JSON object, "
                        f"got {type(candidate).__name__}"
                    )

            # Merge stdin args with inline args (inline takes precedence)
            merged_args = {**stdin_args, **inline_args}

            result = await run_tool(server, parsed["tool_name"], merged_args)
            print(result)
            return 0

        # Unknown command
        print(f"Unknown command: {parsed['command']}")
        return 1

    except ValueError as e:
        print(f"Error: {e}", file=sys.stderr)
        return 1
    except Exception as e:
        logger.error(f"[CLI] Unexpected error: {e}", exc_info=True)
        print(f"Error: {e}", file=sys.stderr)
        return 1
async def _manage_comment_dispatch(
    service,
    app_name: str,
    file_id: str,
    action: str,
    comment_content: Optional[str] = None,
    comment_id: Optional[str] = None,
) -> str:
    """Validate a comment action and forward it to the matching implementation.

    ``action`` is lower-cased and stripped before matching. "create" needs
    ``comment_content``, "reply" needs both ``comment_id`` and
    ``comment_content``, and "resolve" needs ``comment_id``.

    Raises:
        ValueError: If the action is unknown or a required argument is missing.
    """
    normalized = action.lower().strip()

    if normalized == "create":
        if comment_content:
            return await _create_comment_impl(
                service, app_name, file_id, comment_content
            )
        raise ValueError("comment_content is required for create action")

    if normalized == "reply":
        if comment_id and comment_content:
            return await _reply_to_comment_impl(
                service, app_name, file_id, comment_id, comment_content
            )
        raise ValueError(
            "comment_id and comment_content are required for reply action"
        )

    if normalized == "resolve":
        if comment_id:
            return await _resolve_comment_impl(service, app_name, file_id, comment_id)
        raise ValueError("comment_id is required for resolve action")

    raise ValueError(
        f"Invalid action '{normalized}'. Must be 'create', 'reply', or 'resolve'."
    )
+ + Args: + app_name: Name of the app (e.g., "document", "spreadsheet", "presentation") + file_id_param: Parameter name for the file ID (e.g., "document_id", "spreadsheet_id", "presentation_id") + + Returns: + Dict containing the comment management functions with unique names + """ + + # --- Consolidated tools --- + list_func_name = f"list_{app_name}_comments" + manage_func_name = f"manage_{app_name}_comment" + + if file_id_param == "document_id": + + @require_google_service("drive", "drive_read") + @handle_http_errors(list_func_name, service_type="drive") + async def list_comments( + service, user_google_email: str, document_id: str + ) -> str: + """List all comments from a Google Document.""" + return await _read_comments_impl(service, app_name, document_id) + + @require_google_service("drive", "drive_file") + @handle_http_errors(manage_func_name, service_type="drive") + async def manage_comment( + service, + user_google_email: str, + document_id: str, + action: str, + comment_content: Optional[str] = None, + comment_id: Optional[str] = None, + ) -> str: + """Manage comments on a Google Document. + + Actions: + - create: Create a new comment. Requires comment_content. + - reply: Reply to a comment. Requires comment_id and comment_content. + - resolve: Resolve a comment. Requires comment_id. 
+ """ + return await _manage_comment_dispatch( + service, app_name, document_id, action, comment_content, comment_id + ) + + elif file_id_param == "spreadsheet_id": + + @require_google_service("drive", "drive_read") + @handle_http_errors(list_func_name, service_type="drive") + async def list_comments( + service, user_google_email: str, spreadsheet_id: str + ) -> str: + """List all comments from a Google Spreadsheet.""" + return await _read_comments_impl(service, app_name, spreadsheet_id) + + @require_google_service("drive", "drive_file") + @handle_http_errors(manage_func_name, service_type="drive") + async def manage_comment( + service, + user_google_email: str, + spreadsheet_id: str, + action: str, + comment_content: Optional[str] = None, + comment_id: Optional[str] = None, + ) -> str: + """Manage comments on a Google Spreadsheet. + + Actions: + - create: Create a new comment. Requires comment_content. + - reply: Reply to a comment. Requires comment_id and comment_content. + - resolve: Resolve a comment. Requires comment_id. + """ + return await _manage_comment_dispatch( + service, app_name, spreadsheet_id, action, comment_content, comment_id + ) + + elif file_id_param == "presentation_id": + + @require_google_service("drive", "drive_read") + @handle_http_errors(list_func_name, service_type="drive") + async def list_comments( + service, user_google_email: str, presentation_id: str + ) -> str: + """List all comments from a Google Presentation.""" + return await _read_comments_impl(service, app_name, presentation_id) + + @require_google_service("drive", "drive_file") + @handle_http_errors(manage_func_name, service_type="drive") + async def manage_comment( + service, + user_google_email: str, + presentation_id: str, + action: str, + comment_content: Optional[str] = None, + comment_id: Optional[str] = None, + ) -> str: + """Manage comments on a Google Presentation. + + Actions: + - create: Create a new comment. Requires comment_content. + - reply: Reply to a comment. 
Requires comment_id and comment_content. + - resolve: Resolve a comment. Requires comment_id. + """ + return await _manage_comment_dispatch( + service, app_name, presentation_id, action, comment_content, comment_id + ) + + list_comments.__name__ = list_func_name + manage_comment.__name__ = manage_func_name + server.tool()(list_comments) + server.tool()(manage_comment) + + return { + "list_comments": list_comments, + "manage_comment": manage_comment, + } + + +async def _read_comments_impl(service, app_name: str, file_id: str) -> str: + """Implementation for reading comments from any Google Workspace file.""" + logger.info(f"[read_{app_name}_comments] Reading comments for {app_name} {file_id}") + + response = await asyncio.to_thread( + service.comments() + .list( + fileId=file_id, + fields="comments(id,content,author,createdTime,modifiedTime,resolved,quotedFileContent,replies(content,author,id,createdTime,modifiedTime))", + ) + .execute + ) + + comments = response.get("comments", []) + + if not comments: + return f"No comments found in {app_name} {file_id}" + + output = [f"Found {len(comments)} comments in {app_name} {file_id}:\\n"] + + for comment in comments: + author = comment.get("author", {}).get("displayName", "Unknown") + content = comment.get("content", "") + created = comment.get("createdTime", "") + resolved = comment.get("resolved", False) + comment_id = comment.get("id", "") + status = " [RESOLVED]" if resolved else "" + + quoted_text = comment.get("quotedFileContent", {}).get("value", "") + + output.append(f"Comment ID: {comment_id}") + output.append(f"Author: {author}") + output.append(f"Created: {created}{status}") + if quoted_text: + output.append(f"Quoted text: {quoted_text}") + output.append(f"Content: {content}") + + # Add replies if any + replies = comment.get("replies", []) + if replies: + output.append(f" Replies ({len(replies)}):") + for reply in replies: + reply_author = reply.get("author", {}).get("displayName", "Unknown") + reply_content = 
reply.get("content", "") + reply_created = reply.get("createdTime", "") + reply_id = reply.get("id", "") + output.append(f" Reply ID: {reply_id}") + output.append(f" Author: {reply_author}") + output.append(f" Created: {reply_created}") + output.append(f" Content: {reply_content}") + + output.append("") # Empty line between comments + + return "\\n".join(output) + + +async def _create_comment_impl( + service, app_name: str, file_id: str, comment_content: str +) -> str: + """Implementation for creating a comment on any Google Workspace file.""" + logger.info(f"[create_{app_name}_comment] Creating comment in {app_name} {file_id}") + + body = {"content": comment_content} + + comment = await asyncio.to_thread( + service.comments() + .create( + fileId=file_id, + body=body, + fields="id,content,author,createdTime,modifiedTime", + ) + .execute + ) + + comment_id = comment.get("id", "") + author = comment.get("author", {}).get("displayName", "Unknown") + created = comment.get("createdTime", "") + + return f"Comment created successfully!\\nComment ID: {comment_id}\\nAuthor: {author}\\nCreated: {created}\\nContent: {comment_content}" + + +async def _reply_to_comment_impl( + service, app_name: str, file_id: str, comment_id: str, reply_content: str +) -> str: + """Implementation for replying to a comment on any Google Workspace file.""" + logger.info( + f"[reply_to_{app_name}_comment] Replying to comment {comment_id} in {app_name} {file_id}" + ) + + body = {"content": reply_content} + + reply = await asyncio.to_thread( + service.replies() + .create( + fileId=file_id, + commentId=comment_id, + body=body, + fields="id,content,author,createdTime,modifiedTime", + ) + .execute + ) + + reply_id = reply.get("id", "") + author = reply.get("author", {}).get("displayName", "Unknown") + created = reply.get("createdTime", "") + + return f"Reply posted successfully!\\nReply ID: {reply_id}\\nAuthor: {author}\\nCreated: {created}\\nContent: {reply_content}" + + +async def 
_resolve_comment_impl( + service, app_name: str, file_id: str, comment_id: str +) -> str: + """Implementation for resolving a comment on any Google Workspace file.""" + logger.info( + f"[resolve_{app_name}_comment] Resolving comment {comment_id} in {app_name} {file_id}" + ) + + body = {"content": "This comment has been resolved.", "action": "resolve"} + + reply = await asyncio.to_thread( + service.replies() + .create( + fileId=file_id, + commentId=comment_id, + body=body, + fields="id,content,author,createdTime,modifiedTime", + ) + .execute + ) + + reply_id = reply.get("id", "") + author = reply.get("author", {}).get("displayName", "Unknown") + created = reply.get("createdTime", "") + + return f"Comment {comment_id} has been resolved successfully.\\nResolve reply ID: {reply_id}\\nAuthor: {author}\\nCreated: {created}" diff --git a/core/config.py b/core/config.py new file mode 100644 index 0000000..e7b8aaa --- /dev/null +++ b/core/config.py @@ -0,0 +1,37 @@ +""" +Shared configuration for Google Workspace MCP server. +This module holds configuration values that need to be shared across modules +to avoid circular imports. + +NOTE: OAuth configuration has been moved to auth.oauth_config for centralization. +This module now imports from there for backward compatibility. 
+""" + +import os +from auth.oauth_config import ( + get_oauth_base_url, + get_oauth_redirect_uri, + set_transport_mode, + get_transport_mode, + is_oauth21_enabled, +) + +# Server configuration +WORKSPACE_MCP_PORT = int(os.getenv("PORT", os.getenv("WORKSPACE_MCP_PORT", 8000))) +WORKSPACE_MCP_BASE_URI = os.getenv("WORKSPACE_MCP_BASE_URI", "http://localhost") + +# Disable USER_GOOGLE_EMAIL in OAuth 2.1 multi-user mode +USER_GOOGLE_EMAIL = ( + None if is_oauth21_enabled() else os.getenv("USER_GOOGLE_EMAIL", None) +) + +# Re-export OAuth functions for backward compatibility +__all__ = [ + "WORKSPACE_MCP_PORT", + "WORKSPACE_MCP_BASE_URI", + "USER_GOOGLE_EMAIL", + "get_oauth_base_url", + "get_oauth_redirect_uri", + "set_transport_mode", + "get_transport_mode", +] diff --git a/core/context.py b/core/context.py new file mode 100644 index 0000000..f0780f3 --- /dev/null +++ b/core/context.py @@ -0,0 +1,43 @@ +# core/context.py +import contextvars +from typing import Optional + +# Context variable to hold injected credentials for the life of a single request. +_injected_oauth_credentials = contextvars.ContextVar( + "injected_oauth_credentials", default=None +) + +# Context variable to hold FastMCP session ID for the life of a single request. +_fastmcp_session_id = contextvars.ContextVar("fastmcp_session_id", default=None) + + +def get_injected_oauth_credentials(): + """ + Retrieve injected OAuth credentials for the current request context. + This is called by the authentication layer to check for request-scoped credentials. + """ + return _injected_oauth_credentials.get() + + +def set_injected_oauth_credentials(credentials: Optional[dict]): + """ + Set or clear the injected OAuth credentials for the current request context. + This is called by the service decorator. + """ + _injected_oauth_credentials.set(credentials) + + +def get_fastmcp_session_id() -> Optional[str]: + """ + Retrieve the FastMCP session ID for the current request context. 
+ This is called by authentication layer to get the current session. + """ + return _fastmcp_session_id.get() + + +def set_fastmcp_session_id(session_id: Optional[str]): + """ + Set or clear the FastMCP session ID for the current request context. + This is called when a FastMCP request starts. + """ + _fastmcp_session_id.set(session_id) diff --git a/core/log_formatter.py b/core/log_formatter.py new file mode 100644 index 0000000..9490054 --- /dev/null +++ b/core/log_formatter.py @@ -0,0 +1,207 @@ +""" +Enhanced Log Formatter for Google Workspace MCP + +Provides visually appealing log formatting with emojis and consistent styling +to match the safe_print output format. +""" + +import logging +import os +import re +import sys + + +class EnhancedLogFormatter(logging.Formatter): + """Custom log formatter that adds ASCII prefixes and visual enhancements to log messages.""" + + # Color codes for terminals that support ANSI colors + COLORS = { + "DEBUG": "\033[36m", # Cyan + "INFO": "\033[32m", # Green + "WARNING": "\033[33m", # Yellow + "ERROR": "\033[31m", # Red + "CRITICAL": "\033[35m", # Magenta + "RESET": "\033[0m", # Reset + } + + def __init__(self, use_colors: bool = True, *args, **kwargs): + """ + Initialize the emoji log formatter. 
+ + Args: + use_colors: Whether to use ANSI color codes (default: True) + """ + super().__init__(*args, **kwargs) + self.use_colors = use_colors + + def format(self, record: logging.LogRecord) -> str: + """Format the log record with ASCII prefixes and enhanced styling.""" + # Get the appropriate ASCII prefix for the service + service_prefix = self._get_ascii_prefix(record.name, record.levelname) + + # Format the message with enhanced styling + formatted_msg = self._enhance_message(record.getMessage()) + + # Build the formatted log entry + if self.use_colors: + color = self.COLORS.get(record.levelname, "") + reset = self.COLORS["RESET"] + return f"{service_prefix} {color}{formatted_msg}{reset}" + else: + return f"{service_prefix} {formatted_msg}" + + def _get_ascii_prefix(self, logger_name: str, level_name: str) -> str: + """Get ASCII-safe prefix for Windows compatibility.""" + # ASCII-safe prefixes for different services + ascii_prefixes = { + "core.tool_tier_loader": "[TOOLS]", + "core.tool_registry": "[REGISTRY]", + "auth.scopes": "[AUTH]", + "core.utils": "[UTILS]", + "auth.google_auth": "[OAUTH]", + "auth.credential_store": "[CREDS]", + "gcalendar.calendar_tools": "[CALENDAR]", + "gdrive.drive_tools": "[DRIVE]", + "gmail.gmail_tools": "[GMAIL]", + "gdocs.docs_tools": "[DOCS]", + "gsheets.sheets_tools": "[SHEETS]", + "gchat.chat_tools": "[CHAT]", + "gforms.forms_tools": "[FORMS]", + "gslides.slides_tools": "[SLIDES]", + "gtasks.tasks_tools": "[TASKS]", + "gsearch.search_tools": "[SEARCH]", + } + + return ascii_prefixes.get(logger_name, f"[{level_name}]") + + def _enhance_message(self, message: str) -> str: + """Enhance the log message with better formatting.""" + # Handle common patterns for better visual appeal + + # Tool tier loading messages + if "resolved to" in message and "tools across" in message: + # Extract numbers and service names for better formatting + pattern = ( + r"Tier '(\w+)' resolved to (\d+) tools across (\d+) services: (.+)" + ) + match = 
re.search(pattern, message) + if match: + tier, tool_count, service_count, services = match.groups() + return f"Tool tier '{tier}' loaded: {tool_count} tools across {service_count} services [{services}]" + + # Configuration loading messages + if "Loaded tool tiers configuration from" in message: + path = message.split("from ")[-1] + return f"Configuration loaded from {path}" + + # Tool filtering messages + if "Tool tier filtering" in message: + pattern = r"removed (\d+) tools, (\d+) enabled" + match = re.search(pattern, message) + if match: + removed, enabled = match.groups() + return f"Tool filtering complete: {enabled} tools enabled ({removed} filtered out)" + + # Enabled tools messages + if "Enabled tools set for scope management" in message: + tools = message.split(": ")[-1] + return f"Scope management configured for tools: {tools}" + + # Credentials directory messages + if "Credentials directory permissions check passed" in message: + path = message.split(": ")[-1] + return f"Credentials directory verified: {path}" + + # If no specific pattern matches, return the original message + return message + + +def setup_enhanced_logging( + log_level: int = logging.INFO, use_colors: bool = True +) -> None: + """ + Set up enhanced logging with ASCII prefix formatter for the entire application. 
+ + Args: + log_level: The logging level to use (default: INFO) + use_colors: Whether to use ANSI colors (default: True) + """ + # Create the enhanced formatter + formatter = EnhancedLogFormatter(use_colors=use_colors) + + # Get the root logger + root_logger = logging.getLogger() + + # Update existing console handlers + for handler in root_logger.handlers: + if isinstance(handler, logging.StreamHandler) and handler.stream.name in [ + "", + "", + ]: + handler.setFormatter(formatter) + + # If no console handler exists, create one + console_handlers = [ + h + for h in root_logger.handlers + if isinstance(h, logging.StreamHandler) + and h.stream.name in ["", ""] + ] + + if not console_handlers: + console_handler = logging.StreamHandler() + console_handler.setFormatter(formatter) + console_handler.setLevel(log_level) + root_logger.addHandler(console_handler) + + +def configure_file_logging(logger_name: str = None) -> bool: + """ + Configure file logging based on stateless mode setting. + + In stateless mode, file logging is completely disabled to avoid filesystem writes. + In normal mode, sets up detailed file logging to 'mcp_server_debug.log'. 
+ + Args: + logger_name: Optional name for the logger (defaults to root logger) + + Returns: + bool: True if file logging was configured, False if skipped (stateless mode) + """ + # Check if stateless mode is enabled + stateless_mode = ( + os.getenv("WORKSPACE_MCP_STATELESS_MODE", "false").lower() == "true" + ) + + if stateless_mode: + logger = logging.getLogger(logger_name) + logger.debug("File logging disabled in stateless mode") + return False + + # Configure file logging for normal mode + try: + target_logger = logging.getLogger(logger_name) + log_file_dir = os.path.dirname(os.path.abspath(__file__)) + # Go up one level since we're in core/ subdirectory + log_file_dir = os.path.dirname(log_file_dir) + log_file_path = os.path.join(log_file_dir, "mcp_server_debug.log") + + file_handler = logging.FileHandler(log_file_path, mode="a") + file_handler.setLevel(logging.DEBUG) + + file_formatter = logging.Formatter( + "%(asctime)s - %(name)s - %(levelname)s - %(process)d - %(threadName)s " + "[%(module)s.%(funcName)s:%(lineno)d] - %(message)s" + ) + file_handler.setFormatter(file_formatter) + target_logger.addHandler(file_handler) + + logger = logging.getLogger(logger_name) + logger.debug(f"Detailed file logging configured to: {log_file_path}") + return True + + except Exception as e: + sys.stderr.write( + f"CRITICAL: Failed to set up file logging to '{log_file_path}': {e}\n" + ) + return False diff --git a/core/server.py b/core/server.py new file mode 100644 index 0000000..69f970e --- /dev/null +++ b/core/server.py @@ -0,0 +1,620 @@ +import hashlib +import logging +import os +from typing import List, Optional +from importlib import metadata + +from fastapi.responses import HTMLResponse, JSONResponse, FileResponse +from starlette.applications import Starlette +from starlette.datastructures import MutableHeaders +from starlette.types import Scope, Receive, Send +from starlette.requests import Request +from starlette.middleware import Middleware + +from fastmcp import 
FastMCP +from fastmcp.server.auth.providers.google import GoogleProvider + +from auth.oauth21_session_store import get_oauth21_session_store, set_auth_provider +from auth.google_auth import handle_auth_callback, start_auth_flow, check_client_secrets +from auth.oauth_config import is_oauth21_enabled, is_external_oauth21_provider +from auth.mcp_session_middleware import MCPSessionMiddleware +from auth.oauth_responses import ( + create_error_response, + create_success_response, + create_server_error_response, +) +from auth.auth_info_middleware import AuthInfoMiddleware +from auth.scopes import SCOPES, get_current_scopes # noqa +from core.config import ( + USER_GOOGLE_EMAIL, + get_transport_mode, + set_transport_mode as _set_transport_mode, + get_oauth_redirect_uri as get_oauth_redirect_uri_for_current_mode, +) + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +_auth_provider: Optional[GoogleProvider] = None +_legacy_callback_registered = False + +session_middleware = Middleware(MCPSessionMiddleware) + + +class WellKnownCacheControlMiddleware: + """Force no-cache headers for OAuth well-known discovery endpoints.""" + + def __init__(self, app): + self.app = app + + async def __call__(self, scope: Scope, receive: Receive, send: Send) -> None: + if scope["type"] != "http": + await self.app(scope, receive, send) + return + + path = scope.get("path", "") + is_oauth_well_known = ( + path == "/.well-known/oauth-authorization-server" + or path.startswith("/.well-known/oauth-authorization-server/") + or path == "/.well-known/oauth-protected-resource" + or path.startswith("/.well-known/oauth-protected-resource/") + ) + if not is_oauth_well_known: + await self.app(scope, receive, send) + return + + async def send_with_no_cache_headers(message): + if message["type"] == "http.response.start": + headers = MutableHeaders(raw=message.setdefault("headers", [])) + headers["Cache-Control"] = "no-store, must-revalidate" + headers["ETag"] = 
f'"{_compute_scope_fingerprint()}"' + await send(message) + + await self.app(scope, receive, send_with_no_cache_headers) + + +well_known_cache_control_middleware = Middleware(WellKnownCacheControlMiddleware) + + +def _compute_scope_fingerprint() -> str: + """Compute a short hash of the current scope configuration for cache-busting.""" + scopes_str = ",".join(sorted(get_current_scopes())) + return hashlib.sha256(scopes_str.encode()).hexdigest()[:12] + + +# Custom FastMCP that adds secure middleware stack for OAuth 2.1 +class SecureFastMCP(FastMCP): + def http_app(self, **kwargs) -> "Starlette": + """Override to add secure middleware stack for OAuth 2.1.""" + app = super().http_app(**kwargs) + + # Add middleware in order (first added = outermost layer) + app.user_middleware.insert(0, well_known_cache_control_middleware) + + # Session Management - extracts session info for MCP context + app.user_middleware.insert(1, session_middleware) + + # Rebuild middleware stack + app.middleware_stack = app.build_middleware_stack() + logger.info("Added middleware stack: WellKnownCacheControl, Session Management") + return app + + +# Build server instructions with user email context for single-user mode +_server_instructions = None +if USER_GOOGLE_EMAIL: + _server_instructions = f"""Connected Google account: {USER_GOOGLE_EMAIL} + +When using Google Workspace tools, always use `{USER_GOOGLE_EMAIL}` as the `user_google_email` parameter. 
Do not ask the user for their email address.""" + logger.info(f"Server instructions configured for user: {USER_GOOGLE_EMAIL}") + +server = SecureFastMCP( + name="google_workspace", + auth=None, + instructions=_server_instructions, +) + +# Add the AuthInfo middleware to inject authentication into FastMCP context +auth_info_middleware = AuthInfoMiddleware() +server.add_middleware(auth_info_middleware) + + +def _parse_bool_env(value: str) -> bool: + """Parse environment variable string to boolean.""" + return value.lower() in ("1", "true", "yes", "on") + + +def set_transport_mode(mode: str): + """Sets the transport mode for the server.""" + _set_transport_mode(mode) + logger.info(f"Transport: {mode}") + + +def _ensure_legacy_callback_route() -> None: + global _legacy_callback_registered + if _legacy_callback_registered: + return + server.custom_route("/oauth2callback", methods=["GET"])(legacy_oauth2_callback) + _legacy_callback_registered = True + + +def configure_server_for_http(): + """ + Configures the authentication provider for HTTP transport. + This must be called BEFORE server.run(). 
+ """ + global _auth_provider + + transport_mode = get_transport_mode() + + if transport_mode != "streamable-http": + return + + # Use centralized OAuth configuration + from auth.oauth_config import get_oauth_config + + config = get_oauth_config() + + # Check if OAuth 2.1 is enabled via centralized config + oauth21_enabled = config.is_oauth21_enabled() + + if oauth21_enabled: + if not config.is_configured(): + logger.warning("OAuth 2.1 enabled but OAuth credentials not configured") + return + + def validate_and_derive_jwt_key( + jwt_signing_key_override: str | None, client_secret: str + ) -> bytes: + """Validate JWT signing key override and derive the final JWT key.""" + if jwt_signing_key_override: + if len(jwt_signing_key_override) < 12: + logger.warning( + "OAuth 2.1: FASTMCP_SERVER_AUTH_GOOGLE_JWT_SIGNING_KEY is less than 12 characters; " + "use a longer secret to improve key derivation strength." + ) + return derive_jwt_key( + low_entropy_material=jwt_signing_key_override, + salt="fastmcp-jwt-signing-key", + ) + else: + return derive_jwt_key( + high_entropy_material=client_secret, + salt="fastmcp-jwt-signing-key", + ) + + try: + # Import common dependencies for storage backends + from key_value.aio.wrappers.encryption import FernetEncryptionWrapper + from cryptography.fernet import Fernet + from fastmcp.server.auth.jwt_issuer import derive_jwt_key + + required_scopes: List[str] = sorted(get_current_scopes()) + + client_storage = None + jwt_signing_key_override = ( + os.getenv("FASTMCP_SERVER_AUTH_GOOGLE_JWT_SIGNING_KEY", "").strip() + or None + ) + storage_backend = ( + os.getenv("WORKSPACE_MCP_OAUTH_PROXY_STORAGE_BACKEND", "") + .strip() + .lower() + ) + valkey_host = os.getenv("WORKSPACE_MCP_OAUTH_PROXY_VALKEY_HOST", "").strip() + + # Determine storage backend: valkey, disk, memory (default) + use_valkey = storage_backend == "valkey" or bool(valkey_host) + use_disk = storage_backend == "disk" + + if use_valkey: + try: + from key_value.aio.stores.valkey 
import ValkeyStore + + valkey_port_raw = os.getenv( + "WORKSPACE_MCP_OAUTH_PROXY_VALKEY_PORT", "6379" + ).strip() + valkey_db_raw = os.getenv( + "WORKSPACE_MCP_OAUTH_PROXY_VALKEY_DB", "0" + ).strip() + + valkey_port = int(valkey_port_raw) + valkey_db = int(valkey_db_raw) + valkey_use_tls_raw = os.getenv( + "WORKSPACE_MCP_OAUTH_PROXY_VALKEY_USE_TLS", "" + ).strip() + valkey_use_tls = ( + _parse_bool_env(valkey_use_tls_raw) + if valkey_use_tls_raw + else valkey_port == 6380 + ) + + valkey_request_timeout_ms_raw = os.getenv( + "WORKSPACE_MCP_OAUTH_PROXY_VALKEY_REQUEST_TIMEOUT_MS", "" + ).strip() + valkey_connection_timeout_ms_raw = os.getenv( + "WORKSPACE_MCP_OAUTH_PROXY_VALKEY_CONNECTION_TIMEOUT_MS", "" + ).strip() + + valkey_request_timeout_ms = ( + int(valkey_request_timeout_ms_raw) + if valkey_request_timeout_ms_raw + else None + ) + valkey_connection_timeout_ms = ( + int(valkey_connection_timeout_ms_raw) + if valkey_connection_timeout_ms_raw + else None + ) + + valkey_username = ( + os.getenv( + "WORKSPACE_MCP_OAUTH_PROXY_VALKEY_USERNAME", "" + ).strip() + or None + ) + valkey_password = ( + os.getenv( + "WORKSPACE_MCP_OAUTH_PROXY_VALKEY_PASSWORD", "" + ).strip() + or None + ) + + if not valkey_host: + valkey_host = "localhost" + + client_storage = ValkeyStore( + host=valkey_host, + port=valkey_port, + db=valkey_db, + username=valkey_username, + password=valkey_password, + ) + + # Configure TLS and timeouts on the underlying Glide client config. + # ValkeyStore currently doesn't expose these settings directly. + glide_config = getattr(client_storage, "_client_config", None) + if glide_config is not None: + glide_config.use_tls = valkey_use_tls + + is_remote_host = valkey_host not in {"localhost", "127.0.0.1"} + if valkey_request_timeout_ms is None and ( + valkey_use_tls or is_remote_host + ): + # Glide defaults to 250ms if unset; increase for remote/TLS endpoints. 
+ valkey_request_timeout_ms = 5000 + if valkey_request_timeout_ms is not None: + glide_config.request_timeout = valkey_request_timeout_ms + + if valkey_connection_timeout_ms is None and ( + valkey_use_tls or is_remote_host + ): + valkey_connection_timeout_ms = 10000 + if valkey_connection_timeout_ms is not None: + from glide_shared.config import ( + AdvancedGlideClientConfiguration, + ) + + glide_config.advanced_config = ( + AdvancedGlideClientConfiguration( + connection_timeout=valkey_connection_timeout_ms + ) + ) + + jwt_signing_key = validate_and_derive_jwt_key( + jwt_signing_key_override, config.client_secret + ) + + storage_encryption_key = derive_jwt_key( + high_entropy_material=jwt_signing_key.decode(), + salt="fastmcp-storage-encryption-key", + ) + + client_storage = FernetEncryptionWrapper( + key_value=client_storage, + fernet=Fernet(key=storage_encryption_key), + ) + logger.info( + "OAuth 2.1: Using ValkeyStore for FastMCP OAuth proxy client_storage (host=%s, port=%s, db=%s, tls=%s)", + valkey_host, + valkey_port, + valkey_db, + valkey_use_tls, + ) + if valkey_request_timeout_ms is not None: + logger.info( + "OAuth 2.1: Valkey request timeout set to %sms", + valkey_request_timeout_ms, + ) + if valkey_connection_timeout_ms is not None: + logger.info( + "OAuth 2.1: Valkey connection timeout set to %sms", + valkey_connection_timeout_ms, + ) + logger.info( + "OAuth 2.1: Applied Fernet encryption wrapper to Valkey client_storage (key derived from FASTMCP_SERVER_AUTH_GOOGLE_JWT_SIGNING_KEY or GOOGLE_OAUTH_CLIENT_SECRET)." + ) + except ImportError as exc: + logger.warning( + "OAuth 2.1: Valkey client_storage requested but Valkey dependencies are not installed (%s). 
" + "Install 'workspace-mcp[valkey]' (or 'py-key-value-aio[valkey]', which includes 'valkey-glide') " + "or unset WORKSPACE_MCP_OAUTH_PROXY_STORAGE_BACKEND/WORKSPACE_MCP_OAUTH_PROXY_VALKEY_HOST.", + exc, + ) + except ValueError as exc: + logger.warning( + "OAuth 2.1: Invalid Valkey configuration; falling back to default storage (%s).", + exc, + ) + elif use_disk: + try: + from key_value.aio.stores.filetree import FileTreeStore + + disk_directory = os.getenv( + "WORKSPACE_MCP_OAUTH_PROXY_DISK_DIRECTORY", "" + ).strip() + if not disk_directory: + # Default to FASTMCP_HOME/oauth-proxy or ~/.fastmcp/oauth-proxy + fastmcp_home = os.getenv("FASTMCP_HOME", "").strip() + if fastmcp_home: + disk_directory = os.path.join(fastmcp_home, "oauth-proxy") + else: + disk_directory = os.path.expanduser( + "~/.fastmcp/oauth-proxy" + ) + + client_storage = FileTreeStore(data_directory=disk_directory) + + jwt_signing_key = validate_and_derive_jwt_key( + jwt_signing_key_override, config.client_secret + ) + + storage_encryption_key = derive_jwt_key( + high_entropy_material=jwt_signing_key.decode(), + salt="fastmcp-storage-encryption-key", + ) + + client_storage = FernetEncryptionWrapper( + key_value=client_storage, + fernet=Fernet(key=storage_encryption_key), + ) + logger.info( + "OAuth 2.1: Using FileTreeStore for FastMCP OAuth proxy client_storage (directory=%s)", + disk_directory, + ) + except ImportError as exc: + logger.warning( + "OAuth 2.1: Disk storage requested but dependencies not available (%s). 
" + "Falling back to default storage.", + exc, + ) + elif storage_backend == "memory": + from key_value.aio.stores.memory import MemoryStore + + client_storage = MemoryStore() + logger.info( + "OAuth 2.1: Using MemoryStore for FastMCP OAuth proxy client_storage" + ) + # else: client_storage remains None, FastMCP uses its default + + # Ensure JWT signing key is always derived for all storage backends + if "jwt_signing_key" not in locals(): + jwt_signing_key = validate_and_derive_jwt_key( + jwt_signing_key_override, config.client_secret + ) + + # Check if external OAuth provider is configured + if config.is_external_oauth21_provider(): + # External OAuth mode: use custom provider that handles ya29.* access tokens + from auth.external_oauth_provider import ExternalOAuthProvider + + provider = ExternalOAuthProvider( + client_id=config.client_id, + client_secret=config.client_secret, + base_url=config.get_oauth_base_url(), + redirect_path=config.redirect_path, + required_scopes=required_scopes, + resource_server_url=config.get_oauth_base_url(), + ) + server.auth = provider + + logger.info("OAuth 2.1 enabled with EXTERNAL provider mode") + logger.info( + "Expecting Authorization bearer tokens in tool call headers" + ) + logger.info( + "Protected resource metadata points to Google's authorization server" + ) + else: + # Standard OAuth 2.1 mode: use FastMCP's GoogleProvider + provider = GoogleProvider( + client_id=config.client_id, + client_secret=config.client_secret, + base_url=config.get_oauth_base_url(), + redirect_path=config.redirect_path, + required_scopes=required_scopes, + client_storage=client_storage, + jwt_signing_key=jwt_signing_key, + ) + # Enable protocol-level auth + server.auth = provider + logger.info( + "OAuth 2.1 enabled using FastMCP GoogleProvider with protocol-level auth" + ) + + # Always set auth provider for token validation in middleware + set_auth_provider(provider) + _auth_provider = provider + except Exception as exc: + logger.error( + "Failed 
to initialize FastMCP GoogleProvider: %s", exc, exc_info=True + ) + raise + else: + logger.info("OAuth 2.0 mode - Server will use legacy authentication.") + server.auth = None + _auth_provider = None + set_auth_provider(None) + _ensure_legacy_callback_route() + + +def get_auth_provider() -> Optional[GoogleProvider]: + """Gets the global authentication provider instance.""" + return _auth_provider + + +@server.custom_route("/", methods=["GET"]) +@server.custom_route("/health", methods=["GET"]) +async def health_check(request: Request): + try: + version = metadata.version("workspace-mcp") + except metadata.PackageNotFoundError: + version = "dev" + return JSONResponse( + { + "status": "healthy", + "service": "workspace-mcp", + "version": version, + "transport": get_transport_mode(), + } + ) + + +@server.custom_route("/attachments/{file_id}", methods=["GET"]) +async def serve_attachment(request: Request): + """Serve a stored attachment file.""" + from core.attachment_storage import get_attachment_storage + + file_id = request.path_params["file_id"] + storage = get_attachment_storage() + metadata = storage.get_attachment_metadata(file_id) + + if not metadata: + return JSONResponse( + {"error": "Attachment not found or expired"}, status_code=404 + ) + + file_path = storage.get_attachment_path(file_id) + if not file_path: + return JSONResponse({"error": "Attachment file not found"}, status_code=404) + + return FileResponse( + path=str(file_path), + filename=metadata["filename"], + media_type=metadata["mime_type"], + ) + + +async def legacy_oauth2_callback(request: Request) -> HTMLResponse: + state = request.query_params.get("state") + code = request.query_params.get("code") + error = request.query_params.get("error") + + if error: + msg = ( + f"Authentication failed: Google returned an error: {error}. State: {state}." + ) + logger.error(msg) + return create_error_response(msg) + + if not code: + msg = "Authentication failed: No authorization code received from Google." 
+ logger.error(msg) + return create_error_response(msg) + + try: + error_message = check_client_secrets() + if error_message: + return create_server_error_response(error_message) + + logger.info("OAuth callback: Received authorization code.") + + mcp_session_id = None + if hasattr(request, "state") and hasattr(request.state, "session_id"): + mcp_session_id = request.state.session_id + + verified_user_id, credentials = handle_auth_callback( + scopes=get_current_scopes(), + authorization_response=str(request.url), + redirect_uri=get_oauth_redirect_uri_for_current_mode(), + session_id=mcp_session_id, + ) + + logger.info( + f"OAuth callback: Successfully authenticated user: {verified_user_id}." + ) + + try: + store = get_oauth21_session_store() + + store.store_session( + user_email=verified_user_id, + access_token=credentials.token, + refresh_token=credentials.refresh_token, + token_uri=credentials.token_uri, + client_id=credentials.client_id, + client_secret=credentials.client_secret, + scopes=credentials.scopes, + expiry=credentials.expiry, + session_id=f"google-{state}", + mcp_session_id=mcp_session_id, + ) + logger.info( + f"Stored Google credentials in OAuth 2.1 session store for {verified_user_id}" + ) + except Exception as e: + logger.error(f"Failed to store credentials in OAuth 2.1 store: {e}") + + return create_success_response(verified_user_id) + except Exception as e: + logger.error(f"Error processing OAuth callback: {str(e)}", exc_info=True) + return create_server_error_response(str(e)) + + +@server.tool() +async def start_google_auth( + service_name: str, user_google_email: str = USER_GOOGLE_EMAIL +) -> str: + """ + Manually initiate Google OAuth authentication flow. + + NOTE: This is a legacy OAuth 2.0 tool and is disabled when OAuth 2.1 is enabled. + The authentication system automatically handles credential checks and prompts for + authentication when needed. Only use this tool if: + 1. You need to re-authenticate with different credentials + 2. 
You want to proactively authenticate before using other tools + 3. The automatic authentication flow failed and you need to retry + + In most cases, simply try calling the Google Workspace tool you need - it will + automatically handle authentication if required. + """ + if is_oauth21_enabled(): + if is_external_oauth21_provider(): + return ( + "start_google_auth is disabled when OAuth 2.1 is enabled. " + "Provide a valid OAuth 2.1 bearer token in the Authorization header " + "and retry the original tool." + ) + return ( + "start_google_auth is disabled when OAuth 2.1 is enabled. " + "Authenticate through your MCP client's OAuth 2.1 flow and retry the " + "original tool." + ) + + if not user_google_email: + raise ValueError("user_google_email must be provided.") + + error_message = check_client_secrets() + if error_message: + return f"**Authentication Error:** {error_message}" + + try: + auth_message = await start_auth_flow( + user_google_email=user_google_email, + service_name=service_name, + redirect_uri=get_oauth_redirect_uri_for_current_mode(), + ) + return auth_message + except Exception as e: + logger.error(f"Failed to start Google authentication flow: {e}", exc_info=True) + return f"**Error:** An unexpected error occurred: {e}" diff --git a/core/tool_registry.py b/core/tool_registry.py new file mode 100644 index 0000000..206d561 --- /dev/null +++ b/core/tool_registry.py @@ -0,0 +1,211 @@ +""" +Tool Registry for Conditional Tool Registration + +This module provides a registry system that allows tools to be conditionally registered +based on tier configuration, replacing direct @server.tool() decorators. 
+""" + +import logging +from typing import Set, Optional, Callable + +from auth.oauth_config import is_oauth21_enabled +from auth.permissions import is_permissions_mode, get_allowed_scopes_set +from auth.scopes import is_read_only_mode, get_all_read_only_scopes + +logger = logging.getLogger(__name__) + +# Global registry of enabled tools +_enabled_tools: Optional[Set[str]] = None + + +def set_enabled_tools(tool_names: Optional[Set[str]]): + """Set the globally enabled tools.""" + global _enabled_tools + _enabled_tools = tool_names + + +def get_enabled_tools() -> Optional[Set[str]]: + """Get the set of enabled tools, or None if all tools are enabled.""" + return _enabled_tools + + +def is_tool_enabled(tool_name: str) -> bool: + """Check if a specific tool is enabled.""" + if _enabled_tools is None: + return True # All tools enabled by default + return tool_name in _enabled_tools + + +def conditional_tool(server, tool_name: str): + """ + Decorator that conditionally registers a tool based on the enabled tools set. + + Args: + server: The FastMCP server instance + tool_name: The name of the tool to register + + Returns: + Either the registered tool decorator or a no-op decorator + """ + + def decorator(func: Callable) -> Callable: + if is_tool_enabled(tool_name): + logger.debug(f"Registering tool: {tool_name}") + return server.tool()(func) + else: + logger.debug(f"Skipping tool registration: {tool_name}") + return func + + return decorator + + +def wrap_server_tool_method(server): + """ + Track tool registrations and filter them post-registration. 
+ """ + original_tool = server.tool + server._tracked_tools = [] + + def tracking_tool(*args, **kwargs): + original_decorator = original_tool(*args, **kwargs) + + def wrapper_decorator(func: Callable) -> Callable: + tool_name = func.__name__ + server._tracked_tools.append(tool_name) + # Always apply the original decorator to register the tool + return original_decorator(func) + + return wrapper_decorator + + server.tool = tracking_tool + + +def get_tool_components(server) -> dict: + """Get tool components dict from server's local_provider. + + Returns a dict mapping tool_name -> tool_object for introspection. + + Note: Uses local_provider._components because the public list_tools() + is async-only, and callers (startup filtering, CLI) run synchronously. + """ + lp = getattr(server, "local_provider", None) + if lp is None: + return {} + components = getattr(lp, "_components", {}) + tools = {} + for key, component in components.items(): + if key.startswith("tool:"): + # Keys are like "tool:name@version", extract the name + name = key.split(":", 1)[1].rsplit("@", 1)[0] + tools[name] = component + return tools + + +def filter_server_tools(server): + """Remove disabled tools from the server after registration.""" + enabled_tools = get_enabled_tools() + oauth21_enabled = is_oauth21_enabled() + permissions_mode = is_permissions_mode() + if ( + enabled_tools is None + and not oauth21_enabled + and not is_read_only_mode() + and not permissions_mode + ): + return + + tools_removed = 0 + tool_components = get_tool_components(server) + + read_only_mode = is_read_only_mode() + allowed_scopes = set(get_all_read_only_scopes()) if read_only_mode else None + + tools_to_remove = set() + + # 1. Tier filtering + if enabled_tools is not None: + for tool_name in tool_components: + if not is_tool_enabled(tool_name): + tools_to_remove.add(tool_name) + + # 2. 
OAuth 2.1 filtering + if oauth21_enabled and "start_google_auth" in tool_components: + tools_to_remove.add("start_google_auth") + logger.info("OAuth 2.1 enabled: disabling start_google_auth tool") + + # 3. Read-only mode filtering (skipped when granular permissions are active) + if read_only_mode and not permissions_mode: + for tool_name, tool_obj in tool_components.items(): + if tool_name in tools_to_remove: + continue + + # Check if tool has required scopes attached (from @require_google_service) + func_to_check = tool_obj + if hasattr(tool_obj, "fn"): + func_to_check = tool_obj.fn + + required_scopes = getattr(func_to_check, "_required_google_scopes", []) + + if required_scopes: + # If ANY required scope is not in the allowed read-only scopes, disable the tool + if not all(scope in allowed_scopes for scope in required_scopes): + logger.info( + f"Read-only mode: Disabling tool '{tool_name}' (requires write scopes: {required_scopes})" + ) + tools_to_remove.add(tool_name) + + # 4. Granular permissions filtering + # No scope hierarchy expansion here — permission levels are already cumulative + # and explicitly define allowed scopes. Hierarchy expansion would defeat the + # purpose (e.g. gmail.modify in the hierarchy covers gmail.send, but the + # "organize" permission level intentionally excludes gmail.send). 
+ if permissions_mode: + perm_allowed = get_allowed_scopes_set() or set() + + for tool_name, tool_obj in tool_components.items(): + if tool_name in tools_to_remove: + continue + + func_to_check = tool_obj + if hasattr(tool_obj, "fn"): + func_to_check = tool_obj.fn + + required_scopes = getattr(func_to_check, "_required_google_scopes", []) + if required_scopes: + if not all(scope in perm_allowed for scope in required_scopes): + logger.info( + "Permissions mode: Disabling tool '%s' (requires: %s)", + tool_name, + required_scopes, + ) + tools_to_remove.add(tool_name) + + for tool_name in tools_to_remove: + try: + server.local_provider.remove_tool(tool_name) + except AttributeError: + logger.warning( + "Failed to remove tool '%s': remove_tool not available on server.local_provider", + tool_name, + ) + continue + except Exception as exc: + logger.warning( + "Failed to remove tool '%s': %s", + tool_name, + exc, + ) + continue + tools_removed += 1 + + if tools_removed > 0: + enabled_count = len(enabled_tools) if enabled_tools is not None else "all" + if permissions_mode: + mode = "Permissions" + elif is_read_only_mode(): + mode = "Read-Only" + else: + mode = "Full" + logger.info( + f"Tool filtering: removed {tools_removed} tools, {enabled_count} enabled. Mode: {mode}" + ) diff --git a/core/tool_tier_loader.py b/core/tool_tier_loader.py new file mode 100644 index 0000000..57bed1e --- /dev/null +++ b/core/tool_tier_loader.py @@ -0,0 +1,196 @@ +""" +Tool Tier Loader Module + +This module provides functionality to load and resolve tool tiers from the YAML configuration. +It integrates with the existing tool enablement workflow to support tiered tool loading. 
+""" + +import logging +from pathlib import Path +from typing import Dict, List, Set, Literal, Optional + +import yaml + +logger = logging.getLogger(__name__) + +TierLevel = Literal["core", "extended", "complete"] + + +class ToolTierLoader: + """Loads and manages tool tiers from configuration.""" + + def __init__(self, config_path: Optional[str] = None): + """ + Initialize the tool tier loader. + + Args: + config_path: Path to the tool_tiers.yaml file. If None, uses default location. + """ + if config_path is None: + # Default to core/tool_tiers.yaml relative to this file + config_path = Path(__file__).parent / "tool_tiers.yaml" + + self.config_path = Path(config_path) + self._tiers_config: Optional[Dict] = None + + def _load_config(self) -> Dict: + """Load the tool tiers configuration from YAML file.""" + if self._tiers_config is not None: + return self._tiers_config + + if not self.config_path.exists(): + raise FileNotFoundError( + f"Tool tiers configuration not found: {self.config_path}" + ) + + try: + with open(self.config_path, "r", encoding="utf-8") as f: + self._tiers_config = yaml.safe_load(f) + logger.info(f"Loaded tool tiers configuration from {self.config_path}") + return self._tiers_config + except yaml.YAMLError as e: + raise ValueError(f"Invalid YAML in tool tiers configuration: {e}") + except Exception as e: + raise RuntimeError(f"Failed to load tool tiers configuration: {e}") + + def get_available_services(self) -> List[str]: + """Get list of all available services defined in the configuration.""" + config = self._load_config() + return list(config.keys()) + + def get_tools_for_tier( + self, tier: TierLevel, services: Optional[List[str]] = None + ) -> List[str]: + """ + Get all tools for a specific tier level. + + Args: + tier: The tier level (core, extended, complete) + services: Optional list of services to filter by. If None, includes all services. 
+ + Returns: + List of tool names for the specified tier level + """ + config = self._load_config() + tools = [] + + # If no services specified, use all available services + if services is None: + services = self.get_available_services() + + for service in services: + if service not in config: + logger.warning( + f"Service '{service}' not found in tool tiers configuration" + ) + continue + + service_config = config[service] + if tier not in service_config: + logger.debug(f"Tier '{tier}' not defined for service '{service}'") + continue + + tier_tools = service_config[tier] + if tier_tools: # Handle empty lists + tools.extend(tier_tools) + + return tools + + def get_tools_up_to_tier( + self, tier: TierLevel, services: Optional[List[str]] = None + ) -> List[str]: + """ + Get all tools up to and including the specified tier level. + + Args: + tier: The maximum tier level to include + services: Optional list of services to filter by. If None, includes all services. + + Returns: + List of tool names up to the specified tier level + """ + tier_order = ["core", "extended", "complete"] + max_tier_index = tier_order.index(tier) + + tools = [] + for i in range(max_tier_index + 1): + current_tier = tier_order[i] + tools.extend(self.get_tools_for_tier(current_tier, services)) + + # Remove duplicates while preserving order + seen = set() + unique_tools = [] + for tool in tools: + if tool not in seen: + seen.add(tool) + unique_tools.append(tool) + + return unique_tools + + def get_services_for_tools(self, tool_names: List[str]) -> Set[str]: + """ + Get the service names that provide the specified tools. 
+ + Args: + tool_names: List of tool names to lookup + + Returns: + Set of service names that provide any of the specified tools + """ + config = self._load_config() + services = set() + + for service, service_config in config.items(): + for tier_name, tier_tools in service_config.items(): + if tier_tools and any(tool in tier_tools for tool in tool_names): + services.add(service) + break + + return services + + +def get_tools_for_tier( + tier: TierLevel, services: Optional[List[str]] = None +) -> List[str]: + """ + Convenience function to get tools for a specific tier. + + Args: + tier: The tier level (core, extended, complete) + services: Optional list of services to filter by + + Returns: + List of tool names for the specified tier level + """ + loader = ToolTierLoader() + return loader.get_tools_up_to_tier(tier, services) + + +def resolve_tools_from_tier( + tier: TierLevel, services: Optional[List[str]] = None +) -> tuple[List[str], List[str]]: + """ + Resolve tool names and service names for the specified tier. 
+ + Args: + tier: The tier level (core, extended, complete) + services: Optional list of services to filter by + + Returns: + Tuple of (tool_names, service_names) where: + - tool_names: List of specific tool names for the tier + - service_names: List of service names that should be imported + """ + loader = ToolTierLoader() + + # Get all tools for the tier + tools = loader.get_tools_up_to_tier(tier, services) + + # Map back to service names + service_names = loader.get_services_for_tools(tools) + + logger.info( + f"Tier '{tier}' resolved to {len(tools)} tools across {len(service_names)} services: {sorted(service_names)}" + ) + + return tools, sorted(service_names) diff --git a/core/tool_tiers.yaml b/core/tool_tiers.yaml new file mode 100644 index 0000000..666833b --- /dev/null +++ b/core/tool_tiers.yaml @@ -0,0 +1,172 @@ +gmail: + core: + - search_gmail_messages + - get_gmail_message_content + - get_gmail_messages_content_batch + - send_gmail_message + + extended: + - get_gmail_attachment_content + - get_gmail_thread_content + - modify_gmail_message_labels + - list_gmail_labels + - manage_gmail_label + - draft_gmail_message + - list_gmail_filters + - manage_gmail_filter + + complete: + - get_gmail_threads_content_batch + - batch_modify_gmail_message_labels + - start_google_auth + +drive: + core: + - search_drive_files + - get_drive_file_content + - get_drive_file_download_url + - create_drive_file + - create_drive_folder + - import_to_google_doc + - get_drive_shareable_link + extended: + - list_drive_items + - copy_drive_file + - update_drive_file + - manage_drive_access + - set_drive_file_permissions + complete: + - get_drive_file_permissions + - check_drive_file_public_access + +calendar: + core: + - list_calendars + - get_events + - manage_event + extended: + - query_freebusy + complete: [] + +docs: + core: + - get_doc_content + - create_doc + - modify_doc_text + extended: + - export_doc_to_pdf + - search_docs + - find_and_replace_doc + - list_docs_in_folder + - 
insert_doc_elements + - update_paragraph_style + - get_doc_as_markdown + complete: + - insert_doc_image + - update_doc_headers_footers + - batch_update_doc + - inspect_doc_structure + - create_table_with_data + - debug_table_structure + - list_document_comments + - manage_document_comment + +sheets: + core: + - create_spreadsheet + - read_sheet_values + - modify_sheet_values + extended: + - list_spreadsheets + - get_spreadsheet_info + - format_sheet_range + complete: + - create_sheet + - list_spreadsheet_comments + - manage_spreadsheet_comment + - manage_conditional_formatting + +chat: + core: + - send_message + - get_messages + - search_messages + - create_reaction + extended: + - list_spaces + - download_chat_attachment + complete: [] + +forms: + core: + - create_form + - get_form + extended: + - list_form_responses + complete: + - set_publish_settings + - get_form_response + - batch_update_form + +slides: + core: + - create_presentation + - get_presentation + extended: + - batch_update_presentation + - get_page + - get_page_thumbnail + complete: + - list_presentation_comments + - manage_presentation_comment + +tasks: + core: + - get_task + - list_tasks + - manage_task + extended: [] + complete: + - list_task_lists + - get_task_list + - manage_task_list + +contacts: + core: + - search_contacts + - get_contact + - list_contacts + - manage_contact + extended: + - list_contact_groups + - get_contact_group + complete: + - manage_contacts_batch + - manage_contact_group + +search: + core: + - search_custom + extended: [] + complete: + - get_search_engine_info + +appscript: + core: + - list_script_projects + - get_script_project + - get_script_content + - create_script_project + - update_script_content + - run_script_function + - generate_trigger_code + extended: + - manage_deployment + - list_deployments + - delete_script_project + - list_versions + - create_version + - get_version + - list_script_processes + - get_script_metrics + complete: [] diff --git 
a/core/utils.py b/core/utils.py new file mode 100644 index 0000000..ee91fb3 --- /dev/null +++ b/core/utils.py @@ -0,0 +1,493 @@ +import io +import logging +import os +import zipfile +import ssl +import asyncio +import functools + +from pathlib import Path +from typing import List, Optional + +from defusedxml import ElementTree as ET + +from googleapiclient.errors import HttpError +from .api_enablement import get_api_enablement_message +from auth.google_auth import GoogleAuthenticationError +from auth.oauth_config import is_oauth21_enabled, is_external_oauth21_provider + +logger = logging.getLogger(__name__) + + +class TransientNetworkError(Exception): + """Custom exception for transient network errors after retries.""" + + pass + + +class UserInputError(Exception): + """Raised for user-facing input/validation errors that shouldn't be retried.""" + + pass + + +# Directories from which local file reads are allowed. +# The user's home directory is the default safe base. +# Override via ALLOWED_FILE_DIRS env var (os.pathsep-separated paths). +_ALLOWED_FILE_DIRS_ENV = "ALLOWED_FILE_DIRS" + + +def _get_allowed_file_dirs() -> list[Path]: + """Return the list of directories from which local file access is permitted.""" + env_val = os.environ.get(_ALLOWED_FILE_DIRS_ENV) + if env_val: + return [ + Path(p).expanduser().resolve() + for p in env_val.split(os.pathsep) + if p.strip() + ] + home = Path.home() + return [home] if home else [] + + +def validate_file_path(file_path: str) -> Path: + """ + Validate that a file path is safe to read from the server filesystem. + + Resolves the path canonically (following symlinks), then verifies it falls + within one of the allowed base directories. Rejects paths to sensitive + system locations regardless of allowlist. + + Args: + file_path: The raw file path string to validate. + + Returns: + Path: The resolved, validated Path object. + + Raises: + ValueError: If the path is outside allowed directories or targets + a sensitive location. 
+ """ + resolved = Path(file_path).resolve() + + if not resolved.exists(): + raise FileNotFoundError(f"Path does not exist: {resolved}") + + # Block sensitive file patterns regardless of allowlist + resolved_str = str(resolved) + file_name = resolved.name.lower() + + # Block .env files and variants (.env, .env.local, .env.production, etc.) + if file_name == ".env" or file_name.startswith(".env."): + raise ValueError( + f"Access to '{resolved_str}' is not allowed: " + ".env files may contain secrets and cannot be read, uploaded, or attached." + ) + + # Block well-known sensitive system paths (including macOS /private variants) + sensitive_prefixes = ( + "/proc", + "/sys", + "/dev", + "/etc/shadow", + "/etc/passwd", + "/private/etc/shadow", + "/private/etc/passwd", + ) + for prefix in sensitive_prefixes: + if resolved_str == prefix or resolved_str.startswith(prefix + "/"): + raise ValueError( + f"Access to '{resolved_str}' is not allowed: " + "path is in a restricted system location." + ) + + # Block sensitive directories that commonly contain credentials/keys + sensitive_dirs = ( + ".ssh", + ".aws", + ".kube", + ".gnupg", + ".config/gcloud", + ) + for sensitive_dir in sensitive_dirs: + home = Path.home() + blocked = home / sensitive_dir + if resolved == blocked or str(resolved).startswith(str(blocked) + "/"): + raise ValueError( + f"Access to '{resolved_str}' is not allowed: " + "path is in a directory that commonly contains secrets or credentials." + ) + + # Block other credential/secret file patterns + sensitive_names = { + ".credentials", + ".credentials.json", + "credentials.json", + "client_secret.json", + "client_secrets.json", + "service_account.json", + "service-account.json", + ".npmrc", + ".pypirc", + ".netrc", + ".git-credentials", + ".docker/config.json", + } + if file_name in sensitive_names: + raise ValueError( + f"Access to '{resolved_str}' is not allowed: " + "this file commonly contains secrets or credentials." 
+ ) + + allowed_dirs = _get_allowed_file_dirs() + if not allowed_dirs: + raise ValueError( + "No allowed file directories configured. " + "Set the ALLOWED_FILE_DIRS environment variable or ensure a home directory exists." + ) + + for allowed in allowed_dirs: + try: + resolved.relative_to(allowed) + return resolved + except ValueError: + continue + + raise ValueError( + f"Access to '{resolved_str}' is not allowed: " + f"path is outside permitted directories ({', '.join(str(d) for d in allowed_dirs)}). " + "Set ALLOWED_FILE_DIRS to adjust." + ) + + +def check_credentials_directory_permissions(credentials_dir: str = None) -> None: + """ + Check if the service has appropriate permissions to create and write to the .credentials directory. + + Args: + credentials_dir: Path to the credentials directory (default: uses get_default_credentials_dir()) + + Raises: + PermissionError: If the service lacks necessary permissions + OSError: If there are other file system issues + """ + if credentials_dir is None: + from auth.google_auth import get_default_credentials_dir + + credentials_dir = get_default_credentials_dir() + + try: + # Check if directory exists + if os.path.exists(credentials_dir): + # Directory exists, check if we can write to it + test_file = os.path.join(credentials_dir, ".permission_test") + try: + with open(test_file, "w") as f: + f.write("test") + os.remove(test_file) + logger.info( + f"Credentials directory permissions check passed: {os.path.abspath(credentials_dir)}" + ) + except (PermissionError, OSError) as e: + raise PermissionError( + f"Cannot write to existing credentials directory '{os.path.abspath(credentials_dir)}': {e}" + ) + else: + # Directory doesn't exist, try to create it and its parent directories + try: + os.makedirs(credentials_dir, exist_ok=True) + # Test writing to the new directory + test_file = os.path.join(credentials_dir, ".permission_test") + with open(test_file, "w") as f: + f.write("test") + os.remove(test_file) + logger.info( + 
f"Created credentials directory with proper permissions: {os.path.abspath(credentials_dir)}" + ) + except (PermissionError, OSError) as e: + # Clean up if we created the directory but can't write to it + try: + if os.path.exists(credentials_dir): + os.rmdir(credentials_dir) + except (PermissionError, OSError): + pass + raise PermissionError( + f"Cannot create or write to credentials directory '{os.path.abspath(credentials_dir)}': {e}" + ) + + except PermissionError: + raise + except Exception as e: + raise OSError( + f"Unexpected error checking credentials directory permissions: {e}" + ) + + +def extract_office_xml_text(file_bytes: bytes, mime_type: str) -> Optional[str]: + """ + Very light-weight XML scraper for Word, Excel, PowerPoint files. + Returns plain-text if something readable is found, else None. + Uses zipfile + defusedxml.ElementTree. + """ + shared_strings: List[str] = [] + ns_excel_main = "http://schemas.openxmlformats.org/spreadsheetml/2006/main" + + try: + with zipfile.ZipFile(io.BytesIO(file_bytes)) as zf: + targets: List[str] = [] + # Map MIME → iterable of XML files to inspect + if ( + mime_type + == "application/vnd.openxmlformats-officedocument.wordprocessingml.document" + ): + targets = ["word/document.xml"] + elif ( + mime_type + == "application/vnd.openxmlformats-officedocument.presentationml.presentation" + ): + targets = [n for n in zf.namelist() if n.startswith("ppt/slides/slide")] + elif ( + mime_type + == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" + ): + targets = [ + n + for n in zf.namelist() + if n.startswith("xl/worksheets/sheet") and "drawing" not in n + ] + # Attempt to parse sharedStrings.xml for Excel files + try: + shared_strings_xml = zf.read("xl/sharedStrings.xml") + shared_strings_root = ET.fromstring(shared_strings_xml) + for si_element in shared_strings_root.findall( + f"{{{ns_excel_main}}}si" + ): + text_parts = [] + # Find all elements, simple or within runs, and concatenate their text + for 
t_element in si_element.findall(f".//{{{ns_excel_main}}}t"): + if t_element.text: + text_parts.append(t_element.text) + shared_strings.append("".join(text_parts)) + except KeyError: + logger.info( + "No sharedStrings.xml found in Excel file (this is optional)." + ) + except ET.ParseError as e: + logger.error(f"Error parsing sharedStrings.xml: {e}") + except ( + Exception + ) as e: # Catch any other unexpected error during sharedStrings parsing + logger.error( + f"Unexpected error processing sharedStrings.xml: {e}", + exc_info=True, + ) + else: + return None + + pieces: List[str] = [] + for member in targets: + try: + xml_content = zf.read(member) + xml_root = ET.fromstring(xml_content) + member_texts: List[str] = [] + + if ( + mime_type + == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" + ): + for cell_element in xml_root.findall( + f".//{{{ns_excel_main}}}c" + ): # Find all elements + value_element = cell_element.find( + f"{{{ns_excel_main}}}v" + ) # Find under + + # Skip if cell has no value element or value element has no text + if value_element is None or value_element.text is None: + continue + + cell_type = cell_element.get("t") + if cell_type == "s": # Shared string + try: + ss_idx = int(value_element.text) + if 0 <= ss_idx < len(shared_strings): + member_texts.append(shared_strings[ss_idx]) + else: + logger.warning( + f"Invalid shared string index {ss_idx} in {member}. Max index: {len(shared_strings) - 1}" + ) + except ValueError: + logger.warning( + f"Non-integer shared string index: '{value_element.text}' in {member}." 
+ ) + else: # Direct value (number, boolean, inline string if not 's') + member_texts.append(value_element.text) + else: # Word or PowerPoint + for elem in xml_root.iter(): + # For Word: where w is "http://schemas.openxmlformats.org/wordprocessingml/2006/main" + # For PowerPoint: where a is "http://schemas.openxmlformats.org/drawingml/2006/main" + if ( + elem.tag.endswith("}t") and elem.text + ): # Check for any namespaced tag ending with 't' + cleaned_text = elem.text.strip() + if ( + cleaned_text + ): # Add only if there's non-whitespace text + member_texts.append(cleaned_text) + + if member_texts: + pieces.append( + " ".join(member_texts) + ) # Join texts from one member with spaces + + except ET.ParseError as e: + logger.warning( + f"Could not parse XML in member '{member}' for {mime_type} file: {e}" + ) + except Exception as e: + logger.error( + f"Error processing member '{member}' for {mime_type}: {e}", + exc_info=True, + ) + # continue processing other members + + if not pieces: # If no text was extracted at all + return None + + # Join content from different members (sheets/slides) with double newlines for separation + text = "\n\n".join(pieces).strip() + return text or None # Ensure None is returned if text is empty after strip + + except zipfile.BadZipFile: + logger.warning(f"File is not a valid ZIP archive (mime_type: {mime_type}).") + return None + except ( + ET.ParseError + ) as e: # Catch parsing errors at the top level if zipfile itself is XML-like + logger.error(f"XML parsing error at a high level for {mime_type}: {e}") + return None + except Exception as e: + logger.error( + f"Failed to extract office XML text for {mime_type}: {e}", exc_info=True + ) + return None + + +def handle_http_errors( + tool_name: str, is_read_only: bool = False, service_type: Optional[str] = None +): + """ + A decorator to handle Google API HttpErrors and transient SSL errors in a standardized way. 
+ + It wraps a tool function, catches HttpError, logs a detailed error message, + and raises a generic Exception with a user-friendly message. + + If is_read_only is True, it will also catch ssl.SSLError and retry with + exponential backoff. After exhausting retries, it raises a TransientNetworkError. + + Args: + tool_name (str): The name of the tool being decorated (e.g., 'list_calendars'). + is_read_only (bool): If True, the operation is considered safe to retry on + transient network errors. Defaults to False. + service_type (str): Optional. The Google service type (e.g., 'calendar', 'gmail'). + """ + + def decorator(func): + @functools.wraps(func) + async def wrapper(*args, **kwargs): + max_retries = 3 + base_delay = 1 + + for attempt in range(max_retries): + try: + return await func(*args, **kwargs) + except ssl.SSLError as e: + if is_read_only and attempt < max_retries - 1: + delay = base_delay * (2**attempt) + logger.warning( + f"SSL error in {tool_name} on attempt {attempt + 1}: {e}. Retrying in {delay} seconds..." + ) + await asyncio.sleep(delay) + else: + logger.error( + f"SSL error in {tool_name} on final attempt: {e}. Raising exception." + ) + raise TransientNetworkError( + f"A transient SSL error occurred in '{tool_name}' after {max_retries} attempts. " + "This is likely a temporary network or certificate issue. Please try again shortly." 
+ ) from e + except UserInputError as e: + message = f"Input error in {tool_name}: {e}" + logger.warning(message) + raise e + except HttpError as error: + user_google_email = kwargs.get("user_google_email", "N/A") + error_details = str(error) + + # Check if this is an API not enabled error + if ( + error.resp.status == 403 + and "accessNotConfigured" in error_details + ): + enablement_msg = get_api_enablement_message( + error_details, service_type + ) + + if enablement_msg: + message = ( + f"API error in {tool_name}: {enablement_msg}\n\n" + f"User: {user_google_email}" + ) + else: + message = ( + f"API error in {tool_name}: {error}. " + f"The required API is not enabled for your project. " + f"Please check the Google Cloud Console to enable it." + ) + elif error.resp.status in [401, 403]: + # Authentication/authorization errors + if is_oauth21_enabled(): + if is_external_oauth21_provider(): + auth_hint = ( + "LLM: Ask the user to provide a valid OAuth 2.1 " + "bearer token in the Authorization header and retry." + ) + else: + auth_hint = ( + "LLM: Ask the user to authenticate via their MCP " + "client's OAuth 2.1 flow and retry." + ) + else: + auth_hint = ( + "LLM: Try 'start_google_auth' with the user's email " + "and the appropriate service_name." + ) + message = ( + f"API error in {tool_name}: {error}. " + f"You might need to re-authenticate for user '{user_google_email}'. " + f"{auth_hint}" + ) + else: + # Other HTTP errors (400 Bad Request, etc.) 
- don't suggest re-auth + message = f"API error in {tool_name}: {error}" + + logger.error(f"API error in {tool_name}: {error}", exc_info=True) + raise Exception(message) from error + except TransientNetworkError: + # Re-raise without wrapping to preserve the specific error type + raise + except GoogleAuthenticationError: + # Re-raise authentication errors without wrapping + raise + except Exception as e: + message = f"An unexpected error occurred in {tool_name}: {e}" + logger.exception(message) + raise Exception(message) from e + + # Propagate _required_google_scopes if present (for tool filtering) + if hasattr(func, "_required_google_scopes"): + wrapper._required_google_scopes = func._required_google_scopes + + return wrapper + + return decorator diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..425c45a --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,16 @@ +services: + gws_mcp: + build: . + container_name: gws_mcp + ports: + - "8000:8000" + environment: + - GOOGLE_MCP_CREDENTIALS_DIR=/app/store_creds + volumes: + - ./client_secret.json:/app/client_secret.json:ro + - store_creds:/app/store_creds:rw + env_file: + - .env + +volumes: + store_creds: \ No newline at end of file diff --git a/fastmcp.json b/fastmcp.json new file mode 100644 index 0000000..434aca6 --- /dev/null +++ b/fastmcp.json @@ -0,0 +1,21 @@ +{ + "$schema": "https://gofastmcp.com/public/schemas/fastmcp.json/v1.json", + "source": { + "path": "fastmcp_server.py", + "entrypoint": "mcp" + }, + "environment": { + "python": ">=3.10", + "project": "." 
def enforce_fastmcp_cloud_defaults():
    """Pin the environment to the settings FastMCP Cloud deployments need.

    ``MCP_ENABLE_OAUTH21`` and ``WORKSPACE_MCP_STATELESS_MODE`` are written as
    ``"true"`` unless their current value already equals that ignoring case.
    ``MCP_SINGLE_USER_MODE`` is written as ``"false"`` unless it is already
    exactly that string.

    Returns:
        A list of ``(name, previous_value, new_value)`` tuples, one for each
        variable that was written, in processing order. ``previous_value`` is
        ``None`` when the variable was not set beforehand.
    """
    # (variable name, enforced value, compare case-insensitively?)
    policy = (
        ("MCP_ENABLE_OAUTH21", "true", True),
        ("WORKSPACE_MCP_STATELESS_MODE", "true", True),
        ("MCP_SINGLE_USER_MODE", "false", False),
    )

    changes = []
    for name, wanted, ignore_case in policy:
        previous = os.environ.get(name)
        # An unset variable compares as "" on the case-insensitive path and as
        # None on the exact path; neither equals the target, so it is written.
        observed = (previous or "").lower() if ignore_case else previous
        if observed == wanted:
            continue
        os.environ[name] = wanted
        changes.append((name, previous, wanted))

    return changes
def configure_safe_logging():
    """Install Unicode-safe formatters on the root logger's console handlers.

    Every handler on ``logging.root`` that writes to the process's standard
    output or standard error gets a ``SafeEnhancedFormatter``. Handlers bound
    to any other stream (for example file handlers) keep their formatter.
    """

    class SafeEnhancedFormatter(EnhancedLogFormatter):
        """Formatter that degrades to ASCII when the message cannot be encoded."""

        def format(self, record):
            try:
                return super().format(record)
            except UnicodeEncodeError:
                # Fallback to ASCII-safe formatting: unencodable characters in
                # the message are replaced with "?".
                # NOTE(review): _get_ascii_prefix is presumably provided by
                # EnhancedLogFormatter - confirm against core.log_formatter.
                service_prefix = self._get_ascii_prefix(record.name, record.levelname)
                safe_msg = (
                    str(record.getMessage())
                    .encode("ascii", errors="replace")
                    .decode("ascii")
                )
                return f"{service_prefix} {safe_msg}"

    # sys.stderr / sys.stdout report these names; a FileHandler's stream
    # reports its file path instead, so file handlers are left untouched.
    console_stream_names = ("<stderr>", "<stdout>")

    for handler in logging.root.handlers:
        if not isinstance(handler, logging.StreamHandler):
            continue
        # A handler's stream may be None (FileHandler opened with delay=True)
        # or an object without a ``name`` attribute (e.g. io.StringIO); neither
        # is a console stream, and neither should raise here.
        stream_name = getattr(getattr(handler, "stream", None), "name", None)
        if stream_name in console_stream_names:
            handler.setFormatter(SafeEnhancedFormatter(use_colors=True))
+This module provides Model Context Protocol (MCP) tools for interacting with Google Apps Script API, enabling AI agents to create, manage, and execute Apps Script projects programmatically. + +## Overview + +Google Apps Script allows automation and extension of Google Workspace applications. This MCP integration provides 17 tools across core and extended tiers for complete Apps Script lifecycle management. + +## Why Apps Script? + +Apps Script is the automation glue of Google Workspace. While individual service APIs (Docs, Sheets, Gmail) operate on single resources, Apps Script enables: + +- **Cross-app automation** - Orchestrate workflows across Sheets, Gmail, Calendar, Forms, and Drive +- **Persistent logic** - Host custom business rules inside Google's environment +- **Scheduled execution** - Run automations on time-based or event-driven triggers +- **Advanced integration** - Access functionality not available through standard APIs + +This MCP integration allows AI agents to author, debug, deploy, and operate these automations end-to-end - something not possible with individual Workspace APIs alone. + +### What This Enables + +| Without Apps Script MCP | With Apps Script MCP | +|------------------------|---------------------| +| Read/update Sheets, Docs, Gmail individually | Create long-lived automations across services | +| No persistent automation logic | Host business logic that executes repeatedly | +| Manual workflow orchestration | Automated multi-step workflows | +| No execution history | Debug via execution logs and status | +| No deployment versioning | Manage deployments and roll back versions | + +### Complete Workflow Example + +**Scenario:** Automated weekly report system + +``` +User: "Create a script that runs every Monday at 9 AM. It should: +1. Read data from the 'Sales' spreadsheet +2. Calculate weekly totals and growth percentages +3. Generate a summary with the top 5 performers +4. Email the report to team@company.com +5. 
Log any errors to a monitoring sheet" +``` + +The AI agent: +1. Creates a new Apps Script project +2. Generates the complete automation code +3. Deploys the script +4. Sets up the time-based trigger +5. Tests execution and monitors results + +All through natural language - no JavaScript knowledge required. + +### AI Agent Workflow Pattern + +The MCP client typically follows this pattern when working with Apps Script: + +1. **Inspect** - Read existing script code and project structure +2. **Analyze** - Understand current functionality and identify issues +3. **Propose** - Generate code changes or new functionality +4. **Update** - Modify files atomically with complete version control +5. **Execute** - Run functions to test changes +6. **Deploy** - Create versioned deployments for production use +7. **Monitor** - Check execution logs and debug failures + +This ensures safe, auditable automation management. + +## Features + +### Project Management +- List all Apps Script projects +- Get complete project details including all files +- Create new standalone or bound script projects +- Update script content (add/modify JavaScript files) +- Delete script projects + +### Execution +- Execute functions with parameters +- Development mode for testing latest code +- Production deployment execution +- View execution history and status + +### Deployment Management +- Create new deployments +- List all deployments for a project +- Update deployment configurations +- Delete outdated deployments + +### Version Management +- List all versions of a script +- Create immutable version snapshots +- Get details of specific versions + +### Monitoring & Analytics +- View recent script executions +- Check execution status and results +- Monitor for errors and failures +- Get execution metrics (active users, total executions, failures) + +### Trigger Code Generation +- Generate Apps Script code for time-based triggers (minutes, hours, daily, weekly) +- Generate code for event triggers 
(onOpen, onEdit, onFormSubmit, onChange) +- Provides ready-to-use code snippets with setup instructions + +## Limitations & Non-Goals + +**Current Limitations** +- Direct trigger management via API is not supported (use `generate_trigger_code` instead) +- Real-time debugging and breakpoints are not available +- Advanced service enablement must be done manually in the script editor + +**Non-Goals** +- This integration does not replace the Apps Script editor UI +- Does not execute arbitrary JavaScript outside defined script functions +- Does not provide IDE features like autocomplete or syntax highlighting + +**Workarounds** +- Triggers: Use `generate_trigger_code` to get ready-to-use Apps Script code for any trigger type +- Advanced services can be enabled via the manifest file (appsscript.json) +- Debugging is supported through execution logs, metrics, and error monitoring + +## Prerequisites + +### 1. Google Cloud Project Setup + +Before using the Apps Script MCP tools, configure your Google Cloud project: + +**Step 1: Enable Required APIs** + +Enable these APIs in your Google Cloud Console: + +1. [Apps Script API](https://console.cloud.google.com/flows/enableapi?apiid=script.googleapis.com) (required for all operations) +2. [Google Drive API](https://console.cloud.google.com/flows/enableapi?apiid=drive.googleapis.com) (required for listing projects) + +**Step 2: Create OAuth Credentials** + +1. Go to [APIs & Services > Credentials](https://console.cloud.google.com/apis/credentials) +2. Click "Create Credentials" > "OAuth client ID" +3. Select "Desktop application" as the application type +4. Download the JSON file and save as `client_secret.json` + +**Step 3: Configure OAuth Consent Screen** + +1. Go to [OAuth consent screen](https://console.cloud.google.com/apis/credentials/consent) +2. Add yourself as a test user (required for unverified apps) +3. Add the required scopes (see below) + +### 2. 
OAuth Scopes + +The following OAuth scopes are required: + +``` +https://www.googleapis.com/auth/script.projects +https://www.googleapis.com/auth/script.projects.readonly +https://www.googleapis.com/auth/script.deployments +https://www.googleapis.com/auth/script.deployments.readonly +https://www.googleapis.com/auth/script.processes +https://www.googleapis.com/auth/script.metrics +https://www.googleapis.com/auth/drive.file +``` + +These are automatically requested when using the appscript tool tier. + +### 3. Running the MCP Server + +Start the server with Apps Script tools enabled: + +```bash +uv run main.py --tools appscript --single-user +``` + +Or include with other tools: + +```bash +uv run main.py --tools appscript drive sheets +``` + +On first use, you will be prompted to authorize the application. Complete the OAuth flow in your browser. + +## Tool Tiers + +### Core Tier +Essential operations for reading, writing, and executing scripts: + +- `list_script_projects`: List accessible projects +- `get_script_project`: Get full project with all files +- `get_script_content`: Get specific file content +- `create_script_project`: Create new project +- `update_script_content`: Modify project files +- `run_script_function`: Execute functions +- `generate_trigger_code`: Generate trigger setup code + +### Extended Tier +Advanced deployment, versioning, and monitoring: + +- `create_deployment`: Create new deployment +- `list_deployments`: List all deployments +- `update_deployment`: Update deployment config +- `delete_deployment`: Remove deployment +- `delete_script_project`: Delete a project permanently +- `list_versions`: List all versions +- `create_version`: Create immutable version snapshot +- `get_version`: Get version details +- `list_script_processes`: View execution history +- `get_script_metrics`: Get execution analytics + +## Usage Examples + +### List Projects + +```python +# List all Apps Script projects +uv run main.py --tools appscript +# In MCP client: 
"Show me my Apps Script projects" +``` + +Example output: +``` +Found 3 Apps Script projects: +- Email Automation (ID: abc123) Created: 2025-01-10 Modified: 2026-01-12 +- Sheet Processor (ID: def456) Created: 2025-06-15 Modified: 2025-12-20 +- Form Handler (ID: ghi789) Created: 2024-11-03 Modified: 2025-08-14 +``` + +### Create New Project + +```python +# Create a new Apps Script project +# In MCP client: "Create a new Apps Script project called 'Data Sync'" +``` + +Example output: +``` +Created Apps Script project: Data Sync +Script ID: new123 +Edit URL: https://script.google.com/d/new123/edit +``` + +### Get Project Details + +```python +# Get complete project with all files +# In MCP client: "Show me the code for script abc123" +``` + +Example output: +``` +Project: Email Automation (ID: abc123) +Creator: user@example.com +Created: 2025-01-10 +Modified: 2026-01-12 + +Files: +1. Code.gs (SERVER_JS) + function sendDailyEmail() { + var sheet = SpreadsheetApp.getActiveSpreadsheet(); + // ... email logic + } + +2. appsscript.json (JSON) + {"timeZone": "America/New_York", "dependencies": {}} +``` + +### Update Script Content + +```python +# Update script files +# In MCP client: "Update my email script to add error handling" +``` + +The AI will: +1. Read current code +2. Generate improved version +3. Call `update_script_content` with new files + +### Run Script Function + +```python +# Execute a function +# In MCP client: "Run the sendDailyEmail function in script abc123" +``` + +Example output: +``` +Execution successful +Function: sendDailyEmail +Result: Emails sent to 5 recipients +``` + +### Create Deployment + +```python +# Deploy script for production +# In MCP client: "Deploy my email automation to production" +``` + +Example output: +``` +Created deployment for script: abc123 +Deployment ID: AKfy...xyz +Description: Production release +``` + +## Common Workflows + +### 1. 
Create Automated Workflow (Complete Example) + +**Scenario:** Form submission handler that sends customized emails + +``` +User: "When someone submits the Contact Form: +1. Get their email and department from the form response +2. Look up their manager in the Team Directory spreadsheet +3. Send a welcome email to the submitter +4. Send a notification to their manager +5. Log the interaction in the Onboarding Tracker sheet" +``` + +**AI Agent Steps:** +``` +1. "Create a new Apps Script bound to the Contact Form" +2. "Add a function that reads form submissions" +3. "Connect to the Team Directory spreadsheet to look up managers" +4. "Generate personalized email templates for both messages" +5. "Add logging to the Onboarding Tracker" +6. "Run the function to test it with sample data" +7. "Create a production deployment" +``` + +Result: Complete automation created and deployed without writing code. + +### 2. Debug Existing Script + +``` +User: "My expense tracker script is failing" +AI: "Show me the code for the expense tracker script" +AI: "What errors occurred in recent executions?" +AI: "The calculateTotal function has a division by zero error on line 23" +AI: "Fix the error by adding a check for zero values" +AI: "Run calculateTotal to verify the fix" +User: "Create a new deployment with the bug fix" +``` + +### 3. Modify and Extend Automation + +``` +User: "Update my weekly report script to include sales data from the Q1 sheet" +AI: "Read the current report generation script" +AI: "Add Q1 data fetching to the generateReport function" +AI: "Test the updated function" +User: "Looks good, deploy it" +AI: "Create a new deployment with description 'Added Q1 sales data'" +``` + +### 4. Run Existing Business Logic + +``` +User: "Run the monthlyCleanup function in my Data Management script" +User: "What does the calculateCommission function do?" 
+User: "Execute reconcileAccounts with parameters: ['2024', 'January']" +``` + +## File Types + +Apps Script projects support three file types: + +- **SERVER_JS**: Google Apps Script code (.gs files) +- **HTML**: HTML files for custom UIs +- **JSON**: Manifest file (appsscript.json) + +## API Limitations + +### Execution Timeouts +- Simple triggers: 30 seconds +- Custom functions: 30 seconds +- Script execution via API: 6 minutes + +### Quota Limits +- Script executions per day: varies by account type +- URL Fetch calls: 20,000 per day (consumer accounts) + +See [Apps Script Quotas](https://developers.google.com/apps-script/guides/services/quotas) for details. + +### Cannot Execute Arbitrary Code +The `run_script_function` tool can only execute functions that are defined in the script. You cannot run arbitrary JavaScript code directly. To run new code: + +1. Add function to script via `update_script_content` +2. Execute the function via `run_script_function` +3. Optionally remove the function after execution + +### run_script_function Requires API Executable Deployment + +The `run_script_function` tool requires additional manual configuration in the Apps Script editor: + +**Why this limitation exists:** +Google requires scripts to be explicitly deployed as "API Executable" before they can be invoked via the Apps Script API. This is a security measure to prevent unauthorized code execution. + +**To enable API execution:** + +1. Open the script in the Apps Script editor +2. Go to Project Settings (gear icon) +3. Under "Google Cloud Platform (GCP) Project", click "Change project" +4. Enter your GCP project number (found in Cloud Console dashboard) +5. Click "Deploy" > "New deployment" +6. Select type: "API Executable" +7. Set "Who has access" to "Anyone" or "Anyone with Google account" +8. Click "Deploy" + +After completing these steps, the `run_script_function` tool will work for that script. 
+ +**Note:** All other tools (create, update, list, deploy) work without this manual step. Only function execution via API requires the API Executable deployment. + +## Error Handling + +Common errors and solutions: + +### 404: Script not found +- Verify script ID is correct +- Ensure you have access to the project + +### 403: Permission denied +- Check OAuth scopes are authorized +- Verify you own or have access to the project + +### Execution timeout +- Script exceeded 6-minute limit +- Optimize code or split into smaller functions + +### Script authorization required +- Function needs additional permissions +- User must manually authorize in script editor + +## Security Considerations + +### OAuth Scopes +Scripts inherit the OAuth scopes of the MCP server. Functions that access other Google services (Gmail, Drive, etc.) will only work if those scopes are authorized. + +### Script Permissions +Scripts run with the permissions of the script owner, not the user executing them. Be cautious when: +- Running scripts you did not create +- Granting additional permissions to scripts +- Executing functions that modify data + +### Code Review +Always review code before executing, especially for: +- Scripts from unknown sources +- Functions that access sensitive data +- Operations that modify or delete data + +## Testing + +### Unit Tests +Run unit tests with mocked API responses: + +```bash +uv run pytest tests/gappsscript/test_apps_script_tools.py +``` + +### Manual Testing +Test against real Apps Script API: + +```bash +python tests/gappsscript/manual_test.py +``` + +Note: Manual tests create real projects in your account. Delete test projects after running. 
+ +## References + +### Apps Script Documentation +- [Apps Script Overview](https://developers.google.com/apps-script/overview) - Introduction and capabilities +- [Apps Script Guides](https://developers.google.com/apps-script/guides/services) - Service-specific guides +- [Apps Script Reference](https://developers.google.com/apps-script/reference) - Complete API reference + +### Apps Script API (for this MCP integration) +- [Apps Script API Overview](https://developers.google.com/apps-script/api) - API features and concepts +- [REST API Reference](https://developers.google.com/apps-script/api/reference/rest) - Endpoint documentation +- [OAuth Scopes](https://developers.google.com/apps-script/api/how-tos/authorization) - Required permissions + +### Useful Resources +- [Apps Script Quotas](https://developers.google.com/apps-script/guides/services/quotas) - Usage limits and restrictions +- [Best Practices](https://developers.google.com/apps-script/guides/support/best-practices) - Performance and optimization +- [Troubleshooting](https://developers.google.com/apps-script/guides/support/troubleshooting) - Common issues and solutions + +## Troubleshooting + +### "Apps Script API has not been used in project" +Enable the API in Google Cloud Console + +### "Insufficient Permission" +- Verify OAuth scopes are authorized +- Re-authenticate if needed + +### "Function not found" +- Check function name spelling +- Verify function exists in the script +- Ensure function is not private + +### "Invalid project structure" +- Ensure at least one .gs file exists +- Verify JSON files are valid JSON +- Check file names don't contain invalid characters + +## Contributing + +When adding new Apps Script tools: + +1. Follow existing patterns in `apps_script_tools.py` +2. Add comprehensive docstrings +3. Include unit tests +4. Update this README with examples +5. 
Test against real API before submitting + +## License + +MIT License - see project root LICENSE file diff --git a/gappsscript/TESTING.md b/gappsscript/TESTING.md new file mode 100644 index 0000000..8fdfd4a --- /dev/null +++ b/gappsscript/TESTING.md @@ -0,0 +1,254 @@ +# Apps Script MCP Testing Guide + +This document provides instructions for running unit tests and end-to-end (E2E) tests for the Apps Script MCP feature. + +## Test Structure + +``` +tests/gappsscript/ + __init__.py + test_apps_script_tools.py # Unit tests with mocked API + manual_test.py # E2E tests against real API +``` + +## Unit Tests + +Unit tests use mocked API responses and do not require Google credentials. + +### Running Unit Tests + +```bash +# Run all Apps Script unit tests +uv run pytest tests/gappsscript/test_apps_script_tools.py -v + +# Run specific test +uv run pytest tests/gappsscript/test_apps_script_tools.py::test_list_script_projects -v + +# Run with coverage +uv run pytest tests/gappsscript/test_apps_script_tools.py --cov=gappsscript +``` + +### Test Coverage + +Unit tests cover: +- list_script_projects (uses Drive API) +- get_script_project +- get_script_content +- create_script_project +- update_script_content +- run_script_function +- create_deployment +- list_deployments +- update_deployment +- delete_deployment +- list_script_processes + +## E2E Tests + +E2E tests interact with the real Google Apps Script API. They require valid OAuth credentials and will create real resources in your Google account. + +### Prerequisites + +1. **Google Cloud Project** with Apps Script API and Drive API enabled +2. **OAuth credentials** (Desktop application type) +3. 
**Test user** added to OAuth consent screen + +### Setup + +**Option 1: Default paths (recommended for CI)** + +Place credentials in the project root: +```bash +# Place your OAuth client credentials here +cp /path/to/your/client_secret.json ./client_secret.json +``` + +**Option 2: Custom paths via environment variables** + +```bash +export GOOGLE_CLIENT_SECRET_PATH=/path/to/client_secret.json +export GOOGLE_TOKEN_PATH=/path/to/token.pickle +``` + +### Running E2E Tests + +```bash +# Interactive mode (prompts for confirmation) +uv run python tests/gappsscript/manual_test.py + +# Non-interactive mode (for CI) +uv run python tests/gappsscript/manual_test.py --yes +``` + +### E2E Test Flow + +The test script performs the following operations: + +1. **List Projects** - Lists existing Apps Script projects via Drive API +2. **Create Project** - Creates a new test project +3. **Get Project** - Retrieves project details +4. **Update Content** - Adds code to the project +5. **Run Function** - Attempts to execute a function (see note below) +6. **Create Deployment** - Creates a versioned deployment +7. **List Deployments** - Lists all deployments +8. **List Processes** - Lists recent script executions + +### Cleanup + +The test script does not automatically delete created projects. After running tests: + +1. Go to [Google Apps Script](https://script.google.com/) +2. Find projects named "MCP Test Project" +3. Delete them manually via the menu (three dots) > Remove + +## Headless Linux Testing + +For headless environments (servers, CI/CD, WSL without GUI): + +### OAuth Authentication Flow + +The test script uses a headless-compatible OAuth flow: + +1. Script prints an authorization URL +2. Open the URL in any browser (can be on a different machine) +3. Complete Google sign-in and authorization +4. Browser redirects to `http://localhost/?code=...` (page will not load) +5. Copy the full URL from the browser address bar +6. 
Paste it into the terminal when prompted + +### Example Session + +``` +$ python tests/gappsscript/manual_test.py --yes + +============================================================ +HEADLESS AUTH +============================================================ + +1. Open this URL in any browser: + +https://accounts.google.com/o/oauth2/auth?response_type=code&client_id=... + +2. Sign in and authorize the app +3. You'll be redirected to http://localhost (won't load) +4. Copy the FULL URL from browser address bar + (looks like: http://localhost/?code=4/0A...&scope=...) +5. Paste it below: + +Paste full redirect URL: http://localhost/?code=4/0AQSTgQ...&scope=... + +Building API services... + +=== Test: List Projects === +Found 3 Apps Script projects: +... +``` + +### Credential Storage + +OAuth tokens are stored as pickle files: +- Default: `./test_token.pickle` in project root +- Custom: Set via `GOOGLE_TOKEN_PATH` environment variable + +Tokens are reused on subsequent runs until they expire or are revoked. + +## Known Limitations and Caveats + +### run_script_function Test Failure + +The "Run Function" test will fail with a 404 error unless you manually configure the script as an API Executable. This is a Google platform requirement, not a bug. + +To make run_script_function work: + +1. Open the created test script in Apps Script editor +2. Go to Project Settings > Change GCP project +3. Enter your GCP project number +4. Deploy as "API Executable" + +For E2E testing purposes, it is acceptable for this test to fail. All other tests should pass. + +### Drive API Requirement + +The `list_script_projects` function uses the Google Drive API (not the Apps Script API) because the Apps Script API does not provide a projects.list endpoint. Ensure the Drive API is enabled in your GCP project. 
+ +### Scope Requirements + +The E2E tests require these scopes: +- `script.projects` and `script.projects.readonly` +- `script.deployments` and `script.deployments.readonly` +- `script.processes` +- `drive.readonly` + +If you encounter "insufficient scopes" errors, delete the stored token file and re-authenticate. + +### Rate Limits + +Google enforces rate limits on the Apps Script API. If running tests repeatedly, you may encounter quota errors. Wait a few minutes before retrying. + +## CI/CD Integration + +For automated testing in CI/CD pipelines: + +### Unit Tests Only (Recommended) + +```yaml +# GitHub Actions example +- name: Run unit tests + run: uv run pytest tests/gappsscript/test_apps_script_tools.py -v +``` + +### E2E Tests in CI + +E2E tests require OAuth credentials. Options: + +1. **Skip E2E in CI** - Run only unit tests in CI, run E2E locally +2. **Service Account** - Not supported (Apps Script API requires user OAuth) +3. **Pre-authenticated Token** - Store encrypted token in CI secrets + +To use a pre-authenticated token: +```bash +# Generate token locally +python tests/gappsscript/manual_test.py + +# Store test_token.pickle contents as base64 in CI secret +base64 test_token.pickle > token.b64 + +# In CI, restore and set path +echo $TOKEN_SECRET | base64 -d > test_token.pickle +export GOOGLE_TOKEN_PATH=./test_token.pickle +python tests/gappsscript/manual_test.py --yes +``` + +Note: Tokens expire and must be refreshed periodically. + +## Troubleshooting + +### "Apps Script API has not been used in project" + +Enable the Apps Script API in your GCP project: +https://console.cloud.google.com/flows/enableapi?apiid=script.googleapis.com + +### "Access Not Configured. 
async def _list_script_projects_impl(
    service: Any,
    user_google_email: str,
    page_size: int = 50,
    page_token: Optional[str] = None,
) -> str:
    """Render one page of the user's Apps Script projects as plain text.

    Projects are discovered through a Drive ``files().list`` call filtered on
    the Apps Script MIME type, because the Script API does not have a
    projects.list method.

    Args:
        service: Client exposing ``files().list(...).execute()``.
        user_google_email: Email of the requesting user; only used in log lines.
        page_size: Value sent as ``pageSize``.
        page_token: Sent as ``pageToken`` when truthy.

    Returns:
        ``"No Apps Script projects found."`` when the response has no files;
        otherwise a header line, one line per project, and - when the response
        carries ``nextPageToken`` - a trailing line with that token.
    """
    logger.info(
        f"[list_script_projects] Email: {user_google_email}, PageSize: {page_size}"
    )

    # Non-trashed Apps Script files, most recently modified first.
    list_kwargs = dict(
        q="mimeType='application/vnd.google-apps.script' and trashed=false",
        pageSize=page_size,
        fields="nextPageToken, files(id, name, createdTime, modifiedTime)",
        orderBy="modifiedTime desc",
    )
    if page_token:
        list_kwargs["pageToken"] = page_token

    # The request object is built here; only the blocking execute() call is
    # handed to a worker thread.
    listing = await asyncio.to_thread(service.files().list(**list_kwargs).execute)

    projects = listing.get("files", [])
    if not projects:
        return "No Apps Script projects found."

    lines = [f"Found {len(projects)} Apps Script projects:"]
    lines.extend(
        f"- {entry.get('name', 'Untitled')} (ID: {entry.get('id', 'Unknown ID')}) "
        f"Created: {entry.get('createdTime', 'Unknown')} "
        f"Modified: {entry.get('modifiedTime', 'Unknown')}"
        for entry in projects
    )

    if "nextPageToken" in listing:
        lines.append(f"\nNext page token: {listing['nextPageToken']}")

    logger.info(
        f"[list_script_projects] Found {len(projects)} projects for {user_google_email}"
    )
    return "\n".join(lines)
async def _get_script_project_impl(
    service: Any,
    user_google_email: str,
    script_id: str,
) -> str:
    """Fetch a project's metadata and file list and format them as text.

    Args:
        service: Client exposing ``projects().get`` and ``projects().getContent``.
        user_google_email: Email of the acting user; only used for logging.
        script_id: ID of the script project to read.

    Returns:
        str: Title, creator and timestamps, followed by a numbered list of
        files. Each file's source is previewed up to its first 200
        characters, with ``...`` appended when it was cut.
    """
    logger.info(f"[get_script_project] Email: {user_google_email}, ID: {script_id}")

    def _fetch_metadata_and_content() -> tuple:
        # Both requests run back-to-back in ONE worker thread. Issuing them
        # from two threads at once would share the client's HTTP transport
        # across threads, which googleapiclient/httplib2 document as unsafe.
        # NOTE(review): assumes ``service`` is a googleapiclient-style client
        # with a single shared transport - confirm against the auth layer.
        metadata = service.projects().get(scriptId=script_id).execute()
        body = service.projects().getContent(scriptId=script_id).execute()
        return metadata, body

    project, content = await asyncio.to_thread(_fetch_metadata_and_content)

    title = project.get("title", "Untitled")
    project_script_id = project.get("scriptId", "Unknown")
    creator = project.get("creator", {}).get("email", "Unknown")
    create_time = project.get("createTime", "Unknown")
    update_time = project.get("updateTime", "Unknown")

    output = [
        f"Project: {title} (ID: {project_script_id})",
        f"Creator: {creator}",
        f"Created: {create_time}",
        f"Modified: {update_time}",
        "",
        "Files:",
    ]

    for i, file in enumerate(content.get("files", []), 1):
        file_name = file.get("name", "Untitled")
        file_type = file.get("type", "Unknown")
        source = file.get("source", "")

        output.append(f"{i}. {file_name} ({file_type})")
        if source:
            # Preview only: keep the listing readable for large files.
            output.append(f" {source[:200]}{'...' if len(source) > 200 else ''}")
        output.append("")

    logger.info(f"[get_script_project] Retrieved project {script_id}")
    return "\n".join(output)
async def _create_script_project_impl(
    service: Any,
    user_google_email: str,
    title: str,
    parent_id: Optional[str] = None,
) -> str:
    """Create a script project and describe the result as text.

    Args:
        service: Client exposing ``projects().create(body=...)``.
        user_google_email: Email of the acting user; only used for logging.
        title: Title sent in the request body.
        parent_id: Sent as ``parentId`` when truthy; omitted otherwise.

    Returns:
        str: Three lines giving the title, the new script ID and an editor
        URL built from that ID.
    """
    logger.info(f"[create_script_project] Email: {user_google_email}, Title: {title}")

    payload = {"title": title}
    if parent_id:
        payload["parentId"] = parent_id

    create_request = service.projects().create(body=payload)
    created = await asyncio.to_thread(create_request.execute)

    new_script_id = created.get("scriptId", "Unknown")
    summary = "\n".join(
        (
            f"Created Apps Script project: {title}",
            f"Script ID: {new_script_id}",
            f"Edit URL: https://script.google.com/d/{new_script_id}/edit",
        )
    )

    logger.info(f"[create_script_project] Created project {new_script_id}")
    return summary
async def _update_script_content_impl(
    service: Any,
    user_google_email: str,
    script_id: str,
    files: List[Dict[str, str]],
) -> str:
    """Send ``files`` to ``projects().updateContent`` and list what came back.

    Args:
        service: Client exposing ``projects().updateContent(...)``.
        user_google_email: Email of the acting user; only used for logging.
        script_id: ID of the script project to update.
        files: File objects placed under the ``files`` key of the request body.

    Returns:
        str: A header followed by one ``- name (type)`` line per file in the
        response.
    """
    logger.info(
        f"[update_script_content] Email: {user_google_email}, ID: {script_id}, Files: {len(files)}"
    )

    update_request = service.projects().updateContent(
        scriptId=script_id, body={"files": files}
    )
    result = await asyncio.to_thread(update_request.execute)

    report = [f"Updated script project: {script_id}", "", "Modified files:"]
    report.extend(
        f"- {entry.get('name', 'Untitled')} ({entry.get('type', 'Unknown')})"
        for entry in result.get("files", [])
    )

    # The log counts the files submitted, not the files echoed in the response.
    logger.info(f"[update_script_content] Updated {len(files)} files in {script_id}")
    return "\n".join(report)
async def _run_script_function_impl(
    service: Any,
    user_google_email: str,
    script_id: str,
    function_name: str,
    parameters: Optional[list[object]] = None,
    dev_mode: bool = False,
) -> str:
    """Run a script function through ``scripts().run`` and report the outcome.

    Args:
        service: Client exposing ``scripts().run(scriptId=..., body=...)``.
        user_google_email: Email of the acting user; only used for logging.
        script_id: ID of the script project containing the function.
        function_name: Name sent as ``function`` in the request body.
        parameters: Sent as ``parameters`` when truthy; omitted otherwise.
        dev_mode: Sent as ``devMode`` in the request body.

    Returns:
        str: ``Execution successful`` with the result, or ``Execution failed``
        with an error line. Failures are always reported in the returned
        text; this function does not raise.
    """
    logger.info(
        f"[run_script_function] Email: {user_google_email}, ID: {script_id}, Function: {function_name}"
    )

    request_body = {"function": function_name, "devMode": dev_mode}

    if parameters:
        request_body["parameters"] = parameters

    try:
        response = await asyncio.to_thread(
            service.scripts().run(scriptId=script_id, body=request_body).execute
        )

        if "error" in response:
            status = response["error"]
            error_message = status.get("message", "Unknown error")

            # When the script itself throws, the useful text lives in the
            # first ``details`` entry (an ExecutionError); the top-level
            # message is only a generic status string.
            details = status.get("details") or []
            first_detail = details[0] if isinstance(details[0], dict) else {}
            script_message = first_detail.get("errorMessage")
            if script_message:
                error_type = first_detail.get("errorType")
                error_message = (
                    f"{error_type}: {script_message}" if error_type else script_message
                )

            logger.warning(
                f"[run_script_function] {function_name} failed: {error_message}"
            )
            return (
                f"Execution failed\nFunction: {function_name}\nError: {error_message}"
            )

        result = response.get("response", {}).get("result")
        output = [
            "Execution successful",
            f"Function: {function_name}",
            f"Result: {result}",
        ]

        logger.info(f"[run_script_function] Successfully executed {function_name}")
        return "\n".join(output)

    except Exception as e:
        # Deliberate best-effort boundary: the caller gets a readable failure
        # message. ``logger.exception`` keeps the traceback in the log.
        logger.exception(f"[run_script_function] Execution error: {str(e)}")
        return f"Execution failed\nFunction: {function_name}\nError: {str(e)}"
async def _create_deployment_impl(
    service: Any,
    user_google_email: str,
    script_id: str,
    description: str,
    version_description: Optional[str] = None,
) -> str:
    """Create a version, then a deployment that points at that version.

    Args:
        service: Client exposing ``projects().versions().create`` and
            ``projects().deployments().create``.
        user_google_email: Email of the acting user; only used for logging.
        script_id: ID of the script project to deploy.
        description: Deployment description; also used for the version when
            ``version_description`` is falsy.
        version_description: Description for the new version, if given.

    Returns:
        str: Four lines giving the script ID, deployment ID, version number
        and description.
    """
    logger.info(
        f"[create_deployment] Email: {user_google_email}, ID: {script_id}, Desc: {description}"
    )

    # Step 1: create the version that the deployment will reference.
    version_request = (
        service.projects()
        .versions()
        .create(
            scriptId=script_id,
            body={"description": version_description or description},
        )
    )
    snapshot = await asyncio.to_thread(version_request.execute)
    version_number = snapshot.get("versionNumber")
    logger.info(f"[create_deployment] Created version {version_number}")

    # Step 2: create the deployment for that version number.
    deployment_request = (
        service.projects()
        .deployments()
        .create(
            scriptId=script_id,
            body={"versionNumber": version_number, "description": description},
        )
    )
    deployed = await asyncio.to_thread(deployment_request.execute)
    deployment_id = deployed.get("deploymentId", "Unknown")

    logger.info(f"[create_deployment] Created deployment {deployment_id}")
    return "\n".join(
        (
            f"Created deployment for script: {script_id}",
            f"Deployment ID: {deployment_id}",
            f"Version: {version_number}",
            f"Description: {description}",
        )
    )
service_type="script") +@require_google_service("script", "script_deployments") +async def manage_deployment( + service: Any, + user_google_email: str, + action: str, + script_id: str, + deployment_id: Optional[str] = None, + description: Optional[str] = None, + version_description: Optional[str] = None, +) -> str: + """ + Manages Apps Script deployments. Supports creating, updating, and deleting deployments. + + Args: + service: Injected Google API service client + user_google_email: User's email address + action: Action to perform - "create", "update", or "delete" + script_id: The script project ID + deployment_id: The deployment ID (required for update and delete) + description: Deployment description (required for create and update) + version_description: Optional version description (for create only) + + Returns: + str: Formatted string with deployment details or confirmation + """ + action = action.lower().strip() + if action == "create": + if description is None or description.strip() == "": + raise ValueError("description is required for create action") + return await _create_deployment_impl( + service, user_google_email, script_id, description, version_description + ) + elif action == "update": + if not deployment_id: + raise ValueError("deployment_id is required for update action") + if description is None or description.strip() == "": + raise ValueError("description is required for update action") + return await _update_deployment_impl( + service, user_google_email, script_id, deployment_id, description + ) + elif action == "delete": + if not deployment_id: + raise ValueError("deployment_id is required for delete action") + return await _delete_deployment_impl( + service, user_google_email, script_id, deployment_id + ) + else: + raise ValueError( + f"Invalid action '{action}'. Must be 'create', 'update', or 'delete'." 
async def _list_deployments_impl(
    service: Any,
    user_google_email: str,
    script_id: str,
) -> str:
    """List a script project's deployments as numbered text entries.

    Args:
        service: Client exposing ``projects().deployments().list(...)``.
        user_google_email: Email of the acting user; only used for logging.
        script_id: ID of the script project whose deployments are listed.

    Returns:
        str: A header plus, per deployment, its description, ID and update
        time; a fixed message when the response has no deployments.
    """
    logger.info(f"[list_deployments] Email: {user_google_email}, ID: {script_id}")

    response = await asyncio.to_thread(
        service.projects().deployments().list(scriptId=script_id).execute
    )

    deployments = response.get("deployments", [])

    if not deployments:
        return f"No deployments found for script: {script_id}"

    output = [f"Deployments for script: {script_id}", ""]

    for i, deployment in enumerate(deployments, 1):
        deployment_id = deployment.get("deploymentId", "Unknown")
        # The Apps Script REST reference nests the description under
        # ``deploymentConfig``; a top-level ``description`` is still honored
        # so callers relying on the old lookup keep working.
        config = deployment.get("deploymentConfig") or {}
        description = config.get("description") or deployment.get(
            "description", "No description"
        )
        update_time = deployment.get("updateTime", "Unknown")

        output.append(f"{i}. {description} ({deployment_id})")
        output.append(f" Updated: {update_time}")
        output.append("")

    logger.info(f"[list_deployments] Found {len(deployments)} deployments")
    return "\n".join(output)
async def _update_deployment_impl(
    service: Any,
    user_google_email: str,
    script_id: str,
    deployment_id: str,
    description: Optional[str] = None,
) -> str:
    """Change a deployment's description and report the stored value.

    The current deployment is read first so the update can resend its
    existing configuration with only the description replaced.

    Args:
        service: Client exposing ``projects().deployments().get`` and
            ``projects().deployments().update``.
        user_google_email: Email of the acting user; only used for logging.
        script_id: ID of the script project that owns the deployment.
        deployment_id: ID of the deployment to update.
        description: New description; when falsy the existing configuration
            is resubmitted unchanged.

    Returns:
        str: Three lines giving the deployment ID, script ID and the
        description found in the update response.
    """
    logger.info(
        f"[update_deployment] Email: {user_google_email}, Script: {script_id}, Deployment: {deployment_id}"
    )

    existing = await asyncio.to_thread(
        service.projects()
        .deployments()
        .get(scriptId=script_id, deploymentId=deployment_id)
        .execute
    )

    # Start from the stored configuration so fields other than the
    # description (for example the version number) are carried over.
    current_config = existing.get("deploymentConfig")
    deployment_config = dict(current_config) if isinstance(current_config, dict) else {}
    if description:
        deployment_config["description"] = description

    # The update request body wraps the configuration in ``deploymentConfig``;
    # a bare top-level ``description`` is not part of the request schema.
    request_body = {"deploymentConfig": deployment_config}

    deployment = await asyncio.to_thread(
        service.projects()
        .deployments()
        .update(scriptId=script_id, deploymentId=deployment_id, body=request_body)
        .execute
    )

    updated_config = deployment.get("deploymentConfig")
    if not isinstance(updated_config, dict):
        updated_config = {}
    shown_description = updated_config.get("description") or deployment.get(
        "description", "No description"
    )

    output = [
        f"Updated deployment: {deployment_id}",
        f"Script: {script_id}",
        f"Description: {shown_description}",
    ]

    logger.info(f"[update_deployment] Updated deployment {deployment_id}")
    return "\n".join(output)
f"[list_script_processes] Email: {user_google_email}, PageSize: {page_size}" + ) + + request_params = {"pageSize": page_size} + if script_id: + request_params["scriptId"] = script_id + + response = await asyncio.to_thread( + service.processes().list(**request_params).execute + ) + + processes = response.get("processes", []) + + if not processes: + return "No recent script executions found." + + output = ["Recent script executions:", ""] + + for i, process in enumerate(processes, 1): + function_name = process.get("functionName", "Unknown") + process_status = process.get("processStatus", "Unknown") + start_time = process.get("startTime", "Unknown") + duration = process.get("duration", "Unknown") + + output.append(f"{i}. {function_name}") + output.append(f" Status: {process_status}") + output.append(f" Started: {start_time}") + output.append(f" Duration: {duration}") + output.append("") + + logger.info(f"[list_script_processes] Found {len(processes)} processes") + return "\n".join(output) + + +@server.tool() +@handle_http_errors("list_script_processes", is_read_only=True, service_type="script") +@require_google_service("script", "script_readonly") +async def list_script_processes( + service: Any, + user_google_email: str, + page_size: int = 50, + script_id: Optional[str] = None, +) -> str: + """ + Lists recent execution processes for user's scripts. 
async def _delete_script_project_impl(
    service: Any,
    user_google_email: str,
    script_id: str,
) -> str:
    """Delete a script project by deleting the file with the same ID.

    Args:
        service: Client exposing ``files().delete(fileId=...)``.
        user_google_email: Email of the acting user; only used for logging.
        script_id: ID passed as ``fileId`` to the delete call.

    Returns:
        str: A one-line confirmation naming the deleted script ID.
    """
    logger.info(
        f"[delete_script_project] Email: {user_google_email}, ScriptID: {script_id}"
    )

    # The project is addressed as a file: its script ID is used as the file ID.
    delete_request = service.files().delete(fileId=script_id)
    await asyncio.to_thread(delete_request.execute)

    confirmation = f"Deleted Apps Script project: {script_id}"
    logger.info(f"[delete_script_project] Deleted script {script_id}")
    return confirmation
async def _list_versions_impl(
    service: Any,
    user_google_email: str,
    script_id: str,
) -> str:
    """List the versions of a script project as text.

    Args:
        service: Client exposing ``projects().versions().list(...)``.
        user_google_email: Email of the acting user; only used for logging.
        script_id: ID of the script project whose versions are listed.

    Returns:
        str: A header plus, per version, its number, description and creation
        time; a fixed message when the response has no versions.
    """
    logger.info(f"[list_versions] Email: {user_google_email}, ScriptID: {script_id}")

    list_request = service.projects().versions().list(scriptId=script_id)
    listing = await asyncio.to_thread(list_request.execute)

    found = listing.get("versions", [])
    if not found:
        return f"No versions found for script: {script_id}"

    lines = [f"Versions for script: {script_id}", ""]
    for entry in found:
        # Each version contributes a title line, a timestamp line and a spacer.
        lines += [
            f"Version {entry.get('versionNumber', 'Unknown')}: "
            f"{entry.get('description', 'No description')}",
            f" Created: {entry.get('createTime', 'Unknown')}",
            "",
        ]

    logger.info(f"[list_versions] Found {len(found)} versions")
    return "\n".join(lines)
async def _create_version_impl(
    service: Any,
    user_google_email: str,
    script_id: str,
    description: Optional[str] = None,
) -> str:
    """Create a new version of a script project and describe it as text.

    Args:
        service: Client exposing ``projects().versions().create(...)``.
        user_google_email: Email of the acting user; only used for logging.
        script_id: ID of the script project to snapshot.
        description: Sent as ``description`` when truthy; otherwise the
            request body is empty.

    Returns:
        str: Three lines giving the new version number, the description
        (or ``No description``) and the creation time from the response.
    """
    logger.info(f"[create_version] Email: {user_google_email}, ScriptID: {script_id}")

    payload = {"description": description} if description else {}
    create_request = (
        service.projects().versions().create(scriptId=script_id, body=payload)
    )
    created = await asyncio.to_thread(create_request.execute)

    new_number = created.get("versionNumber", "Unknown")
    created_at = created.get("createTime", "Unknown")

    logger.info(f"[create_version] Created version {new_number}")
    return "\n".join(
        (
            f"Created version {new_number} for script: {script_id}",
            f"Description: {description or 'No description'}",
            f"Created: {created_at}",
        )
    )
async def _get_version_impl(
    service: Any,
    user_google_email: str,
    script_id: str,
    version_number: int,
) -> str:
    """Fetch one version of a script project and describe it as text.

    Args:
        service: Client exposing ``projects().versions().get(...)``.
        user_google_email: Email of the acting user; only used for logging.
        script_id: ID of the script project.
        version_number: Version to fetch, passed as ``versionNumber``.

    Returns:
        str: Three lines giving the version number, description and creation
        time as reported by the response.
    """
    logger.info(
        f"[get_version] Email: {user_google_email}, ScriptID: {script_id}, Version: {version_number}"
    )

    get_request = (
        service.projects()
        .versions()
        .get(scriptId=script_id, versionNumber=version_number)
    )
    fetched = await asyncio.to_thread(get_request.execute)

    # Report the number echoed by the response rather than the one requested.
    reported_number = fetched.get("versionNumber", "Unknown")
    summary = "\n".join(
        (
            f"Version {reported_number} of script: {script_id}",
            f"Description: {fetched.get('description', 'No description')}",
            f"Created: {fetched.get('createTime', 'Unknown')}",
        )
    )

    logger.info(f"[get_version] Retrieved version {reported_number}")
    return summary
+ + Returns: + str: Formatted string with version details + """ + return await _get_version_impl( + service, user_google_email, script_id, version_number + ) + + +# ============================================================================ +# Metrics +# ============================================================================ + + +async def _get_script_metrics_impl( + service: Any, + user_google_email: str, + script_id: str, + metrics_granularity: str = "DAILY", +) -> str: + """Internal implementation for get_script_metrics.""" + logger.info( + f"[get_script_metrics] Email: {user_google_email}, ScriptID: {script_id}, Granularity: {metrics_granularity}" + ) + + request_params = { + "scriptId": script_id, + "metricsGranularity": metrics_granularity, + } + + response = await asyncio.to_thread( + service.projects().getMetrics(**request_params).execute + ) + + output = [ + f"Metrics for script: {script_id}", + f"Granularity: {metrics_granularity}", + "", + ] + + # Active users + active_users = response.get("activeUsers", []) + if active_users: + output.append("Active Users:") + for metric in active_users: + start_time = metric.get("startTime", "Unknown") + end_time = metric.get("endTime", "Unknown") + value = metric.get("value", "0") + output.append(f" {start_time} to {end_time}: {value} users") + output.append("") + + # Total executions + total_executions = response.get("totalExecutions", []) + if total_executions: + output.append("Total Executions:") + for metric in total_executions: + start_time = metric.get("startTime", "Unknown") + end_time = metric.get("endTime", "Unknown") + value = metric.get("value", "0") + output.append(f" {start_time} to {end_time}: {value} executions") + output.append("") + + # Failed executions + failed_executions = response.get("failedExecutions", []) + if failed_executions: + output.append("Failed Executions:") + for metric in failed_executions: + start_time = metric.get("startTime", "Unknown") + end_time = metric.get("endTime", 
"Unknown") + value = metric.get("value", "0") + output.append(f" {start_time} to {end_time}: {value} failures") + output.append("") + + if not active_users and not total_executions and not failed_executions: + output.append("No metrics data available for this script.") + + logger.info(f"[get_script_metrics] Retrieved metrics for {script_id}") + return "\n".join(output) + + +@server.tool() +@handle_http_errors("get_script_metrics", is_read_only=True, service_type="script") +@require_google_service("script", "script_readonly") +async def get_script_metrics( + service: Any, + user_google_email: str, + script_id: str, + metrics_granularity: str = "DAILY", +) -> str: + """ + Gets execution metrics for a script project. + + Returns analytics data including active users, total executions, + and failed executions over time. + + Args: + service: Injected Google API service client + user_google_email: User's email address + script_id: The script project ID + metrics_granularity: Granularity of metrics - "DAILY" or "WEEKLY" + + Returns: + str: Formatted string with metrics data + """ + return await _get_script_metrics_impl( + service, user_google_email, script_id, metrics_granularity + ) + + +# ============================================================================ +# Trigger Code Generation +# ============================================================================ + + +def _generate_trigger_code_impl( + trigger_type: str, + function_name: str, + schedule: str = "", +) -> str: + """Internal implementation for generate_trigger_code.""" + code_lines = [] + + if trigger_type == "on_open": + code_lines = [ + "// Simple trigger - just rename your function to 'onOpen'", + "// This runs automatically when the document is opened", + "function onOpen(e) {", + f" {function_name}();", + "}", + ] + elif trigger_type == "on_edit": + code_lines = [ + "// Simple trigger - just rename your function to 'onEdit'", + "// This runs automatically when a user edits the spreadsheet", + 
"function onEdit(e) {", + f" {function_name}();", + "}", + ] + elif trigger_type == "time_minutes": + interval = schedule or "5" + code_lines = [ + "// Run this function ONCE to install the trigger", + f"function createTimeTrigger_{function_name}() {{", + " // Delete existing triggers for this function first", + " const triggers = ScriptApp.getProjectTriggers();", + " triggers.forEach(trigger => {", + f" if (trigger.getHandlerFunction() === '{function_name}') {{", + " ScriptApp.deleteTrigger(trigger);", + " }", + " });", + "", + f" // Create new trigger - runs every {interval} minutes", + f" ScriptApp.newTrigger('{function_name}')", + " .timeBased()", + f" .everyMinutes({interval})", + " .create();", + "", + f" Logger.log('Trigger created: {function_name} will run every {interval} minutes');", + "}", + ] + elif trigger_type == "time_hours": + interval = schedule or "1" + code_lines = [ + "// Run this function ONCE to install the trigger", + f"function createTimeTrigger_{function_name}() {{", + " // Delete existing triggers for this function first", + " const triggers = ScriptApp.getProjectTriggers();", + " triggers.forEach(trigger => {", + f" if (trigger.getHandlerFunction() === '{function_name}') {{", + " ScriptApp.deleteTrigger(trigger);", + " }", + " });", + "", + f" // Create new trigger - runs every {interval} hour(s)", + f" ScriptApp.newTrigger('{function_name}')", + " .timeBased()", + f" .everyHours({interval})", + " .create();", + "", + f" Logger.log('Trigger created: {function_name} will run every {interval} hour(s)');", + "}", + ] + elif trigger_type == "time_daily": + hour = schedule or "9" + code_lines = [ + "// Run this function ONCE to install the trigger", + f"function createDailyTrigger_{function_name}() {{", + " // Delete existing triggers for this function first", + " const triggers = ScriptApp.getProjectTriggers();", + " triggers.forEach(trigger => {", + f" if (trigger.getHandlerFunction() === '{function_name}') {{", + " 
ScriptApp.deleteTrigger(trigger);", + " }", + " });", + "", + f" // Create new trigger - runs daily at {hour}:00", + f" ScriptApp.newTrigger('{function_name}')", + " .timeBased()", + f" .atHour({hour})", + " .everyDays(1)", + " .create();", + "", + f" Logger.log('Trigger created: {function_name} will run daily at {hour}:00');", + "}", + ] + elif trigger_type == "time_weekly": + day = schedule.upper() if schedule else "MONDAY" + code_lines = [ + "// Run this function ONCE to install the trigger", + f"function createWeeklyTrigger_{function_name}() {{", + " // Delete existing triggers for this function first", + " const triggers = ScriptApp.getProjectTriggers();", + " triggers.forEach(trigger => {", + f" if (trigger.getHandlerFunction() === '{function_name}') {{", + " ScriptApp.deleteTrigger(trigger);", + " }", + " });", + "", + f" // Create new trigger - runs weekly on {day}", + f" ScriptApp.newTrigger('{function_name}')", + " .timeBased()", + f" .onWeekDay(ScriptApp.WeekDay.{day})", + " .atHour(9)", + " .create();", + "", + f" Logger.log('Trigger created: {function_name} will run every {day} at 9:00');", + "}", + ] + elif trigger_type == "on_form_submit": + code_lines = [ + "// Run this function ONCE to install the trigger", + "// This must be run from a script BOUND to the Google Form", + f"function createFormSubmitTrigger_{function_name}() {{", + " // Delete existing triggers for this function first", + " const triggers = ScriptApp.getProjectTriggers();", + " triggers.forEach(trigger => {", + f" if (trigger.getHandlerFunction() === '{function_name}') {{", + " ScriptApp.deleteTrigger(trigger);", + " }", + " });", + "", + " // Create new trigger - runs when form is submitted", + f" ScriptApp.newTrigger('{function_name}')", + " .forForm(FormApp.getActiveForm())", + " .onFormSubmit()", + " .create();", + "", + f" Logger.log('Trigger created: {function_name} will run on form submit');", + "}", + ] + elif trigger_type == "on_change": + code_lines = [ + "// Run this 
function ONCE to install the trigger", + "// This must be run from a script BOUND to a Google Sheet", + f"function createChangeTrigger_{function_name}() {{", + " // Delete existing triggers for this function first", + " const triggers = ScriptApp.getProjectTriggers();", + " triggers.forEach(trigger => {", + f" if (trigger.getHandlerFunction() === '{function_name}') {{", + " ScriptApp.deleteTrigger(trigger);", + " }", + " });", + "", + " // Create new trigger - runs when spreadsheet changes", + f" ScriptApp.newTrigger('{function_name}')", + " .forSpreadsheet(SpreadsheetApp.getActive())", + " .onChange()", + " .create();", + "", + f" Logger.log('Trigger created: {function_name} will run on spreadsheet change');", + "}", + ] + else: + return ( + f"Unknown trigger type: {trigger_type}\n\n" + "Valid types: time_minutes, time_hours, time_daily, time_weekly, " + "on_open, on_edit, on_form_submit, on_change" + ) + + code = "\n".join(code_lines) + + instructions = [] + if trigger_type.startswith("on_"): + if trigger_type in ("on_open", "on_edit"): + instructions = [ + "SIMPLE TRIGGER", + "=" * 50, + "", + "Add this code to your script. Simple triggers run automatically", + "when the event occurs - no setup function needed.", + "", + "Note: Simple triggers have limitations:", + "- Cannot access services that require authorization", + "- Cannot run longer than 30 seconds", + "- Cannot make external HTTP requests", + "", + "For more capabilities, use an installable trigger instead.", + "", + "CODE TO ADD:", + "-" * 50, + ] + else: + instructions = [ + "INSTALLABLE TRIGGER", + "=" * 50, + "", + "1. Add this code to your script", + f"2. Run the setup function once: createFormSubmitTrigger_{function_name}() or similar", + "3. The trigger will then run automatically", + "", + "CODE TO ADD:", + "-" * 50, + ] + else: + instructions = [ + "INSTALLABLE TRIGGER", + "=" * 50, + "", + "1. Add this code to your script using update_script_content", + "2. 
Run the setup function ONCE (manually in Apps Script editor or via run_script_function)", + "3. The trigger will then run automatically on schedule", + "", + "To check installed triggers: Apps Script editor > Triggers (clock icon)", + "", + "CODE TO ADD:", + "-" * 50, + ] + + return "\n".join(instructions) + "\n\n" + code + + +@server.tool() +async def generate_trigger_code( + trigger_type: str, + function_name: str, + schedule: str = "", +) -> str: + """ + Generates Apps Script code for creating triggers. + + The Apps Script API cannot create triggers directly - they must be created + from within Apps Script itself. This tool generates the code you need. + + Args: + trigger_type: Type of trigger. One of: + - "time_minutes" (run every N minutes: 1, 5, 10, 15, 30) + - "time_hours" (run every N hours: 1, 2, 4, 6, 8, 12) + - "time_daily" (run daily at a specific hour: 0-23) + - "time_weekly" (run weekly on a specific day) + - "on_open" (simple trigger - runs when document opens) + - "on_edit" (simple trigger - runs when user edits) + - "on_form_submit" (runs when form is submitted) + - "on_change" (runs when content changes) + + function_name: The function to run when trigger fires (e.g., "sendDailyReport") + + schedule: Schedule details (depends on trigger_type): + - For time_minutes: "1", "5", "10", "15", or "30" + - For time_hours: "1", "2", "4", "6", "8", or "12" + - For time_daily: hour as "0"-"23" (e.g., "9" for 9am) + - For time_weekly: "MONDAY", "TUESDAY", etc. 
+ - For simple triggers (on_open, on_edit): not needed + + Returns: + str: Apps Script code to create the trigger + """ + return _generate_trigger_code_impl(trigger_type, function_name, schedule) diff --git a/gcalendar/__init__.py b/gcalendar/__init__.py new file mode 100644 index 0000000..0aa9534 --- /dev/null +++ b/gcalendar/__init__.py @@ -0,0 +1 @@ +# Make the calendar directory a Python package diff --git a/gcalendar/calendar_tools.py b/gcalendar/calendar_tools.py new file mode 100644 index 0000000..60b366d --- /dev/null +++ b/gcalendar/calendar_tools.py @@ -0,0 +1,1346 @@ +""" +Google Calendar MCP Tools + +This module provides MCP tools for interacting with Google Calendar API. +""" + +import datetime +import logging +import asyncio +import re +import uuid +import json +from typing import List, Optional, Dict, Any, Union + +from googleapiclient.errors import HttpError +from googleapiclient.discovery import build + +from auth.service_decorator import require_google_service +from core.utils import handle_http_errors + +from core.server import server + + +# Configure module logger +logger = logging.getLogger(__name__) + + +def _parse_reminders_json( + reminders_input: Optional[Union[str, List[Dict[str, Any]]]], function_name: str +) -> List[Dict[str, Any]]: + """ + Parse reminders from JSON string or list object and validate them. 
+ + Args: + reminders_input: JSON string containing reminder objects or list of reminder objects + function_name: Name of calling function for logging + + Returns: + List of validated reminder objects + """ + if not reminders_input: + return [] + + # Handle both string (JSON) and list inputs + if isinstance(reminders_input, str): + try: + reminders = json.loads(reminders_input) + if not isinstance(reminders, list): + logger.warning( + f"[{function_name}] Reminders must be a JSON array, got {type(reminders).__name__}" + ) + return [] + except json.JSONDecodeError as e: + logger.warning(f"[{function_name}] Invalid JSON for reminders: {e}") + return [] + elif isinstance(reminders_input, list): + reminders = reminders_input + else: + logger.warning( + f"[{function_name}] Reminders must be a JSON string or list, got {type(reminders_input).__name__}" + ) + return [] + + # Validate reminders + if len(reminders) > 5: + logger.warning( + f"[{function_name}] More than 5 reminders provided, truncating to first 5" + ) + reminders = reminders[:5] + + validated_reminders = [] + for reminder in reminders: + if ( + not isinstance(reminder, dict) + or "method" not in reminder + or "minutes" not in reminder + ): + logger.warning( + f"[{function_name}] Invalid reminder format: {reminder}, skipping" + ) + continue + + method = reminder["method"].lower() + if method not in ["popup", "email"]: + logger.warning( + f"[{function_name}] Invalid reminder method '{method}', must be 'popup' or 'email', skipping" + ) + continue + + minutes = reminder["minutes"] + if not isinstance(minutes, int) or minutes < 0 or minutes > 40320: + logger.warning( + f"[{function_name}] Invalid reminder minutes '{minutes}', must be integer 0-40320, skipping" + ) + continue + + validated_reminders.append({"method": method, "minutes": minutes}) + + return validated_reminders + + +def _apply_transparency_if_valid( + event_body: Dict[str, Any], + transparency: Optional[str], + function_name: str, +) -> None: + """ + 
Apply transparency to the event body if the provided value is valid. + + Args: + event_body: Event payload being constructed. + transparency: Provided transparency value. + function_name: Name of the calling function for logging context. + """ + if transparency is None: + return + + valid_transparency_values = ["opaque", "transparent"] + if transparency in valid_transparency_values: + event_body["transparency"] = transparency + logger.info(f"[{function_name}] Set transparency to '{transparency}'") + else: + logger.warning( + f"[{function_name}] Invalid transparency value '{transparency}', must be 'opaque' or 'transparent', skipping" + ) + + +def _apply_visibility_if_valid( + event_body: Dict[str, Any], + visibility: Optional[str], + function_name: str, +) -> None: + """ + Apply visibility to the event body if the provided value is valid. + + Args: + event_body: Event payload being constructed. + visibility: Provided visibility value. + function_name: Name of the calling function for logging context. + """ + if visibility is None: + return + + valid_visibility_values = ["default", "public", "private", "confidential"] + if visibility in valid_visibility_values: + event_body["visibility"] = visibility + logger.info(f"[{function_name}] Set visibility to '{visibility}'") + else: + logger.warning( + f"[{function_name}] Invalid visibility value '{visibility}', must be 'default', 'public', 'private', or 'confidential', skipping" + ) + + +def _preserve_existing_fields( + event_body: Dict[str, Any], + existing_event: Dict[str, Any], + field_mappings: Dict[str, Any], +) -> None: + """ + Helper function to preserve existing event fields when not explicitly provided. 
+ + Args: + event_body: The event body being built for the API call + existing_event: The existing event data from the API + field_mappings: Dict mapping field names to their new values (None means preserve existing) + """ + for field_name, new_value in field_mappings.items(): + if new_value is None and field_name in existing_event: + event_body[field_name] = existing_event[field_name] + logger.info(f"[modify_event] Preserving existing {field_name}") + elif new_value is not None: + event_body[field_name] = new_value + + +def _get_meeting_link(item: Dict[str, Any]) -> str: + """Extract video meeting link from event conference data or hangoutLink.""" + conference_data = item.get("conferenceData") + if conference_data and "entryPoints" in conference_data: + for entry_point in conference_data["entryPoints"]: + if entry_point.get("entryPointType") == "video": + uri = entry_point.get("uri", "") + if uri: + return uri + hangout_link = item.get("hangoutLink", "") + if hangout_link: + return hangout_link + return "" + + +def _format_attendee_details( + attendees: List[Dict[str, Any]], indent: str = " " +) -> str: + """ + Format attendee details including response status, organizer, and optional flags. 
+ + Example output format: + " user@example.com: accepted + manager@example.com: declined (organizer) + optional-person@example.com: tentative (optional)" + + Args: + attendees: List of attendee dictionaries from Google Calendar API + indent: Indentation to use for newline-separated attendees (default: " ") + + Returns: + Formatted string with attendee details, or "None" if no attendees + """ + if not attendees: + return "None" + + attendee_details_list = [] + for a in attendees: + email = a.get("email", "unknown") + response_status = a.get("responseStatus", "unknown") + optional = a.get("optional", False) + organizer = a.get("organizer", False) + + detail_parts = [f"{email}: {response_status}"] + if organizer: + detail_parts.append("(organizer)") + if optional: + detail_parts.append("(optional)") + + attendee_details_list.append(" ".join(detail_parts)) + + return f"\n{indent}".join(attendee_details_list) + + +def _format_attachment_details( + attachments: List[Dict[str, Any]], indent: str = " " +) -> str: + """ + Format attachment details including file information. 
+ + + Args: + attachments: List of attachment dictionaries from Google Calendar API + indent: Indentation to use for newline-separated attachments (default: " ") + + Returns: + Formatted string with attachment details, or "None" if no attachments + """ + if not attachments: + return "None" + + attachment_details_list = [] + for att in attachments: + title = att.get("title", "Untitled") + file_url = att.get("fileUrl", "No URL") + file_id = att.get("fileId", "No ID") + mime_type = att.get("mimeType", "Unknown") + + attachment_info = ( + f"{title}\n" + f"{indent}File URL: {file_url}\n" + f"{indent}File ID: {file_id}\n" + f"{indent}MIME Type: {mime_type}" + ) + attachment_details_list.append(attachment_info) + + return f"\n{indent}".join(attachment_details_list) + + +# Helper function to ensure time strings for API calls are correctly formatted +def _correct_time_format_for_api( + time_str: Optional[str], param_name: str +) -> Optional[str]: + if not time_str: + return None + + logger.info( + f"_correct_time_format_for_api: Processing {param_name} with value '{time_str}'" + ) + + # Handle date-only format (YYYY-MM-DD) + if len(time_str) == 10 and time_str.count("-") == 2: + try: + # Validate it's a proper date + datetime.datetime.strptime(time_str, "%Y-%m-%d") + # For date-only, append T00:00:00Z to make it RFC3339 compliant + formatted = f"{time_str}T00:00:00Z" + logger.info( + f"Formatting date-only {param_name} '{time_str}' to RFC3339: '{formatted}'" + ) + return formatted + except ValueError: + logger.warning( + f"{param_name} '{time_str}' looks like a date but is not valid YYYY-MM-DD. Using as is." 
+ ) + return time_str + + # Specifically address YYYY-MM-DDTHH:MM:SS by appending 'Z' + if ( + len(time_str) == 19 + and time_str[10] == "T" + and time_str.count(":") == 2 + and not ( + time_str.endswith("Z") or ("+" in time_str[10:]) or ("-" in time_str[10:]) + ) + ): + try: + # Validate the format before appending 'Z' + datetime.datetime.strptime(time_str, "%Y-%m-%dT%H:%M:%S") + logger.info( + f"Formatting {param_name} '{time_str}' by appending 'Z' for UTC." + ) + return time_str + "Z" + except ValueError: + logger.warning( + f"{param_name} '{time_str}' looks like it needs 'Z' but is not valid YYYY-MM-DDTHH:MM:SS. Using as is." + ) + return time_str + + # If it already has timezone info or doesn't match our patterns, return as is + logger.info(f"{param_name} '{time_str}' doesn't need formatting, using as is.") + return time_str + + +@server.tool() +@handle_http_errors("list_calendars", is_read_only=True, service_type="calendar") +@require_google_service("calendar", "calendar_read") +async def list_calendars(service, user_google_email: str) -> str: + """ + Retrieves a list of calendars accessible to the authenticated user. + + Args: + user_google_email (str): The user's Google email address. Required. + + Returns: + str: A formatted list of the user's calendars (summary, ID, primary status). + """ + logger.info(f"[list_calendars] Invoked. Email: '{user_google_email}'") + + calendar_list_response = await asyncio.to_thread( + lambda: service.calendarList().list().execute() + ) + items = calendar_list_response.get("items", []) + if not items: + return f"No calendars found for {user_google_email}." 
@server.tool()
@handle_http_errors("get_events", is_read_only=True, service_type="calendar")
@require_google_service("calendar", "calendar_read")
async def get_events(
    service,
    user_google_email: str,
    calendar_id: str = "primary",
    event_id: Optional[str] = None,
    time_min: Optional[str] = None,
    time_max: Optional[str] = None,
    max_results: int = 25,
    query: Optional[str] = None,
    detailed: bool = False,
    include_attachments: bool = False,
) -> str:
    """
    Retrieves events from a specified Google Calendar. Can retrieve a single event by ID or multiple events within a time range.
    You can also search for events by keyword by supplying the optional "query" param.

    Args:
        user_google_email (str): The user's Google email address. Required.
        calendar_id (str): The ID of the calendar to query. Use 'primary' for the user's primary calendar. Defaults to 'primary'. Calendar IDs can be obtained using `list_calendars`.
        event_id (Optional[str]): The ID of a specific event to retrieve. If provided, retrieves only this event and ignores time filtering parameters.
        time_min (Optional[str]): The start of the time range (inclusive) in RFC3339 format (e.g., '2024-05-12T10:00:00Z' or '2024-05-12'). If omitted, defaults to the current time. Ignored if event_id is provided.
        time_max (Optional[str]): The end of the time range (exclusive) in RFC3339 format. If omitted, events starting from `time_min` onwards are considered (up to `max_results`). Ignored if event_id is provided.
        max_results (int): The maximum number of events to return. Defaults to 25. Ignored if event_id is provided.
        query (Optional[str]): A keyword to search for within event fields (summary, description, location). Ignored if event_id is provided.
        detailed (bool): Whether to return detailed event information including description, location, attendees, and attendee details (response status, organizer, optional flags). Defaults to False.
        include_attachments (bool): Whether to include attachment information in detailed event output. When True, shows attachment details (fileId, fileUrl, mimeType, title) for events that have attachments. Only applies when detailed=True. Set this to True when you need to view or access files that have been attached to calendar events, such as meeting documents, presentations, or other shared files. Defaults to False.

    Returns:
        str: A formatted list of events (summary, start and end times, link) within the specified range, or detailed information for a single event if event_id is provided.
    """
    logger.info(
        f"[get_events] Raw parameters - event_id: '{event_id}', time_min: '{time_min}', time_max: '{time_max}', query: '{query}', detailed: {detailed}, include_attachments: {include_attachments}"
    )

    # Handle single event retrieval
    if event_id:
        logger.info(f"[get_events] Retrieving single event with ID: {event_id}")
        # The blocking client call runs in a worker thread so the event loop
        # stays responsive.
        event = await asyncio.to_thread(
            lambda: (
                service.events().get(calendarId=calendar_id, eventId=event_id).execute()
            )
        )
        # Always exactly one element, so no emptiness check is needed on this
        # path. NOTE(review): a missing event presumably surfaces as an
        # exception from execute() - confirm against handle_http_errors.
        items = [event]
    else:
        # Handle multiple events retrieval with time filtering
        # Ensure time_min and time_max are correctly formatted for the API
        formatted_time_min = _correct_time_format_for_api(time_min, "time_min")
        if formatted_time_min:
            effective_time_min = formatted_time_min
        else:
            utc_now = datetime.datetime.now(datetime.timezone.utc)
            effective_time_min = utc_now.isoformat().replace("+00:00", "Z")
        if time_min is None:
            logger.info(
                f"time_min not provided, defaulting to current UTC time: {effective_time_min}"
            )
        else:
            logger.info(
                f"time_min processing: original='{time_min}', formatted='{formatted_time_min}', effective='{effective_time_min}'"
            )

        effective_time_max = _correct_time_format_for_api(time_max, "time_max")
        if time_max:
            logger.info(
                f"time_max processing: original='{time_max}', formatted='{effective_time_max}'"
            )

        logger.info(
            f"[get_events] Final API parameters - calendarId: '{calendar_id}', timeMin: '{effective_time_min}', timeMax: '{effective_time_max}', maxResults: {max_results}, query: '{query}'"
        )

        # Build the request parameters dynamically
        request_params = {
            "calendarId": calendar_id,
            "timeMin": effective_time_min,
            "timeMax": effective_time_max,
            "maxResults": max_results,
            "singleEvents": True,
            "orderBy": "startTime",
        }

        if query:
            request_params["q"] = query

        events_result = await asyncio.to_thread(
            lambda: service.events().list(**request_params).execute()
        )
        items = events_result.get("items", [])
        if not items:
            return f"No events found in calendar '{calendar_id}' for {user_google_email} for the specified time range."

    # Handle returning detailed output for a single event when requested
    if event_id and detailed:
        item = items[0]
        summary = item.get("summary", "No Title")
        # Prefer 'dateTime'; fall back to 'date' when it is absent.
        start = item["start"].get("dateTime", item["start"].get("date"))
        end = item["end"].get("dateTime", item["end"].get("date"))
        link = item.get("htmlLink", "No Link")
        description = item.get("description", "No Description")
        location = item.get("location", "No Location")
        color_id = item.get("colorId", "None")
        attendees = item.get("attendees", [])
        attendee_emails = (
            ", ".join([a.get("email", "") for a in attendees]) if attendees else "None"
        )
        attendee_details_str = _format_attendee_details(attendees, indent=" ")

        meeting_link = _get_meeting_link(item)

        event_details = (
            f"Event Details:\n"
            f"- Title: {summary}\n"
            f"- Starts: {start}\n"
            f"- Ends: {end}\n"
            f"- Description: {description}\n"
            f"- Location: {location}\n"
            f"- Color ID: {color_id}\n"
        )
        if meeting_link:
            event_details += f"- Meeting Link: {meeting_link}\n"
        event_details += (
            f"- Attendees: {attendee_emails}\n"
            f"- Attendee Details: {attendee_details_str}\n"
        )

        if include_attachments:
            attachments = item.get("attachments", [])
            attachment_details_str = _format_attachment_details(
                attachments, indent=" "
            )
            event_details += f"- Attachments: {attachment_details_str}\n"

        event_details += f"- Event ID: {event_id}\n- Link: {link}"
        logger.info(
            f"[get_events] Successfully retrieved detailed event {event_id} for {user_google_email}."
        )
        return event_details

    # Handle multiple events or single event with basic output
    event_details_list = []
    for item in items:
        summary = item.get("summary", "No Title")
        start_time = item["start"].get("dateTime", item["start"].get("date"))
        end_time = item["end"].get("dateTime", item["end"].get("date"))
        link = item.get("htmlLink", "No Link")
        item_event_id = item.get("id", "No ID")

        if detailed:
            # Add detailed information for multiple events
            description = item.get("description", "No Description")
            location = item.get("location", "No Location")
            attendees = item.get("attendees", [])
            attendee_emails = (
                ", ".join([a.get("email", "") for a in attendees])
                if attendees
                else "None"
            )
            attendee_details_str = _format_attendee_details(attendees, indent=" ")

            meeting_link = _get_meeting_link(item)

            event_detail_parts = (
                f'- "{summary}" (Starts: {start_time}, Ends: {end_time})\n'
                f" Description: {description}\n"
                f" Location: {location}\n"
            )
            if meeting_link:
                event_detail_parts += f" Meeting Link: {meeting_link}\n"
            event_detail_parts += (
                f" Attendees: {attendee_emails}\n"
                f" Attendee Details: {attendee_details_str}\n"
            )

            if include_attachments:
                attachments = item.get("attachments", [])
                attachment_details_str = _format_attachment_details(
                    attachments, indent=" "
                )
                event_detail_parts += f" Attachments: {attachment_details_str}\n"

            event_detail_parts += f" ID: {item_event_id} | Link: {link}"
            event_details_list.append(event_detail_parts)
        else:
            # Basic output format
            meeting_link = _get_meeting_link(item)
            basic_line = f'- "{summary}" (Starts: {start_time}, Ends: {end_time})'
            if meeting_link:
                basic_line += f" Meeting: {meeting_link}"
            basic_line += f" ID: {item_event_id} | Link: {link}"
            event_details_list.append(basic_line)

    if event_id:
        # Single event basic output
        header = f"Successfully retrieved event from calendar '{calendar_id}' for {user_google_email}:\n"
    else:
        # Multiple events output
        header = f"Successfully retrieved {len(items)} events from calendar '{calendar_id}' for {user_google_email}:\n"

    # Concatenate explicitly: the header must precede the joined lines and
    # must never end up acting as the join separator.
    text_output = header + "\n".join(event_details_list)

    logger.info(f"Successfully retrieved {len(items)} events for {user_google_email}.")
    return text_output
Email: '{user_google_email}', Summary: {summary}" + ) + logger.info(f"[create_event] Incoming attachments param: {attachments}") + # If attachments value is a string, split by comma and strip whitespace + if attachments and isinstance(attachments, str): + attachments = [a.strip() for a in attachments.split(",") if a.strip()] + logger.info( + f"[create_event] Parsed attachments list from string: {attachments}" + ) + event_body: Dict[str, Any] = { + "summary": summary, + "start": ( + {"date": start_time} if "T" not in start_time else {"dateTime": start_time} + ), + "end": ({"date": end_time} if "T" not in end_time else {"dateTime": end_time}), + } + if location: + event_body["location"] = location + if description: + event_body["description"] = description + if timezone: + if "dateTime" in event_body["start"]: + event_body["start"]["timeZone"] = timezone + if "dateTime" in event_body["end"]: + event_body["end"]["timeZone"] = timezone + if attendees: + event_body["attendees"] = [{"email": email} for email in attendees] + + # Handle reminders + if reminders is not None or not use_default_reminders: + # If custom reminders are provided, automatically disable default reminders + effective_use_default = use_default_reminders and reminders is None + + reminder_data = {"useDefault": effective_use_default} + if reminders is not None: + validated_reminders = _parse_reminders_json(reminders, "create_event") + if validated_reminders: + reminder_data["overrides"] = validated_reminders + logger.info( + f"[create_event] Added {len(validated_reminders)} custom reminders" + ) + if use_default_reminders: + logger.info( + "[create_event] Custom reminders provided - disabling default reminders" + ) + + event_body["reminders"] = reminder_data + + # Handle transparency validation + _apply_transparency_if_valid(event_body, transparency, "create_event") + + # Handle visibility validation + _apply_visibility_if_valid(event_body, visibility, "create_event") + + # Handle guest permissions + 
if guests_can_modify is not None: + event_body["guestsCanModify"] = guests_can_modify + logger.info(f"[create_event] Set guestsCanModify to {guests_can_modify}") + if guests_can_invite_others is not None: + event_body["guestsCanInviteOthers"] = guests_can_invite_others + logger.info( + f"[create_event] Set guestsCanInviteOthers to {guests_can_invite_others}" + ) + if guests_can_see_other_guests is not None: + event_body["guestsCanSeeOtherGuests"] = guests_can_see_other_guests + logger.info( + f"[create_event] Set guestsCanSeeOtherGuests to {guests_can_see_other_guests}" + ) + + if add_google_meet: + request_id = str(uuid.uuid4()) + event_body["conferenceData"] = { + "createRequest": { + "requestId": request_id, + "conferenceSolutionKey": {"type": "hangoutsMeet"}, + } + } + logger.info( + f"[create_event] Adding Google Meet conference with request ID: {request_id}" + ) + + if attachments: + # Accept both file URLs and file IDs. If a URL, extract the fileId. + event_body["attachments"] = [] + drive_service = None + try: + try: + drive_service = service._http and build( + "drive", "v3", http=service._http + ) + except Exception as e: + logger.warning( + f"Could not build Drive service for MIME type lookup: {e}" + ) + for att in attachments: + file_id = None + if att.startswith("https://"): + # Match /d/, /file/d/, ?id= + match = re.search(r"(?:/d/|/file/d/|id=)([\w-]+)", att) + file_id = match.group(1) if match else None + logger.info( + f"[create_event] Extracted file_id '{file_id}' from attachment URL '{att}'" + ) + else: + file_id = att + logger.info( + f"[create_event] Using direct file_id '{file_id}' for attachment" + ) + if file_id: + file_url = f"https://drive.google.com/open?id={file_id}" + mime_type = "application/vnd.google-apps.drive-sdk" + title = "Drive Attachment" + # Try to get the actual MIME type and filename from Drive + if drive_service: + try: + file_metadata = await asyncio.to_thread( + lambda: ( + drive_service.files() + .get( + fileId=file_id, + 
fields="mimeType,name", + supportsAllDrives=True, + ) + .execute() + ) + ) + mime_type = file_metadata.get("mimeType", mime_type) + filename = file_metadata.get("name") + if filename: + title = filename + logger.info( + f"[create_event] Using filename '{filename}' as attachment title" + ) + else: + logger.info( + "[create_event] No filename found, using generic title" + ) + except Exception as e: + logger.warning( + f"Could not fetch metadata for file {file_id}: {e}" + ) + event_body["attachments"].append( + { + "fileUrl": file_url, + "title": title, + "mimeType": mime_type, + } + ) + finally: + if drive_service: + drive_service.close() + created_event = await asyncio.to_thread( + lambda: ( + service.events() + .insert( + calendarId=calendar_id, + body=event_body, + supportsAttachments=True, + conferenceDataVersion=1 if add_google_meet else 0, + ) + .execute() + ) + ) + else: + created_event = await asyncio.to_thread( + lambda: ( + service.events() + .insert( + calendarId=calendar_id, + body=event_body, + conferenceDataVersion=1 if add_google_meet else 0, + ) + .execute() + ) + ) + link = created_event.get("htmlLink", "No link available") + confirmation_message = f"Successfully created event '{created_event.get('summary', summary)}' for {user_google_email}. Link: {link}" + + # Add Google Meet information if conference was created + if add_google_meet and "conferenceData" in created_event: + conference_data = created_event["conferenceData"] + if "entryPoints" in conference_data: + for entry_point in conference_data["entryPoints"]: + if entry_point.get("entryPointType") == "video": + meet_link = entry_point.get("uri", "") + if meet_link: + confirmation_message += f" Google Meet: {meet_link}" + break + + logger.info( + f"Event created successfully for {user_google_email}. 
ID: {created_event.get('id')}, Link: {link}" + ) + return confirmation_message + + +def _normalize_attendees( + attendees: Optional[Union[List[str], List[Dict[str, Any]]]], +) -> Optional[List[Dict[str, Any]]]: + """ + Normalize attendees input to list of attendee objects. + + Accepts either: + - List of email strings: ["user@example.com", "other@example.com"] + - List of attendee objects: [{"email": "user@example.com", "responseStatus": "accepted"}] + - Mixed list of both formats + + Returns list of attendee dicts with at minimum 'email' key. + """ + if attendees is None: + return None + + normalized = [] + for att in attendees: + if isinstance(att, str): + normalized.append({"email": att}) + elif isinstance(att, dict) and "email" in att: + normalized.append(att) + else: + logger.warning( + f"[_normalize_attendees] Invalid attendee format: {att}, skipping" + ) + return normalized if normalized else None + + +async def _modify_event_impl( + service, + user_google_email: str, + event_id: str, + calendar_id: str = "primary", + summary: Optional[str] = None, + start_time: Optional[str] = None, + end_time: Optional[str] = None, + description: Optional[str] = None, + location: Optional[str] = None, + attendees: Optional[Union[List[str], List[Dict[str, Any]]]] = None, + timezone: Optional[str] = None, + add_google_meet: Optional[bool] = None, + reminders: Optional[Union[str, List[Dict[str, Any]]]] = None, + use_default_reminders: Optional[bool] = None, + transparency: Optional[str] = None, + visibility: Optional[str] = None, + color_id: Optional[str] = None, + guests_can_modify: Optional[bool] = None, + guests_can_invite_others: Optional[bool] = None, + guests_can_see_other_guests: Optional[bool] = None, +) -> str: + """Internal implementation for modifying a calendar event.""" + logger.info( + f"[modify_event] Invoked. 
Email: '{user_google_email}', Event ID: {event_id}" + ) + + # Build the event body with only the fields that are provided + event_body: Dict[str, Any] = {} + if summary is not None: + event_body["summary"] = summary + if start_time is not None: + event_body["start"] = ( + {"date": start_time} if "T" not in start_time else {"dateTime": start_time} + ) + if timezone is not None and "dateTime" in event_body["start"]: + event_body["start"]["timeZone"] = timezone + if end_time is not None: + event_body["end"] = ( + {"date": end_time} if "T" not in end_time else {"dateTime": end_time} + ) + if timezone is not None and "dateTime" in event_body["end"]: + event_body["end"]["timeZone"] = timezone + if description is not None: + event_body["description"] = description + if location is not None: + event_body["location"] = location + + # Normalize attendees - accepts both email strings and full attendee objects + normalized_attendees = _normalize_attendees(attendees) + if normalized_attendees is not None: + event_body["attendees"] = normalized_attendees + + if color_id is not None: + event_body["colorId"] = color_id + + # Handle reminders + if reminders is not None or use_default_reminders is not None: + reminder_data = {} + if use_default_reminders is not None: + reminder_data["useDefault"] = use_default_reminders + else: + # Preserve existing event's useDefault value if not explicitly specified + try: + existing_event = ( + service.events() + .get(calendarId=calendar_id, eventId=event_id) + .execute() + ) + reminder_data["useDefault"] = existing_event.get("reminders", {}).get( + "useDefault", True + ) + except Exception as e: + logger.warning( + f"[modify_event] Could not fetch existing event for reminders: {e}" + ) + reminder_data["useDefault"] = ( + True # Fallback to True if unable to fetch + ) + + # If custom reminders are provided, automatically disable default reminders + if reminders is not None: + if reminder_data.get("useDefault", False): + 
reminder_data["useDefault"] = False + logger.info( + "[modify_event] Custom reminders provided - disabling default reminders" + ) + + validated_reminders = _parse_reminders_json(reminders, "modify_event") + if reminders and not validated_reminders: + logger.warning( + "[modify_event] Reminders provided but failed validation. No custom reminders will be set." + ) + elif validated_reminders: + reminder_data["overrides"] = validated_reminders + logger.info( + f"[modify_event] Updated reminders with {len(validated_reminders)} custom reminders" + ) + + event_body["reminders"] = reminder_data + + # Handle transparency validation + _apply_transparency_if_valid(event_body, transparency, "modify_event") + + # Handle visibility validation + _apply_visibility_if_valid(event_body, visibility, "modify_event") + + # Handle guest permissions + if guests_can_modify is not None: + event_body["guestsCanModify"] = guests_can_modify + logger.info(f"[modify_event] Set guestsCanModify to {guests_can_modify}") + if guests_can_invite_others is not None: + event_body["guestsCanInviteOthers"] = guests_can_invite_others + logger.info( + f"[modify_event] Set guestsCanInviteOthers to {guests_can_invite_others}" + ) + if guests_can_see_other_guests is not None: + event_body["guestsCanSeeOtherGuests"] = guests_can_see_other_guests + logger.info( + f"[modify_event] Set guestsCanSeeOtherGuests to {guests_can_see_other_guests}" + ) + + if timezone is not None and "start" not in event_body and "end" not in event_body: + # If timezone is provided but start/end times are not, we need to fetch the existing event + # to apply the timezone correctly. This is a simplification; a full implementation + # might handle this more robustly or require start/end with timezone. + # For now, we'll log a warning and skip applying timezone if start/end are missing. + logger.warning( + "[modify_event] Timezone provided but start_time and end_time are missing. 
Timezone will not be applied unless start/end times are also provided." + ) + + if not event_body: + message = "No fields provided to modify the event." + logger.warning(f"[modify_event] {message}") + raise Exception(message) + + # Log the event ID for debugging + logger.info( + f"[modify_event] Attempting to update event with ID: '{event_id}' in calendar '{calendar_id}'" + ) + + # Get the existing event to preserve fields that aren't being updated + try: + existing_event = await asyncio.to_thread( + lambda: ( + service.events().get(calendarId=calendar_id, eventId=event_id).execute() + ) + ) + logger.info( + "[modify_event] Successfully retrieved existing event before update" + ) + + # Preserve existing fields if not provided in the update + _preserve_existing_fields( + event_body, + existing_event, + { + "summary": summary, + "description": description, + "location": location, + # Use the already-normalized attendee objects (if provided); otherwise preserve existing + "attendees": event_body.get("attendees"), + "colorId": event_body.get("colorId"), + }, + ) + + # Handle Google Meet conference data + if add_google_meet is not None: + if add_google_meet: + # Add Google Meet + request_id = str(uuid.uuid4()) + event_body["conferenceData"] = { + "createRequest": { + "requestId": request_id, + "conferenceSolutionKey": {"type": "hangoutsMeet"}, + } + } + logger.info( + f"[modify_event] Adding Google Meet conference with request ID: {request_id}" + ) + else: + # Remove Google Meet by setting conferenceData to empty + event_body["conferenceData"] = {} + logger.info("[modify_event] Removing Google Meet conference") + elif "conferenceData" in existing_event: + # Preserve existing conference data if not specified + event_body["conferenceData"] = existing_event["conferenceData"] + logger.info("[modify_event] Preserving existing conference data") + + except HttpError as get_error: + if get_error.resp.status == 404: + logger.error( + f"[modify_event] Event not found during 
pre-update verification: {get_error}" + ) + message = f"Event not found during verification. The event with ID '{event_id}' could not be found in calendar '{calendar_id}'. This may be due to incorrect ID format or the event no longer exists." + raise Exception(message) + else: + logger.warning( + f"[modify_event] Error during pre-update verification, but proceeding with update: {get_error}" + ) + + # Proceed with the update + updated_event = await asyncio.to_thread( + lambda: ( + service.events() + .update( + calendarId=calendar_id, + eventId=event_id, + body=event_body, + conferenceDataVersion=1, + ) + .execute() + ) + ) + + link = updated_event.get("htmlLink", "No link available") + confirmation_message = f"Successfully modified event '{updated_event.get('summary', summary)}' (ID: {event_id}) for {user_google_email}. Link: {link}" + + # Add Google Meet information if conference was added + if add_google_meet is True and "conferenceData" in updated_event: + conference_data = updated_event["conferenceData"] + if "entryPoints" in conference_data: + for entry_point in conference_data["entryPoints"]: + if entry_point.get("entryPointType") == "video": + meet_link = entry_point.get("uri", "") + if meet_link: + confirmation_message += f" Google Meet: {meet_link}" + break + elif add_google_meet is False: + confirmation_message += " (Google Meet removed)" + + logger.info( + f"Event modified successfully for {user_google_email}. ID: {updated_event.get('id')}, Link: {link}" + ) + return confirmation_message + + +async def _delete_event_impl( + service, + user_google_email: str, + event_id: str, + calendar_id: str = "primary", +) -> str: + """Internal implementation for deleting a calendar event.""" + logger.info( + f"[delete_event] Invoked. 
Email: '{user_google_email}', Event ID: {event_id}" + ) + + # Log the event ID for debugging + logger.info( + f"[delete_event] Attempting to delete event with ID: '{event_id}' in calendar '{calendar_id}'" + ) + + # Try to get the event first to verify it exists + try: + await asyncio.to_thread( + lambda: ( + service.events().get(calendarId=calendar_id, eventId=event_id).execute() + ) + ) + logger.info("[delete_event] Successfully verified event exists before deletion") + except HttpError as get_error: + if get_error.resp.status == 404: + logger.error( + f"[delete_event] Event not found during pre-delete verification: {get_error}" + ) + message = f"Event not found during verification. The event with ID '{event_id}' could not be found in calendar '{calendar_id}'. This may be due to incorrect ID format or the event no longer exists." + raise Exception(message) + else: + logger.warning( + f"[delete_event] Error during pre-delete verification, but proceeding with deletion: {get_error}" + ) + + # Proceed with the deletion + await asyncio.to_thread( + lambda: ( + service.events().delete(calendarId=calendar_id, eventId=event_id).execute() + ) + ) + + confirmation_message = f"Successfully deleted event (ID: {event_id}) from calendar '{calendar_id}' for {user_google_email}." + logger.info(f"Event deleted successfully for {user_google_email}. 
ID: {event_id}") + return confirmation_message + + +# --------------------------------------------------------------------------- +# Consolidated event management tool +# --------------------------------------------------------------------------- + + +@server.tool() +@handle_http_errors("manage_event", service_type="calendar") +@require_google_service("calendar", "calendar_events") +async def manage_event( + service, + user_google_email: str, + action: str, + summary: Optional[str] = None, + start_time: Optional[str] = None, + end_time: Optional[str] = None, + event_id: Optional[str] = None, + calendar_id: str = "primary", + description: Optional[str] = None, + location: Optional[str] = None, + attendees: Optional[Union[List[str], List[Dict[str, Any]]]] = None, + timezone: Optional[str] = None, + attachments: Optional[List[str]] = None, + add_google_meet: Optional[bool] = None, + reminders: Optional[Union[str, List[Dict[str, Any]]]] = None, + use_default_reminders: Optional[bool] = None, + transparency: Optional[str] = None, + visibility: Optional[str] = None, + color_id: Optional[str] = None, + guests_can_modify: Optional[bool] = None, + guests_can_invite_others: Optional[bool] = None, + guests_can_see_other_guests: Optional[bool] = None, +) -> str: + """ + Manages calendar events. Supports creating, updating, and deleting events. + + Args: + user_google_email (str): The user's Google email address. Required. + action (str): Action to perform - "create", "update", or "delete". + summary (Optional[str]): Event title (required for create). + start_time (Optional[str]): Start time in RFC3339 format (required for create). + end_time (Optional[str]): End time in RFC3339 format (required for create). + event_id (Optional[str]): Event ID (required for update and delete). + calendar_id (str): Calendar ID (default: 'primary'). + description (Optional[str]): Event description. + location (Optional[str]): Event location. 
+ attendees (Optional[Union[List[str], List[Dict[str, Any]]]]): Attendee email addresses or objects. + timezone (Optional[str]): Timezone (e.g., "America/New_York"). + attachments (Optional[List[str]]): List of Google Drive file URLs or IDs to attach. + add_google_meet (Optional[bool]): Whether to add/remove Google Meet. + reminders (Optional[Union[str, List[Dict[str, Any]]]]): Custom reminder objects. + use_default_reminders (Optional[bool]): Whether to use default reminders. + transparency (Optional[str]): "opaque" (busy) or "transparent" (free). + visibility (Optional[str]): "default", "public", "private", or "confidential". + color_id (Optional[str]): Event color ID (1-11, update only). + guests_can_modify (Optional[bool]): Whether attendees can modify. + guests_can_invite_others (Optional[bool]): Whether attendees can invite others. + guests_can_see_other_guests (Optional[bool]): Whether attendees can see other guests. + + Returns: + str: Confirmation message with event details. + """ + action_lower = action.lower().strip() + if action_lower == "create": + if not summary or not start_time or not end_time: + raise ValueError( + "summary, start_time, and end_time are required for create action" + ) + return await _create_event_impl( + service=service, + user_google_email=user_google_email, + summary=summary, + start_time=start_time, + end_time=end_time, + calendar_id=calendar_id, + description=description, + location=location, + attendees=attendees, + timezone=timezone, + attachments=attachments, + add_google_meet=add_google_meet or False, + reminders=reminders, + use_default_reminders=use_default_reminders + if use_default_reminders is not None + else True, + transparency=transparency, + visibility=visibility, + guests_can_modify=guests_can_modify, + guests_can_invite_others=guests_can_invite_others, + guests_can_see_other_guests=guests_can_see_other_guests, + ) + elif action_lower == "update": + if not event_id: + raise ValueError("event_id is required for 
update action") + return await _modify_event_impl( + service=service, + user_google_email=user_google_email, + event_id=event_id, + calendar_id=calendar_id, + summary=summary, + start_time=start_time, + end_time=end_time, + description=description, + location=location, + attendees=attendees, + timezone=timezone, + add_google_meet=add_google_meet, + reminders=reminders, + use_default_reminders=use_default_reminders, + transparency=transparency, + visibility=visibility, + color_id=color_id, + guests_can_modify=guests_can_modify, + guests_can_invite_others=guests_can_invite_others, + guests_can_see_other_guests=guests_can_see_other_guests, + ) + elif action_lower == "delete": + if not event_id: + raise ValueError("event_id is required for delete action") + return await _delete_event_impl( + service=service, + user_google_email=user_google_email, + event_id=event_id, + calendar_id=calendar_id, + ) + else: + raise ValueError( + f"Invalid action '{action_lower}'. Must be 'create', 'update', or 'delete'." + ) + + +# --------------------------------------------------------------------------- +# Legacy single-action tools (deprecated -- prefer ``manage_event``) +# --------------------------------------------------------------------------- + + +@server.tool() +@handle_http_errors("query_freebusy", is_read_only=True, service_type="calendar") +@require_google_service("calendar", "calendar_read") +async def query_freebusy( + service, + user_google_email: str, + time_min: str, + time_max: str, + calendar_ids: Optional[List[str]] = None, + group_expansion_max: Optional[int] = None, + calendar_expansion_max: Optional[int] = None, +) -> str: + """ + Returns free/busy information for a set of calendars. + + Args: + user_google_email (str): The user's Google email address. Required. + time_min (str): The start of the interval for the query in RFC3339 format (e.g., '2024-05-12T10:00:00Z' or '2024-05-12'). 
+ time_max (str): The end of the interval for the query in RFC3339 format (e.g., '2024-05-12T18:00:00Z' or '2024-05-12'). + calendar_ids (Optional[List[str]]): List of calendar identifiers to query. If not provided, queries the primary calendar. Use 'primary' for the user's primary calendar or specific calendar IDs obtained from `list_calendars`. + group_expansion_max (Optional[int]): Maximum number of calendar identifiers to be provided for a single group. Optional. An error is returned for a group with more members than this value. Maximum value is 100. + calendar_expansion_max (Optional[int]): Maximum number of calendars for which FreeBusy information is to be provided. Optional. Maximum value is 50. + + Returns: + str: A formatted response showing free/busy information for each requested calendar, including busy time periods. + """ + logger.info( + f"[query_freebusy] Invoked. Email: '{user_google_email}', time_min: '{time_min}', time_max: '{time_max}'" + ) + + # Format time parameters + formatted_time_min = _correct_time_format_for_api(time_min, "time_min") + formatted_time_max = _correct_time_format_for_api(time_max, "time_max") + + # Default to primary calendar if no calendar IDs provided + if not calendar_ids: + calendar_ids = ["primary"] + + # Build the request body + request_body: Dict[str, Any] = { + "timeMin": formatted_time_min, + "timeMax": formatted_time_max, + "items": [{"id": cal_id} for cal_id in calendar_ids], + } + + if group_expansion_max is not None: + request_body["groupExpansionMax"] = group_expansion_max + if calendar_expansion_max is not None: + request_body["calendarExpansionMax"] = calendar_expansion_max + + logger.info( + f"[query_freebusy] Request body: timeMin={formatted_time_min}, timeMax={formatted_time_max}, calendars={calendar_ids}" + ) + + # Execute the freebusy query + freebusy_result = await asyncio.to_thread( + lambda: service.freebusy().query(body=request_body).execute() + ) + + # Parse the response + calendars = 
freebusy_result.get("calendars", {}) + time_min_result = freebusy_result.get("timeMin", formatted_time_min) + time_max_result = freebusy_result.get("timeMax", formatted_time_max) + + if not calendars: + return f"No free/busy information found for the requested calendars for {user_google_email}." + + # Format the output + output_lines = [ + f"Free/Busy information for {user_google_email}:", + f"Time range: {time_min_result} to {time_max_result}", + "", + ] + + for cal_id, cal_data in calendars.items(): + output_lines.append(f"Calendar: {cal_id}") + + # Check for errors + errors = cal_data.get("errors", []) + if errors: + output_lines.append(" Errors:") + for error in errors: + domain = error.get("domain", "unknown") + reason = error.get("reason", "unknown") + output_lines.append(f" - {domain}: {reason}") + output_lines.append("") + continue + + # Get busy periods + busy_periods = cal_data.get("busy", []) + if not busy_periods: + output_lines.append(" Status: Free (no busy periods)") + else: + output_lines.append(f" Busy periods: {len(busy_periods)}") + for period in busy_periods: + start = period.get("start", "Unknown") + end = period.get("end", "Unknown") + output_lines.append(f" - {start} to {end}") + + output_lines.append("") + + result_text = "\n".join(output_lines) + logger.info( + f"[query_freebusy] Successfully retrieved free/busy information for {len(calendars)} calendar(s)" + ) + return result_text diff --git a/gchat/__init__.py b/gchat/__init__.py new file mode 100644 index 0000000..1792fd4 --- /dev/null +++ b/gchat/__init__.py @@ -0,0 +1,7 @@ +""" +Google Chat MCP Tools Package +""" + +from . import chat_tools + +__all__ = ["chat_tools"] diff --git a/gchat/chat_tools.py b/gchat/chat_tools.py new file mode 100644 index 0000000..762cc10 --- /dev/null +++ b/gchat/chat_tools.py @@ -0,0 +1,583 @@ +""" +Google Chat MCP Tools + +This module provides MCP tools for interacting with Google Chat API. 
+""" + +import base64 +import logging +import asyncio +from typing import Dict, List, Optional + +import httpx +from googleapiclient.errors import HttpError + +# Auth & server utilities +from auth.service_decorator import require_google_service, require_multiple_services +from core.server import server +from core.utils import handle_http_errors + +logger = logging.getLogger(__name__) + +# In-memory cache for user ID → display name (bounded to avoid unbounded growth) +_SENDER_CACHE_MAX_SIZE = 256 +_sender_name_cache: Dict[str, str] = {} + + +def _cache_sender(user_id: str, name: str) -> None: + """Store a resolved sender name, evicting oldest entries if cache is full.""" + if len(_sender_name_cache) >= _SENDER_CACHE_MAX_SIZE: + to_remove = list(_sender_name_cache.keys())[: _SENDER_CACHE_MAX_SIZE // 2] + for k in to_remove: + del _sender_name_cache[k] + _sender_name_cache[user_id] = name + + +async def _resolve_sender(people_service, sender_obj: dict) -> str: + """Resolve a Chat message sender to a display name. + + Fast path: use displayName if the API already provided it. + Slow path: look up the user via the People API directory and cache the result. + """ + # Fast path — Chat API sometimes provides displayName directly + display_name = sender_obj.get("displayName") + if display_name: + return display_name + + user_id = sender_obj.get("name", "") # e.g. 
"users/123456789" + if not user_id: + return "Unknown Sender" + + # Check cache + if user_id in _sender_name_cache: + return _sender_name_cache[user_id] + + # Try People API directory lookup + # Chat API uses "users/ID" but People API expects "people/ID" + people_resource = user_id.replace("users/", "people/", 1) + if people_service: + try: + person = await asyncio.to_thread( + people_service.people() + .get(resourceName=people_resource, personFields="names,emailAddresses") + .execute + ) + names = person.get("names", []) + if names: + resolved = names[0].get("displayName", user_id) + _cache_sender(user_id, resolved) + return resolved + # Fall back to email if no name + emails = person.get("emailAddresses", []) + if emails: + resolved = emails[0].get("value", user_id) + _cache_sender(user_id, resolved) + return resolved + except HttpError as e: + logger.debug(f"People API lookup failed for {user_id}: {e}") + except Exception as e: + logger.debug(f"Unexpected error resolving {user_id}: {e}") + + # Final fallback + _cache_sender(user_id, user_id) + return user_id + + +def _extract_rich_links(msg: dict) -> List[str]: + """Extract URLs from RICH_LINK annotations (smart chips). + + When a user pastes a Google Workspace URL in Chat and it renders as a + smart chip, the URL is NOT in the text field — it's only available in + the annotations array as a RICH_LINK with richLinkMetadata.uri. 
+ """ + text = msg.get("text", "") + urls = [] + for ann in msg.get("annotations", []): + if ann.get("type") == "RICH_LINK": + uri = ann.get("richLinkMetadata", {}).get("uri", "") + if uri and uri not in text: + urls.append(uri) + return urls + + +@server.tool() +@require_google_service("chat", "chat_spaces_readonly") +@handle_http_errors("list_spaces", service_type="chat") +async def list_spaces( + service, + user_google_email: str, + page_size: int = 100, + space_type: str = "all", # "all", "room", "dm" +) -> str: + """ + Lists Google Chat spaces (rooms and direct messages) accessible to the user. + + Returns: + str: A formatted list of Google Chat spaces accessible to the user. + """ + logger.info(f"[list_spaces] Email={user_google_email}, Type={space_type}") + + # Build filter based on space_type + filter_param = None + if space_type == "room": + filter_param = "spaceType = SPACE" + elif space_type == "dm": + filter_param = "spaceType = DIRECT_MESSAGE" + + request_params = {"pageSize": page_size} + if filter_param: + request_params["filter"] = filter_param + + response = await asyncio.to_thread(service.spaces().list(**request_params).execute) + + spaces = response.get("spaces", []) + if not spaces: + return f"No Chat spaces found for type '{space_type}'." 
+ + output = [f"Found {len(spaces)} Chat spaces (type: {space_type}):"] + for space in spaces: + space_name = space.get("displayName", "Unnamed Space") + space_id = space.get("name", "") + space_type_actual = space.get("spaceType", "UNKNOWN") + output.append(f"- {space_name} (ID: {space_id}, Type: {space_type_actual})") + + return "\n".join(output) + + +@server.tool() +@require_multiple_services( + [ + {"service_type": "chat", "scopes": "chat_read", "param_name": "chat_service"}, + { + "service_type": "people", + "scopes": "contacts_read", + "param_name": "people_service", + }, + ] +) +@handle_http_errors("get_messages", service_type="chat") +async def get_messages( + chat_service, + people_service, + user_google_email: str, + space_id: str, + page_size: int = 50, + order_by: str = "createTime desc", +) -> str: + """ + Retrieves messages from a Google Chat space. + + Returns: + str: Formatted messages from the specified space. + """ + logger.info(f"[get_messages] Space ID: '{space_id}' for user '{user_google_email}'") + + # Get space info first + space_info = await asyncio.to_thread( + chat_service.spaces().get(name=space_id).execute + ) + space_name = space_info.get("displayName", "Unknown Space") + + # Get messages + response = await asyncio.to_thread( + chat_service.spaces() + .messages() + .list(parent=space_id, pageSize=page_size, orderBy=order_by) + .execute + ) + + messages = response.get("messages", []) + if not messages: + return f"No messages found in space '{space_name}' (ID: {space_id})." 
+ + # Pre-resolve unique senders in parallel + sender_lookup = {} + for msg in messages: + s = msg.get("sender", {}) + key = s.get("name", "") + if key and key not in sender_lookup: + sender_lookup[key] = s + resolved_names = await asyncio.gather( + *[_resolve_sender(people_service, s) for s in sender_lookup.values()] + ) + sender_map = dict(zip(sender_lookup.keys(), resolved_names)) + + output = [f"Messages from '{space_name}' (ID: {space_id}):\n"] + for msg in messages: + sender_obj = msg.get("sender", {}) + sender_key = sender_obj.get("name", "") + sender = sender_map.get(sender_key) or await _resolve_sender( + people_service, sender_obj + ) + create_time = msg.get("createTime", "Unknown Time") + text_content = msg.get("text", "No text content") + msg_name = msg.get("name", "") + + output.append(f"[{create_time}] {sender}:") + output.append(f" {text_content}") + rich_links = _extract_rich_links(msg) + for url in rich_links: + output.append(f" [linked: {url}]") + # Show attachments + attachments = msg.get("attachment", []) + for idx, att in enumerate(attachments): + att_name = att.get("contentName", "unnamed") + att_type = att.get("contentType", "unknown type") + att_resource = att.get("name", "") + output.append(f" [attachment {idx}: {att_name} ({att_type})]") + if att_resource: + output.append( + f" Use download_chat_attachment(message_id='{msg_name}', attachment_index={idx}) to download" + ) + # Show thread info if this is a threaded reply + thread = msg.get("thread", {}) + if msg.get("threadReply") and thread.get("name"): + output.append(f" [thread: {thread['name']}]") + # Show emoji reactions + reactions = msg.get("emojiReactionSummaries", []) + if reactions: + parts = [] + for r in reactions: + emoji = r.get("emoji", {}) + symbol = emoji.get("unicode", "") + if not symbol: + ce = emoji.get("customEmoji", {}) + symbol = f":{ce.get('uid', '?')}:" + count = r.get("reactionCount", 0) + parts.append(f"{symbol}x{count}") + output.append(f" [reactions: {', 
'.join(parts)}]") + output.append(f" (Message ID: {msg_name})\n") + + return "\n".join(output) + + +@server.tool() +@require_google_service("chat", "chat_write") +@handle_http_errors("send_message", service_type="chat") +async def send_message( + service, + user_google_email: str, + space_id: str, + message_text: str, + thread_key: Optional[str] = None, + thread_name: Optional[str] = None, +) -> str: + """ + Sends a message to a Google Chat space. + + Args: + thread_name: Reply in an existing thread by its resource name (e.g. spaces/X/threads/Y). + thread_key: Reply in a thread by app-defined key (creates thread if not found). + + Returns: + str: Confirmation message with sent message details. + """ + logger.info(f"[send_message] Email: '{user_google_email}', Space: '{space_id}'") + + message_body = {"text": message_text} + + request_params = {"parent": space_id, "body": message_body} + + # Thread reply support + if thread_name: + message_body["thread"] = {"name": thread_name} + request_params["messageReplyOption"] = "REPLY_MESSAGE_FALLBACK_TO_NEW_THREAD" + elif thread_key: + message_body["thread"] = {"threadKey": thread_key} + request_params["messageReplyOption"] = "REPLY_MESSAGE_FALLBACK_TO_NEW_THREAD" + + message = await asyncio.to_thread( + service.spaces().messages().create(**request_params).execute + ) + + message_name = message.get("name", "") + create_time = message.get("createTime", "") + + msg = f"Message sent to space '{space_id}' by {user_google_email}. 
Message ID: {message_name}, Time: {create_time}" + logger.info( + f"Successfully sent message to space '{space_id}' by {user_google_email}" + ) + return msg + + +@server.tool() +@require_multiple_services( + [ + {"service_type": "chat", "scopes": "chat_read", "param_name": "chat_service"}, + { + "service_type": "people", + "scopes": "contacts_read", + "param_name": "people_service", + }, + ] +) +@handle_http_errors("search_messages", service_type="chat") +async def search_messages( + chat_service, + people_service, + user_google_email: str, + query: str, + space_id: Optional[str] = None, + page_size: int = 25, +) -> str: + """ + Searches for messages in Google Chat spaces by text content. + + Returns: + str: A formatted list of messages matching the search query. + """ + logger.info(f"[search_messages] Email={user_google_email}, Query='{query}'") + + # If specific space provided, search within that space + if space_id: + response = await asyncio.to_thread( + chat_service.spaces() + .messages() + .list(parent=space_id, pageSize=page_size, filter=f'text:"{query}"') + .execute + ) + messages = response.get("messages", []) + context = f"space '{space_id}'" + else: + # Search across all accessible spaces (this may require iterating through spaces) + # For simplicity, we'll search the user's spaces first + spaces_response = await asyncio.to_thread( + chat_service.spaces().list(pageSize=100).execute + ) + spaces = spaces_response.get("spaces", []) + + messages = [] + for space in spaces[:10]: # Limit to first 10 spaces to avoid timeout + try: + space_messages = await asyncio.to_thread( + chat_service.spaces() + .messages() + .list( + parent=space.get("name"), pageSize=5, filter=f'text:"{query}"' + ) + .execute + ) + space_msgs = space_messages.get("messages", []) + for msg in space_msgs: + msg["_space_name"] = space.get("displayName", "Unknown") + messages.extend(space_msgs) + except HttpError as e: + logger.debug( + "Skipping space %s during search: %s", 
space.get("name"), e + ) + continue + context = "all accessible spaces" + + if not messages: + return f"No messages found matching '{query}' in {context}." + + # Pre-resolve unique senders in parallel + sender_lookup = {} + for msg in messages: + s = msg.get("sender", {}) + key = s.get("name", "") + if key and key not in sender_lookup: + sender_lookup[key] = s + resolved_names = await asyncio.gather( + *[_resolve_sender(people_service, s) for s in sender_lookup.values()] + ) + sender_map = dict(zip(sender_lookup.keys(), resolved_names)) + + output = [f"Found {len(messages)} messages matching '{query}' in {context}:"] + for msg in messages: + sender_obj = msg.get("sender", {}) + sender_key = sender_obj.get("name", "") + sender = sender_map.get(sender_key) or await _resolve_sender( + people_service, sender_obj + ) + create_time = msg.get("createTime", "Unknown Time") + text_content = msg.get("text", "No text content") + space_name = msg.get("_space_name", "Unknown Space") + + # Truncate long messages + if len(text_content) > 100: + text_content = text_content[:100] + "..." + + rich_links = _extract_rich_links(msg) + links_suffix = "".join(f" [linked: {url}]" for url in rich_links) + attachments = msg.get("attachment", []) + att_suffix = "".join( + f" [attachment: {a.get('contentName', 'unnamed')} ({a.get('contentType', 'unknown type')})]" + for a in attachments + ) + output.append( + f"- [{create_time}] {sender} in '{space_name}': {text_content}{links_suffix}{att_suffix}" + ) + + return "\n".join(output) + + +@server.tool() +@require_google_service("chat", "chat_write") +@handle_http_errors("create_reaction", service_type="chat") +async def create_reaction( + service, + user_google_email: str, + message_id: str, + emoji_unicode: str, +) -> str: + """ + Adds an emoji reaction to a Google Chat message. + + Args: + message_id: The message resource name (e.g. spaces/X/messages/Y). + emoji_unicode: The emoji character to react with (e.g. 👍). 
+ + Returns: + str: Confirmation message. + """ + logger.info(f"[create_reaction] Message: '{message_id}', Emoji: '{emoji_unicode}'") + + reaction = await asyncio.to_thread( + service.spaces() + .messages() + .reactions() + .create( + parent=message_id, + body={"emoji": {"unicode": emoji_unicode}}, + ) + .execute + ) + + reaction_name = reaction.get("name", "") + return f"Reacted with {emoji_unicode} on message {message_id}. Reaction ID: {reaction_name}" + + +@server.tool() +@handle_http_errors("download_chat_attachment", is_read_only=True, service_type="chat") +@require_google_service("chat", "chat_read") +async def download_chat_attachment( + service, + user_google_email: str, + message_id: str, + attachment_index: int = 0, +) -> str: + """ + Downloads an attachment from a Google Chat message and saves it to local disk. + + In stdio mode, returns the local file path for direct access. + In HTTP mode, returns a temporary download URL (valid for 1 hour). + + Args: + message_id: The message resource name (e.g. spaces/X/messages/Y). + attachment_index: Zero-based index of the attachment to download (default 0). + + Returns: + str: Attachment metadata with either a local file path or download URL. + """ + logger.info( + f"[download_chat_attachment] Message: '{message_id}', Index: {attachment_index}" + ) + + # Fetch the message to get attachment metadata + msg = await asyncio.to_thread( + service.spaces().messages().get(name=message_id).execute + ) + + attachments = msg.get("attachment", []) + if not attachments: + return f"No attachments found on message {message_id}." + + if attachment_index < 0 or attachment_index >= len(attachments): + return ( + f"Invalid attachment_index {attachment_index}. " + f"Message has {len(attachments)} attachment(s) (0-{len(attachments) - 1})." 
@server.tool()
@handle_http_errors("download_chat_attachment", is_read_only=True, service_type="chat")
@require_google_service("chat", "chat_read")
async def download_chat_attachment(
    service,
    user_google_email: str,
    message_id: str,
    attachment_index: int = 0,
) -> str:
    """
    Downloads an attachment from a Google Chat message and saves it to local disk.

    In stdio mode, returns the local file path for direct access.
    In HTTP mode, returns a temporary download URL (valid for 1 hour).

    Args:
        message_id: The message resource name (e.g. spaces/X/messages/Y).
        attachment_index: Zero-based index of the attachment to download (default 0).

    Returns:
        str: Attachment metadata with either a local file path or download URL.
            Validation and download failures are reported in the returned
            text rather than raised.
    """
    logger.info(
        f"[download_chat_attachment] Message: '{message_id}', Index: {attachment_index}"
    )

    # Fetch the message to get attachment metadata
    msg = await asyncio.to_thread(
        service.spaces().messages().get(name=message_id).execute
    )

    attachments = msg.get("attachment", [])
    if not attachments:
        return f"No attachments found on message {message_id}."

    if not 0 <= attachment_index < len(attachments):
        return (
            f"Invalid attachment_index {attachment_index}. "
            f"Message has {len(attachments)} attachment(s) (0-{len(attachments) - 1})."
        )

    att = attachments[attachment_index]
    filename = att.get("contentName", "attachment")
    content_type = att.get("contentType", "application/octet-stream")
    source = att.get("source", "")

    # The media endpoint needs attachmentDataRef.resourceName (e.g.
    # "spaces/S/attachments/A"), NOT the attachment name which includes
    # the /messages/ segment and causes 400 errors.
    media_resource = att.get("attachmentDataRef", {}).get("resourceName", "")
    att_name = att.get("name", "")

    logger.info(
        f"[download_chat_attachment] Downloading '{filename}' ({content_type}), "
        f"source={source}, mediaResource={media_resource}, name={att_name}"
    )

    # Prefer attachmentDataRef.resourceName; fall back to the attachment name.
    resource_name = media_resource or att_name
    if not resource_name:
        return f"No resource name available for attachment '{filename}'."

    # Download the attachment binary data via the Chat API media endpoint.
    # We use httpx with the Bearer token directly because MediaIoBaseDownload
    # and AuthorizedHttp fail in OAuth 2.1 (no refresh_token). The attachment's
    # downloadUri points to chat.google.com which requires browser cookies.
    media_url = f"https://chat.googleapis.com/v1/media/{resource_name}?alt=media"

    try:
        access_token = service._http.credentials.token
        if not access_token:
            # Without this guard the request would go out as "Bearer None"
            # and surface as an opaque 401 from the media endpoint.
            return (
                f"Failed to download attachment '{filename}': "
                "no access token is available on the Chat service credentials."
            )
        async with httpx.AsyncClient(follow_redirects=True) as client:
            resp = await client.get(
                media_url,
                headers={"Authorization": f"Bearer {access_token}"},
            )
        if resp.status_code != 200:
            body = resp.text[:500]
            return (
                f"Failed to download attachment '{filename}': "
                f"HTTP {resp.status_code} from {media_url}\n{body}"
            )
        file_bytes = resp.content
    except Exception as e:
        # Deliberately broad: any failure here is reported to the caller as
        # text, but it is also logged so the cause is not lost.
        logger.warning(f"[download_chat_attachment] Download failed: {e}")
        return f"Failed to download attachment '{filename}': {e}"

    size_bytes = len(file_bytes)
    size_kb = size_bytes / 1024

    # Check if we're in stateless mode (can't save files)
    from auth.oauth_config import is_stateless_mode

    if is_stateless_mode():
        # 75 bytes encode to exactly 100 base64 characters, so encoding only
        # the head yields the same preview as encoding the whole file.
        b64_preview = base64.urlsafe_b64encode(file_bytes[:75]).decode("utf-8")[:100]
        return "\n".join(
            [
                f"Attachment downloaded: {filename} ({content_type})",
                f"Size: {size_kb:.1f} KB ({size_bytes} bytes)",
                "",
                "Stateless mode: File storage disabled.",
                f"Base64 preview: {b64_preview}...",
            ]
        )

    # Save to local disk
    from core.attachment_storage import get_attachment_storage, get_attachment_url
    from core.config import get_transport_mode

    storage = get_attachment_storage()
    b64_data = base64.urlsafe_b64encode(file_bytes).decode("utf-8")
    result = storage.save_attachment(
        base64_data=b64_data, filename=filename, mime_type=content_type
    )

    result_lines = [
        f"Attachment downloaded: {filename}",
        f"Type: {content_type}",
        f"Size: {size_kb:.1f} KB ({size_bytes} bytes)",
    ]

    if get_transport_mode() == "stdio":
        result_lines.append(f"\nSaved to: {result.path}")
        result_lines.append(
            "\nThe file has been saved to disk and can be accessed directly via the file path."
        )
    else:
        # Distinct name: this is the locally served URL, not the Chat media URL.
        served_url = get_attachment_url(result.file_id)
        result_lines.append(f"\nDownload URL: {served_url}")
        result_lines.append("\nThe file will expire after 1 hour.")

    logger.info(
        f"[download_chat_attachment] Saved {size_kb:.1f} KB attachment to {result.path}"
    )
    return "\n".join(result_lines)
+ ) + else: + download_url = get_attachment_url(result.file_id) + result_lines.append(f"\nDownload URL: {download_url}") + result_lines.append("\nThe file will expire after 1 hour.") + + logger.info( + f"[download_chat_attachment] Saved {size_kb:.1f} KB attachment to {result.path}" + ) + return "\n".join(result_lines) diff --git a/gcontacts/__init__.py b/gcontacts/__init__.py new file mode 100644 index 0000000..b37df17 --- /dev/null +++ b/gcontacts/__init__.py @@ -0,0 +1 @@ +# Google Contacts (People API) tools diff --git a/gcontacts/contacts_tools.py b/gcontacts/contacts_tools.py new file mode 100644 index 0000000..ab04053 --- /dev/null +++ b/gcontacts/contacts_tools.py @@ -0,0 +1,1052 @@ +""" +Google Contacts MCP Tools (People API) + +This module provides MCP tools for interacting with Google Contacts via the People API. +""" + +import asyncio +import logging +from typing import Any, Dict, List, Optional + +from googleapiclient.errors import HttpError +from mcp import Resource + +from auth.service_decorator import require_google_service +from core.server import server +from core.utils import UserInputError, handle_http_errors + +logger = logging.getLogger(__name__) + +# Default person fields for list/search operations +DEFAULT_PERSON_FIELDS = "names,emailAddresses,phoneNumbers,organizations" + +# Detailed person fields for get operations +DETAILED_PERSON_FIELDS = ( + "names,emailAddresses,phoneNumbers,organizations,biographies," + "addresses,birthdays,urls,photos,metadata,memberships" +) + +# Contact group fields +CONTACT_GROUP_FIELDS = "name,groupType,memberCount,metadata" + +# Cache warmup tracking +_search_cache_warmed_up: Dict[str, bool] = {} + + +def _format_contact(person: Dict[str, Any], detailed: bool = False) -> str: + """ + Format a Person resource into a readable string. + + Args: + person: The Person resource from the People API. + detailed: Whether to include detailed fields. + + Returns: + Formatted string representation of the contact. 
+ """ + resource_name = person.get("resourceName", "Unknown") + contact_id = resource_name.replace("people/", "") if resource_name else "Unknown" + + lines = [f"Contact ID: {contact_id}"] + + # Names + names = person.get("names", []) + if names: + primary_name = names[0] + display_name = primary_name.get("displayName", "") + if display_name: + lines.append(f"Name: {display_name}") + + # Email addresses + emails = person.get("emailAddresses", []) + if emails: + email_list = [e.get("value", "") for e in emails if e.get("value")] + if email_list: + lines.append(f"Email: {', '.join(email_list)}") + + # Phone numbers + phones = person.get("phoneNumbers", []) + if phones: + phone_list = [p.get("value", "") for p in phones if p.get("value")] + if phone_list: + lines.append(f"Phone: {', '.join(phone_list)}") + + # Organizations + orgs = person.get("organizations", []) + if orgs: + org = orgs[0] + org_parts = [] + if org.get("title"): + org_parts.append(org["title"]) + if org.get("name"): + org_parts.append(f"at {org['name']}") + if org_parts: + lines.append(f"Organization: {' '.join(org_parts)}") + + if detailed: + # Addresses + addresses = person.get("addresses", []) + if addresses: + addr = addresses[0] + formatted_addr = addr.get("formattedValue", "") + if formatted_addr: + lines.append(f"Address: {formatted_addr}") + + # Birthday + birthdays = person.get("birthdays", []) + if birthdays: + bday = birthdays[0].get("date", {}) + if bday: + bday_str = f"{bday.get('month', '?')}/{bday.get('day', '?')}" + if bday.get("year"): + bday_str = f"{bday.get('year')}/{bday_str}" + lines.append(f"Birthday: {bday_str}") + + # URLs + urls = person.get("urls", []) + if urls: + url_list = [u.get("value", "") for u in urls if u.get("value")] + if url_list: + lines.append(f"URLs: {', '.join(url_list)}") + + # Biography/Notes + bios = person.get("biographies", []) + if bios: + bio = bios[0].get("value", "") + if bio: + # Truncate long bios + if len(bio) > 200: + bio = bio[:200] + "..." 
+ lines.append(f"Notes: {bio}") + + # Metadata + metadata = person.get("metadata", {}) + if metadata: + sources = metadata.get("sources", []) + if sources: + source_types = [s.get("type", "") for s in sources] + if source_types: + lines.append(f"Sources: {', '.join(source_types)}") + + return "\n".join(lines) + + +def _build_person_body( + given_name: Optional[str] = None, + family_name: Optional[str] = None, + email: Optional[str] = None, + phone: Optional[str] = None, + organization: Optional[str] = None, + job_title: Optional[str] = None, + notes: Optional[str] = None, + address: Optional[str] = None, +) -> Dict[str, Any]: + """ + Build a Person resource body for create/update operations. + + Args: + given_name: First name. + family_name: Last name. + email: Email address. + phone: Phone number. + organization: Company/organization name. + job_title: Job title. + notes: Additional notes/biography. + address: Street address. + + Returns: + Person resource body dictionary. + """ + body: Dict[str, Any] = {} + + if given_name or family_name: + body["names"] = [ + { + "givenName": given_name or "", + "familyName": family_name or "", + } + ] + + if email: + body["emailAddresses"] = [{"value": email}] + + if phone: + body["phoneNumbers"] = [{"value": phone}] + + if organization or job_title: + org_entry: Dict[str, str] = {} + if organization: + org_entry["name"] = organization + if job_title: + org_entry["title"] = job_title + body["organizations"] = [org_entry] + + if notes: + body["biographies"] = [{"value": notes, "contentType": "TEXT_PLAIN"}] + + if address: + body["addresses"] = [{"formattedValue": address}] + + return body + + +async def _warmup_search_cache(service: Resource, user_google_email: str) -> None: + """ + Warm up the People API search cache. + + The People API requires an initial empty query to warm up the search cache + before searches will return results. + + Args: + service: Authenticated People API service. 
async def _warmup_search_cache(service: Resource, user_google_email: str) -> None:
    """
    Prime the People API search cache once per user.

    The People API requires an initial empty query to warm up the search cache
    before searches will return results. Users that were already warmed up are
    remembered in the module-level ``_search_cache_warmed_up`` mapping.

    Args:
        service: Authenticated People API service.
        user_google_email: User's email, used as the tracking key.
    """
    already_warm = _search_cache_warmed_up.get(user_google_email)
    if already_warm:
        return

    try:
        logger.debug(f"[contacts] Warming up search cache for {user_google_email}")
        warmup_request = service.people().searchContacts(
            query="", readMask="names", pageSize=1
        )
        await asyncio.to_thread(warmup_request.execute)
    except HttpError as e:
        # Warmup failure is non-fatal, search may still work
        logger.warning(f"[contacts] Search cache warmup failed: {e}")
    else:
        _search_cache_warmed_up[user_google_email] = True
        logger.debug(f"[contacts] Search cache warmed up for {user_google_email}")
# =============================================================================
# Core Tier Tools
# =============================================================================


@server.tool()
@require_google_service("people", "contacts_read")
@handle_http_errors("list_contacts", service_type="people")
async def list_contacts(
    service: Resource,
    user_google_email: str,
    page_size: int = 100,
    page_token: Optional[str] = None,
    sort_order: Optional[str] = None,
) -> str:
    """
    List contacts for the authenticated user.

    Args:
        user_google_email (str): The user's Google email address. Required.
        page_size (int): Maximum number of contacts to return (default: 100, max: 1000).
        page_token (Optional[str]): Token for pagination.
        sort_order (Optional[str]): Sort order: "LAST_MODIFIED_ASCENDING", "LAST_MODIFIED_DESCENDING", "FIRST_NAME_ASCENDING", or "LAST_NAME_ASCENDING".

    Returns:
        str: List of contacts with their basic information.

    Raises:
        UserInputError: If page_size is below 1 or sort_order is not one of
            the supported values.
    """
    logger.info(f"[list_contacts] Invoked. Email: '{user_google_email}'")

    if page_size < 1:
        raise UserInputError("page_size must be >= 1")
    page_size = min(page_size, 1000)

    params: Dict[str, Any] = {
        "resourceName": "people/me",
        "personFields": DEFAULT_PERSON_FIELDS,
        "pageSize": page_size,
    }

    if page_token:
        params["pageToken"] = page_token

    if sort_order:
        # Reject unknown values here with a clear message instead of letting
        # the API answer with an opaque error; case is normalized.
        valid_sort_orders = (
            "LAST_MODIFIED_ASCENDING",
            "LAST_MODIFIED_DESCENDING",
            "FIRST_NAME_ASCENDING",
            "LAST_NAME_ASCENDING",
        )
        normalized_order = sort_order.strip().upper()
        if normalized_order not in valid_sort_orders:
            raise UserInputError(
                f"Invalid sort_order '{sort_order}'. "
                f"Must be one of: {', '.join(valid_sort_orders)}."
            )
        params["sortOrder"] = normalized_order

    result = await asyncio.to_thread(
        service.people().connections().list(**params).execute
    )

    connections = result.get("connections", [])
    next_page_token = result.get("nextPageToken")
    total_people = result.get("totalPeople", len(connections))

    if not connections:
        return f"No contacts found for {user_google_email}."

    # Collect the pieces and join once rather than growing a string in a loop.
    parts = [
        f"Contacts for {user_google_email} ({len(connections)} of {total_people}):\n\n"
    ]
    parts.extend(_format_contact(person) + "\n\n" for person in connections)

    if next_page_token:
        parts.append(f"Next page token: {next_page_token}")

    logger.info(f"Found {len(connections)} contacts for {user_google_email}")
    return "".join(parts)
@server.tool()
@require_google_service("people", "contacts_read")
@handle_http_errors("get_contact", service_type="people")
async def get_contact(
    service: Resource,
    user_google_email: str,
    contact_id: str,
) -> str:
    """
    Get detailed information about a specific contact.

    Args:
        user_google_email (str): The user's Google email address. Required.
        contact_id (str): The contact ID (e.g., "c1234567890" or full resource name "people/c1234567890").

    Returns:
        str: Detailed contact information.
    """
    # Accept either a bare ID or a full "people/..." resource name.
    resource_name = (
        contact_id if contact_id.startswith("people/") else f"people/{contact_id}"
    )

    logger.info(
        f"[get_contact] Invoked. Email: '{user_google_email}', Contact: {resource_name}"
    )

    request = service.people().get(
        resourceName=resource_name, personFields=DETAILED_PERSON_FIELDS
    )
    person = await asyncio.to_thread(request.execute)

    details = _format_contact(person, detailed=True)

    logger.info(f"Retrieved contact {resource_name} for {user_google_email}")
    return f"Contact Details for {user_google_email}:\n\n" + details
@server.tool()
@require_google_service("people", "contacts_read")
@handle_http_errors("search_contacts", service_type="people")
async def search_contacts(
    service: Resource,
    user_google_email: str,
    query: str,
    page_size: int = 30,
) -> str:
    """
    Search contacts by name, email, phone number, or other fields.

    Args:
        user_google_email (str): The user's Google email address. Required.
        query (str): Search query string (searches names, emails, phone numbers).
        page_size (int): Maximum number of results to return (default: 30, max: 30).

    Returns:
        str: Matching contacts with their basic information.
    """
    logger.info(
        f"[search_contacts] Invoked. Email: '{user_google_email}', Query: '{query}'"
    )

    if page_size < 1:
        raise UserInputError("page_size must be >= 1")
    page_size = min(page_size, 30)

    # Warm up the search cache if needed
    await _warmup_search_cache(service, user_google_email)

    search_request = service.people().searchContacts(
        query=query,
        readMask=DEFAULT_PERSON_FIELDS,
        pageSize=page_size,
    )
    result = await asyncio.to_thread(search_request.execute)

    matches = result.get("results", [])
    if not matches:
        return f"No contacts found matching '{query}' for {user_google_email}."

    # Each match wraps the contact under its "person" key.
    formatted = "".join(
        _format_contact(match.get("person", {})) + "\n\n" for match in matches
    )

    logger.info(
        f"Found {len(matches)} contacts matching '{query}' for {user_google_email}"
    )
    return f"Search Results for '{query}' ({len(matches)} found):\n\n" + formatted
@server.tool()
@require_google_service("people", "contacts")
@handle_http_errors("manage_contact", service_type="people")
async def manage_contact(
    service: Resource,
    user_google_email: str,
    action: str,
    contact_id: Optional[str] = None,
    given_name: Optional[str] = None,
    family_name: Optional[str] = None,
    email: Optional[str] = None,
    phone: Optional[str] = None,
    organization: Optional[str] = None,
    job_title: Optional[str] = None,
    notes: Optional[str] = None,
) -> str:
    """
    Create, update, or delete a contact. Consolidated tool replacing create_contact,
    update_contact, and delete_contact.

    Args:
        user_google_email (str): The user's Google email address. Required.
        action (str): The action to perform: "create", "update", or "delete".
        contact_id (Optional[str]): The contact ID. Required for "update" and "delete" actions.
        given_name (Optional[str]): First name (for create/update).
        family_name (Optional[str]): Last name (for create/update).
        email (Optional[str]): Email address (for create/update).
        phone (Optional[str]): Phone number (for create/update).
        organization (Optional[str]): Company/organization name (for create/update).
        job_title (Optional[str]): Job title (for create/update).
        notes (Optional[str]): Additional notes (for create/update).

    Returns:
        str: Result of the action performed.

    Raises:
        UserInputError: If the action is unknown, contact_id is missing for
            update/delete, or no contact field is supplied for create/update.
        Exception: If the contact fetched for an update carries no etag.
    """
    action = action.lower().strip()
    if action not in ("create", "update", "delete"):
        raise UserInputError(
            f"Invalid action '{action}'. Must be 'create', 'update', or 'delete'."
        )

    logger.info(
        f"[manage_contact] Invoked. Action: '{action}', Email: '{user_google_email}'"
    )

    # update and delete both require contact_id
    if action != "create" and not contact_id:
        raise UserInputError(f"contact_id is required for '{action}' action.")

    body: Dict[str, Any] = {}
    if action in ("create", "update"):
        # Validate the payload before any network call, so an empty update
        # no longer costs a round-trip to fetch the etag first.
        body = _build_person_body(
            given_name=given_name,
            family_name=family_name,
            email=email,
            phone=phone,
            organization=organization,
            job_title=job_title,
            notes=notes,
        )
        if not body:
            raise UserInputError(
                "At least one field (name, email, phone, etc.) must be provided."
            )

    if action == "create":
        result = await asyncio.to_thread(
            service.people()
            .createContact(body=body, personFields=DETAILED_PERSON_FIELDS)
            .execute
        )

        response = f"Contact Created for {user_google_email}:\n\n"
        response += _format_contact(result, detailed=True)

        created_id = result.get("resourceName", "").replace("people/", "")
        logger.info(f"Created contact {created_id} for {user_google_email}")
        return response

    # Normalize resource name
    resource_name = (
        contact_id if contact_id.startswith("people/") else f"people/{contact_id}"
    )

    if action == "update":
        # The body holds only Person field names at this point, so its keys
        # are exactly the fields to update.
        update_person_fields = ",".join(body)

        # Fetch the contact to get the etag
        current = await asyncio.to_thread(
            service.people()
            .get(resourceName=resource_name, personFields=DETAILED_PERSON_FIELDS)
            .execute
        )

        etag = current.get("etag")
        if not etag:
            raise Exception("Unable to get contact etag for update.")
        body["etag"] = etag

        result = await asyncio.to_thread(
            service.people()
            .updateContact(
                resourceName=resource_name,
                body=body,
                updatePersonFields=update_person_fields,
                personFields=DETAILED_PERSON_FIELDS,
            )
            .execute
        )

        response = f"Contact Updated for {user_google_email}:\n\n"
        response += _format_contact(result, detailed=True)

        logger.info(f"Updated contact {resource_name} for {user_google_email}")
        return response

    # action == "delete"
    await asyncio.to_thread(
        service.people().deleteContact(resourceName=resource_name).execute
    )

    response = f"Contact {contact_id} has been deleted for {user_google_email}."
    logger.info(f"Deleted contact {resource_name} for {user_google_email}")
    return response
# =============================================================================
# Extended Tier Tools
# =============================================================================


@server.tool()
@require_google_service("people", "contacts_read")
@handle_http_errors("list_contact_groups", service_type="people")
async def list_contact_groups(
    service: Resource,
    user_google_email: str,
    page_size: int = 100,
    page_token: Optional[str] = None,
) -> str:
    """
    List contact groups (labels) for the user.

    Args:
        user_google_email (str): The user's Google email address. Required.
        page_size (int): Maximum number of groups to return (default: 100, max: 1000).
        page_token (Optional[str]): Token for pagination.

    Returns:
        str: List of contact groups with their details.
    """
    logger.info(f"[list_contact_groups] Invoked. Email: '{user_google_email}'")

    if page_size < 1:
        raise UserInputError("page_size must be >= 1")

    request_args: Dict[str, Any] = {
        "pageSize": min(page_size, 1000),
        "groupFields": CONTACT_GROUP_FIELDS,
    }
    if page_token:
        request_args["pageToken"] = page_token

    listing = await asyncio.to_thread(
        service.contactGroups().list(**request_args).execute
    )

    groups = listing.get("contactGroups", [])
    next_page_token = listing.get("nextPageToken")

    if not groups:
        return f"No contact groups found for {user_google_email}."

    chunks = [f"Contact Groups for {user_google_email}:\n\n"]
    for group in groups:
        # Show the bare ID rather than the full "contactGroups/..." name.
        group_id = group.get("resourceName", "").replace("contactGroups/", "")
        chunks.append(f"- {group.get('name', 'Unnamed')}\n")
        chunks.append(f"  ID: {group_id}\n")
        chunks.append(f"  Type: {group.get('groupType', 'USER_CONTACT_GROUP')}\n")
        chunks.append(f"  Members: {group.get('memberCount', 0)}\n\n")

    if next_page_token:
        chunks.append(f"Next page token: {next_page_token}")

    logger.info(f"Found {len(groups)} contact groups for {user_google_email}")
    return "".join(chunks)
@server.tool()
@require_google_service("people", "contacts_read")
@handle_http_errors("get_contact_group", service_type="people")
async def get_contact_group(
    service: Resource,
    user_google_email: str,
    group_id: str,
    max_members: int = 100,
) -> str:
    """
    Get details of a specific contact group including its members.

    Args:
        user_google_email (str): The user's Google email address. Required.
        group_id (str): The contact group ID.
        max_members (int): Maximum number of members to return (default: 100, max: 1000).

    Returns:
        str: Contact group details including members.

    Raises:
        UserInputError: If max_members is below 1.
    """
    # Normalize resource name
    prefix = "contactGroups/"
    if group_id.startswith(prefix):
        resource_name = group_id
    else:
        resource_name = f"{prefix}{group_id}"
    # Always report the bare ID, even when the caller passed the full
    # resource name, so the output matches list_contact_groups.
    display_id = resource_name[len(prefix):]

    logger.info(
        f"[get_contact_group] Invoked. Email: '{user_google_email}', Group: {resource_name}"
    )

    if max_members < 1:
        raise UserInputError("max_members must be >= 1")
    max_members = min(max_members, 1000)

    result = await asyncio.to_thread(
        service.contactGroups()
        .get(
            resourceName=resource_name,
            maxMembers=max_members,
            groupFields=CONTACT_GROUP_FIELDS,
        )
        .execute
    )

    name = result.get("name", "Unnamed")
    group_type = result.get("groupType", "USER_CONTACT_GROUP")
    member_count = result.get("memberCount", 0)
    member_resource_names = result.get("memberResourceNames", [])

    parts = [
        f"Contact Group Details for {user_google_email}:\n\n",
        f"Name: {name}\n",
        f"ID: {display_id}\n",
        f"Type: {group_type}\n",
        f"Total Members: {member_count}\n",
    ]

    if member_resource_names:
        parts.append(f"\nMembers ({len(member_resource_names)} shown):\n")
        parts.extend(
            f"  - {member.replace('people/', '')}\n"
            for member in member_resource_names
        )

    logger.info(f"Retrieved contact group {resource_name} for {user_google_email}")
    return "".join(parts)
+ """ + # Normalize resource name + if not group_id.startswith("contactGroups/"): + resource_name = f"contactGroups/{group_id}" + else: + resource_name = group_id + + logger.info( + f"[get_contact_group] Invoked. Email: '{user_google_email}', Group: {resource_name}" + ) + + if max_members < 1: + raise UserInputError("max_members must be >= 1") + max_members = min(max_members, 1000) + + result = await asyncio.to_thread( + service.contactGroups() + .get( + resourceName=resource_name, + maxMembers=max_members, + groupFields=CONTACT_GROUP_FIELDS, + ) + .execute + ) + + name = result.get("name", "Unnamed") + group_type = result.get("groupType", "USER_CONTACT_GROUP") + member_count = result.get("memberCount", 0) + member_resource_names = result.get("memberResourceNames", []) + + response = f"Contact Group Details for {user_google_email}:\n\n" + response += f"Name: {name}\n" + response += f"ID: {group_id}\n" + response += f"Type: {group_type}\n" + response += f"Total Members: {member_count}\n" + + if member_resource_names: + response += f"\nMembers ({len(member_resource_names)} shown):\n" + for member in member_resource_names: + contact_id = member.replace("people/", "") + response += f" - {contact_id}\n" + + logger.info(f"Retrieved contact group {resource_name} for {user_google_email}") + return response + + +# ============================================================================= +# Complete Tier Tools +# ============================================================================= + + +@server.tool() +@require_google_service("people", "contacts") +@handle_http_errors("manage_contacts_batch", service_type="people") +async def manage_contacts_batch( + service: Resource, + user_google_email: str, + action: str, + contacts: Optional[List[Dict[str, str]]] = None, + updates: Optional[List[Dict[str, str]]] = None, + contact_ids: Optional[List[str]] = None, +) -> str: + """ + Batch create, update, or delete contacts. 
@server.tool()
@require_google_service("people", "contacts")
@handle_http_errors("manage_contacts_batch", service_type="people")
async def manage_contacts_batch(
    service: Resource,
    user_google_email: str,
    action: str,
    contacts: Optional[List[Dict[str, str]]] = None,
    updates: Optional[List[Dict[str, str]]] = None,
    contact_ids: Optional[List[str]] = None,
) -> str:
    """
    Batch create, update, or delete contacts. Consolidated tool replacing
    batch_create_contacts, batch_update_contacts, and batch_delete_contacts.

    Args:
        user_google_email (str): The user's Google email address. Required.
        action (str): The action to perform: "create", "update", or "delete".
        contacts (Optional[List[Dict[str, str]]]): List of contact dicts for "create" action.
            Each dict may contain: given_name, family_name, email, phone, organization, job_title.
        updates (Optional[List[Dict[str, str]]]): List of update dicts for "update" action.
            Each dict must contain contact_id and may contain: given_name, family_name,
            email, phone, organization, job_title.
        contact_ids (Optional[List[str]]): List of contact IDs for "delete" action.

    Returns:
        str: Result of the batch action performed.

    Raises:
        UserInputError: If the action is unknown, the list required by the
            action is missing or over the batch limit, an update lacks a
            contact_id, or no entry carries usable data.
    """

    def _person_name(cid: str) -> str:
        # Accept either a bare ID or a full "people/..." resource name.
        return cid if cid.startswith("people/") else f"people/{cid}"

    action = action.lower().strip()
    if action not in ("create", "update", "delete"):
        raise UserInputError(
            f"Invalid action '{action}'. Must be 'create', 'update', or 'delete'."
        )

    logger.info(
        f"[manage_contacts_batch] Invoked. Action: '{action}', Email: '{user_google_email}'"
    )

    if action == "create":
        if not contacts:
            raise UserInputError("contacts parameter is required for 'create' action.")

        if len(contacts) > 200:
            raise UserInputError("Maximum 200 contacts can be created in a batch.")

        contact_bodies = []
        for contact in contacts:
            body = _build_person_body(
                given_name=contact.get("given_name"),
                family_name=contact.get("family_name"),
                email=contact.get("email"),
                phone=contact.get("phone"),
                organization=contact.get("organization"),
                job_title=contact.get("job_title"),
            )
            if body:
                contact_bodies.append({"contactPerson": body})

        if not contact_bodies:
            raise UserInputError("No valid contact data provided.")

        batch_body = {
            "contacts": contact_bodies,
            "readMask": DEFAULT_PERSON_FIELDS,
        }

        result = await asyncio.to_thread(
            service.people().batchCreateContacts(body=batch_body).execute
        )

        created_people = result.get("createdPeople", [])

        parts = [
            f"Batch Create Results for {user_google_email}:\n\n",
            f"Created {len(created_people)} contacts:\n\n",
        ]
        parts.extend(
            _format_contact(item.get("person", {})) + "\n\n" for item in created_people
        )

        logger.info(
            f"Batch created {len(created_people)} contacts for {user_google_email}"
        )
        return "".join(parts)

    if action == "update":
        if not updates:
            raise UserInputError("updates parameter is required for 'update' action.")

        if len(updates) > 200:
            raise UserInputError("Maximum 200 contacts can be updated in a batch.")

        # Pair every update with its canonical resource name up front.
        targets = []
        for update in updates:
            cid = update.get("contact_id")
            if not cid:
                raise UserInputError("Each update must include a contact_id.")
            targets.append((_person_name(cid), update))

        # Fetch all contacts to get their etags
        batch_get_result = await asyncio.to_thread(
            service.people()
            .getBatchGet(
                resourceNames=[rname for rname, _ in targets],
                personFields="metadata",
            )
            .execute
        )

        etags = {}
        for resp in batch_get_result.get("responses", []):
            person = resp.get("person", {})
            rname = person.get("resourceName")
            etag = person.get("etag")
            if rname and etag:
                etags[rname] = etag

        # Group the bodies by the exact set of fields each one carries. The
        # update mask applies to every person in a request, and a masked field
        # missing from a body is cleared. A single union mask would therefore
        # wipe fields on contacts that never supplied them.
        people_by_mask: Dict[str, Dict[str, Dict[str, Any]]] = {}
        skipped: List[str] = []

        for cid, update in targets:
            etag = etags.get(cid)
            if not etag:
                logger.warning(f"No etag found for {cid}, skipping")
                skipped.append(cid)
                continue

            body = _build_person_body(
                given_name=update.get("given_name"),
                family_name=update.get("family_name"),
                email=update.get("email"),
                phone=update.get("phone"),
                organization=update.get("organization"),
                job_title=update.get("job_title"),
            )
            if not body:
                continue

            # Only Person field names are present at this point.
            mask = ",".join(sorted(body))
            body["resourceName"] = cid
            body["etag"] = etag
            # The API expects "contacts" as a map keyed by resource name.
            people_by_mask.setdefault(mask, {})[cid] = body

        if not people_by_mask:
            raise UserInputError("No valid update data provided.")

        # One request per distinct mask. A failure in a later request leaves
        # earlier groups already applied and propagates to the caller.
        update_results: Dict[str, Any] = {}
        for mask, people in people_by_mask.items():
            batch_body = {
                "contacts": people,
                "updateMask": mask,
                "readMask": DEFAULT_PERSON_FIELDS,
            }
            result = await asyncio.to_thread(
                service.people().batchUpdateContacts(body=batch_body).execute
            )
            update_results.update(result.get("updateResult", {}))

        parts = [
            f"Batch Update Results for {user_google_email}:\n\n",
            f"Updated {len(update_results)} contacts:\n\n",
        ]
        parts.extend(
            _format_contact(update_result.get("person", {})) + "\n\n"
            for update_result in update_results.values()
        )
        if skipped:
            # Tell the caller which contacts were left untouched.
            parts.append(f"Skipped (not found): {', '.join(skipped)}\n")

        logger.info(
            f"Batch updated {len(update_results)} contacts for {user_google_email}"
        )
        return "".join(parts)

    # action == "delete"
    if not contact_ids:
        raise UserInputError("contact_ids parameter is required for 'delete' action.")

    if len(contact_ids) > 500:
        raise UserInputError("Maximum 500 contacts can be deleted in a batch.")

    batch_body = {"resourceNames": [_person_name(cid) for cid in contact_ids]}

    await asyncio.to_thread(
        service.people().batchDeleteContacts(body=batch_body).execute
    )

    response = f"Batch deleted {len(contact_ids)} contacts for {user_google_email}."
    logger.info(f"Batch deleted {len(contact_ids)} contacts for {user_google_email}")
    return response
@server.tool()
@require_google_service("people", "contacts")
@handle_http_errors("manage_contact_group", service_type="people")
async def manage_contact_group(
    service: Resource,
    user_google_email: str,
    action: str,
    group_id: Optional[str] = None,
    name: Optional[str] = None,
    delete_contacts: bool = False,
    add_contact_ids: Optional[List[str]] = None,
    remove_contact_ids: Optional[List[str]] = None,
) -> str:
    """
    Create, update, delete a contact group, or modify its members. Consolidated tool
    replacing create_contact_group, update_contact_group, delete_contact_group, and
    modify_contact_group_members.

    Args:
        user_google_email (str): The user's Google email address. Required.
        action (str): The action to perform: "create", "update", "delete", or "modify_members".
        group_id (Optional[str]): The contact group ID. Required for "update", "delete",
            and "modify_members" actions.
        name (Optional[str]): The group name. Required for "create" and "update" actions.
        delete_contacts (bool): If True and action is "delete", also delete contacts in
            the group (default: False).
        add_contact_ids (Optional[List[str]]): Contact IDs to add (for "modify_members").
        remove_contact_ids (Optional[List[str]]): Contact IDs to remove (for "modify_members").

    Returns:
        str: Result of the action performed.

    Raises:
        UserInputError: If the action is unknown or an argument required by
            the action is missing.
    """

    def _person_name(cid: str) -> str:
        # Accept either a bare ID or a full "people/..." resource name.
        return cid if cid.startswith("people/") else f"people/{cid}"

    action = action.lower().strip()
    if action not in ("create", "update", "delete", "modify_members"):
        raise UserInputError(
            f"Invalid action '{action}'. Must be 'create', 'update', 'delete', or 'modify_members'."
        )

    logger.info(
        f"[manage_contact_group] Invoked. Action: '{action}', Email: '{user_google_email}'"
    )

    if action == "create":
        if not name:
            raise UserInputError("name is required for 'create' action.")

        body = {"contactGroup": {"name": name}}

        result = await asyncio.to_thread(
            service.contactGroups().create(body=body).execute
        )

        resource_name = result.get("resourceName", "")
        created_group_id = resource_name.replace("contactGroups/", "")
        created_name = result.get("name", name)

        response = f"Contact Group Created for {user_google_email}:\n\n"
        response += f"Name: {created_name}\n"
        response += f"ID: {created_group_id}\n"
        response += f"Type: {result.get('groupType', 'USER_CONTACT_GROUP')}\n"

        logger.info(f"Created contact group '{name}' for {user_google_email}")
        return response

    # All other actions require group_id
    if not group_id:
        raise UserInputError(f"group_id is required for '{action}' action.")

    # Normalize resource name; report the bare ID so the output matches what
    # the "create" action prints.
    prefix = "contactGroups/"
    resource_name = group_id if group_id.startswith(prefix) else f"{prefix}{group_id}"
    display_id = resource_name[len(prefix):]

    if action == "update":
        if not name:
            raise UserInputError("name is required for 'update' action.")

        body = {"contactGroup": {"name": name}}

        result = await asyncio.to_thread(
            service.contactGroups()
            .update(resourceName=resource_name, body=body)
            .execute
        )

        updated_name = result.get("name", name)

        response = f"Contact Group Updated for {user_google_email}:\n\n"
        response += f"Name: {updated_name}\n"
        response += f"ID: {display_id}\n"

        logger.info(f"Updated contact group {resource_name} for {user_google_email}")
        return response

    if action == "delete":
        await asyncio.to_thread(
            service.contactGroups()
            .delete(resourceName=resource_name, deleteContacts=delete_contacts)
            .execute
        )

        response = f"Contact group {display_id} has been deleted for {user_google_email}."
        if delete_contacts:
            response += " Contacts in the group were also deleted."
        else:
            response += " Contacts in the group were preserved."

        logger.info(f"Deleted contact group {resource_name} for {user_google_email}")
        return response

    # action == "modify_members"
    if not add_contact_ids and not remove_contact_ids:
        raise UserInputError(
            "At least one of add_contact_ids or remove_contact_ids must be provided."
        )

    add_names = [_person_name(cid) for cid in add_contact_ids or []]
    remove_names = [_person_name(cid) for cid in remove_contact_ids or []]

    modify_body: Dict[str, Any] = {}
    if add_names:
        modify_body["resourceNamesToAdd"] = add_names
    if remove_names:
        modify_body["resourceNamesToRemove"] = remove_names

    result = await asyncio.to_thread(
        service.contactGroups()
        .members()
        .modify(resourceName=resource_name, body=modify_body)
        .execute
    )

    not_found = result.get("notFoundResourceNames", [])
    cannot_remove = result.get("canNotRemoveLastContactGroupResourceNames", [])

    # Count only what the API did not reject, so a partial failure is not
    # reported as a full success.
    missing = set(not_found)
    blocked = missing | set(cannot_remove)
    added_count = sum(1 for rname in add_names if rname not in missing)
    removed_count = sum(1 for rname in remove_names if rname not in blocked)

    response = f"Contact Group Members Modified for {user_google_email}:\n\n"
    response += f"Group: {display_id}\n"

    if add_names:
        response += f"Added: {added_count} contacts\n"
    if remove_names:
        response += f"Removed: {removed_count} contacts\n"

    if not_found:
        response += f"\nNot found: {', '.join(not_found)}\n"
    if cannot_remove:
        response += f"\nCannot remove (last group): {', '.join(cannot_remove)}\n"

    logger.info(
        f"Modified contact group members for {resource_name} for {user_google_email}"
    )
    return response
return response diff --git a/gdocs/__init__.py b/gdocs/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/gdocs/docs_helpers.py b/gdocs/docs_helpers.py new file mode 100644 index 0000000..0a26752 --- /dev/null +++ b/gdocs/docs_helpers.py @@ -0,0 +1,720 @@ +""" +Google Docs Helper Functions + +This module provides utility functions for common Google Docs operations +to simplify the implementation of document editing tools. +""" + +import logging +from typing import Dict, Any, Optional + +logger = logging.getLogger(__name__) + + +def _normalize_color( + color: Optional[str], param_name: str +) -> Optional[Dict[str, float]]: + """ + Normalize a user-supplied color into Docs API rgbColor format. + + Supports only hex strings in the form "#RRGGBB". + """ + if color is None: + return None + + if not isinstance(color, str): + raise ValueError(f"{param_name} must be a hex string like '#RRGGBB'") + + if len(color) != 7 or not color.startswith("#"): + raise ValueError(f"{param_name} must be a hex string like '#RRGGBB'") + + hex_color = color[1:] + if any(c not in "0123456789abcdefABCDEF" for c in hex_color): + raise ValueError(f"{param_name} must be a hex string like '#RRGGBB'") + + r = int(hex_color[0:2], 16) / 255 + g = int(hex_color[2:4], 16) / 255 + b = int(hex_color[4:6], 16) / 255 + return {"red": r, "green": g, "blue": b} + + +def build_text_style( + bold: bool = None, + italic: bool = None, + underline: bool = None, + font_size: int = None, + font_family: str = None, + text_color: str = None, + background_color: str = None, + link_url: str = None, +) -> tuple[Dict[str, Any], list[str]]: + """ + Build text style object for Google Docs API requests. 
+ + Args: + bold: Whether text should be bold + italic: Whether text should be italic + underline: Whether text should be underlined + font_size: Font size in points + font_family: Font family name + text_color: Text color as hex string "#RRGGBB" + background_color: Background (highlight) color as hex string "#RRGGBB" + link_url: Hyperlink URL (http/https) + + Returns: + Tuple of (text_style_dict, list_of_field_names) + """ + text_style = {} + fields = [] + + if bold is not None: + text_style["bold"] = bold + fields.append("bold") + + if italic is not None: + text_style["italic"] = italic + fields.append("italic") + + if underline is not None: + text_style["underline"] = underline + fields.append("underline") + + if font_size is not None: + text_style["fontSize"] = {"magnitude": font_size, "unit": "PT"} + fields.append("fontSize") + + if font_family is not None: + text_style["weightedFontFamily"] = {"fontFamily": font_family} + fields.append("weightedFontFamily") + + if text_color is not None: + rgb = _normalize_color(text_color, "text_color") + text_style["foregroundColor"] = {"color": {"rgbColor": rgb}} + fields.append("foregroundColor") + + if background_color is not None: + rgb = _normalize_color(background_color, "background_color") + text_style["backgroundColor"] = {"color": {"rgbColor": rgb}} + fields.append("backgroundColor") + + if link_url is not None: + text_style["link"] = {"url": link_url} + fields.append("link") + + return text_style, fields + + +def build_paragraph_style( + heading_level: int = None, + alignment: str = None, + line_spacing: float = None, + indent_first_line: float = None, + indent_start: float = None, + indent_end: float = None, + space_above: float = None, + space_below: float = None, + named_style_type: str = None, +) -> tuple[Dict[str, Any], list[str]]: + """ + Build paragraph style object for Google Docs API requests. 
+ + Args: + heading_level: Heading level 0-6 (0 = NORMAL_TEXT, 1-6 = HEADING_N) + alignment: Text alignment - 'START', 'CENTER', 'END', or 'JUSTIFIED' + line_spacing: Line spacing multiplier (1.0 = single, 2.0 = double) + indent_first_line: First line indent in points + indent_start: Left/start indent in points + indent_end: Right/end indent in points + space_above: Space above paragraph in points + space_below: Space below paragraph in points + named_style_type: Direct named style (TITLE, SUBTITLE, HEADING_1..6, NORMAL_TEXT). + Takes precedence over heading_level when both are provided. + + Returns: + Tuple of (paragraph_style_dict, list_of_field_names) + """ + paragraph_style = {} + fields = [] + + if named_style_type is not None: + valid_styles = [ + "NORMAL_TEXT", + "TITLE", + "SUBTITLE", + "HEADING_1", + "HEADING_2", + "HEADING_3", + "HEADING_4", + "HEADING_5", + "HEADING_6", + ] + if named_style_type not in valid_styles: + raise ValueError( + f"Invalid named_style_type '{named_style_type}'. " + f"Must be one of: {', '.join(valid_styles)}" + ) + paragraph_style["namedStyleType"] = named_style_type + fields.append("namedStyleType") + elif heading_level is not None: + if heading_level < 0 or heading_level > 6: + raise ValueError("heading_level must be between 0 (normal text) and 6") + if heading_level == 0: + paragraph_style["namedStyleType"] = "NORMAL_TEXT" + else: + paragraph_style["namedStyleType"] = f"HEADING_{heading_level}" + fields.append("namedStyleType") + + if alignment is not None: + valid_alignments = ["START", "CENTER", "END", "JUSTIFIED"] + alignment_upper = alignment.upper() + if alignment_upper not in valid_alignments: + raise ValueError( + f"Invalid alignment '{alignment}'. 
Must be one of: {valid_alignments}" + ) + paragraph_style["alignment"] = alignment_upper + fields.append("alignment") + + if line_spacing is not None: + if line_spacing <= 0: + raise ValueError("line_spacing must be positive") + paragraph_style["lineSpacing"] = line_spacing * 100 + fields.append("lineSpacing") + + if indent_first_line is not None: + paragraph_style["indentFirstLine"] = { + "magnitude": indent_first_line, + "unit": "PT", + } + fields.append("indentFirstLine") + + if indent_start is not None: + paragraph_style["indentStart"] = {"magnitude": indent_start, "unit": "PT"} + fields.append("indentStart") + + if indent_end is not None: + paragraph_style["indentEnd"] = {"magnitude": indent_end, "unit": "PT"} + fields.append("indentEnd") + + if space_above is not None: + paragraph_style["spaceAbove"] = {"magnitude": space_above, "unit": "PT"} + fields.append("spaceAbove") + + if space_below is not None: + paragraph_style["spaceBelow"] = {"magnitude": space_below, "unit": "PT"} + fields.append("spaceBelow") + + return paragraph_style, fields + + +def create_insert_text_request( + index: int, text: str, tab_id: Optional[str] = None +) -> Dict[str, Any]: + """ + Create an insertText request for Google Docs API. + + Args: + index: Position to insert text + text: Text to insert + tab_id: Optional ID of the tab to target + + Returns: + Dictionary representing the insertText request + """ + location = {"index": index} + if tab_id: + location["tabId"] = tab_id + return {"insertText": {"location": location, "text": text}} + + +def create_insert_text_segment_request( + index: int, text: str, segment_id: str, tab_id: Optional[str] = None +) -> Dict[str, Any]: + """ + Create an insertText request for Google Docs API with segmentId (for headers/footers). 
+ + Args: + index: Position to insert text + text: Text to insert + segment_id: Segment ID (for targeting headers/footers) + tab_id: Optional ID of the tab to target + + Returns: + Dictionary representing the insertText request with segmentId and optional tabId + """ + location = {"segmentId": segment_id, "index": index} + if tab_id: + location["tabId"] = tab_id + return { + "insertText": { + "location": location, + "text": text, + } + } + + +def create_delete_range_request( + start_index: int, end_index: int, tab_id: Optional[str] = None +) -> Dict[str, Any]: + """ + Create a deleteContentRange request for Google Docs API. + + Args: + start_index: Start position of content to delete + end_index: End position of content to delete + tab_id: Optional ID of the tab to target + + Returns: + Dictionary representing the deleteContentRange request + """ + range_obj = {"startIndex": start_index, "endIndex": end_index} + if tab_id: + range_obj["tabId"] = tab_id + return {"deleteContentRange": {"range": range_obj}} + + +def create_format_text_request( + start_index: int, + end_index: int, + bold: bool = None, + italic: bool = None, + underline: bool = None, + font_size: int = None, + font_family: str = None, + text_color: str = None, + background_color: str = None, + link_url: str = None, + tab_id: Optional[str] = None, +) -> Optional[Dict[str, Any]]: + """ + Create an updateTextStyle request for Google Docs API. 
+ + Args: + start_index: Start position of text to format + end_index: End position of text to format + bold: Whether text should be bold + italic: Whether text should be italic + underline: Whether text should be underlined + font_size: Font size in points + font_family: Font family name + text_color: Text color as hex string "#RRGGBB" + background_color: Background (highlight) color as hex string "#RRGGBB" + link_url: Hyperlink URL (http/https) + tab_id: Optional ID of the tab to target + + Returns: + Dictionary representing the updateTextStyle request, or None if no styles provided + """ + text_style, fields = build_text_style( + bold, + italic, + underline, + font_size, + font_family, + text_color, + background_color, + link_url, + ) + + if not text_style: + return None + + range_obj = {"startIndex": start_index, "endIndex": end_index} + if tab_id: + range_obj["tabId"] = tab_id + + return { + "updateTextStyle": { + "range": range_obj, + "textStyle": text_style, + "fields": ",".join(fields), + } + } + + +def create_update_paragraph_style_request( + start_index: int, + end_index: int, + heading_level: int = None, + alignment: str = None, + line_spacing: float = None, + indent_first_line: float = None, + indent_start: float = None, + indent_end: float = None, + space_above: float = None, + space_below: float = None, + tab_id: Optional[str] = None, + named_style_type: str = None, +) -> Optional[Dict[str, Any]]: + """ + Create an updateParagraphStyle request for Google Docs API. 
+ + Args: + start_index: Start position of paragraph range + end_index: End position of paragraph range + heading_level: Heading level 0-6 (0 = NORMAL_TEXT, 1-6 = HEADING_N) + alignment: Text alignment - 'START', 'CENTER', 'END', or 'JUSTIFIED' + line_spacing: Line spacing multiplier (1.0 = single, 2.0 = double) + indent_first_line: First line indent in points + indent_start: Left/start indent in points + indent_end: Right/end indent in points + space_above: Space above paragraph in points + space_below: Space below paragraph in points + tab_id: Optional ID of the tab to target + named_style_type: Direct named style (TITLE, SUBTITLE, HEADING_1..6, NORMAL_TEXT) + + Returns: + Dictionary representing the updateParagraphStyle request, or None if no styles provided + """ + paragraph_style, fields = build_paragraph_style( + heading_level, + alignment, + line_spacing, + indent_first_line, + indent_start, + indent_end, + space_above, + space_below, + named_style_type, + ) + + if not paragraph_style: + return None + + range_obj = {"startIndex": start_index, "endIndex": end_index} + if tab_id: + range_obj["tabId"] = tab_id + + return { + "updateParagraphStyle": { + "range": range_obj, + "paragraphStyle": paragraph_style, + "fields": ",".join(fields), + } + } + + +def create_find_replace_request( + find_text: str, + replace_text: str, + match_case: bool = False, + tab_id: Optional[str] = None, +) -> Dict[str, Any]: + """ + Create a replaceAllText request for Google Docs API. 
+ + Args: + find_text: Text to find + replace_text: Text to replace with + match_case: Whether to match case exactly + tab_id: Optional ID of the tab to target + + Returns: + Dictionary representing the replaceAllText request + """ + request = { + "replaceAllText": { + "containsText": {"text": find_text, "matchCase": match_case}, + "replaceText": replace_text, + } + } + if tab_id: + request["replaceAllText"]["tabsCriteria"] = {"tabIds": [tab_id]} + return request + + +def create_insert_table_request( + index: int, rows: int, columns: int, tab_id: Optional[str] = None +) -> Dict[str, Any]: + """ + Create an insertTable request for Google Docs API. + + Args: + index: Position to insert table + rows: Number of rows + columns: Number of columns + tab_id: Optional ID of the tab to target + + Returns: + Dictionary representing the insertTable request + """ + location = {"index": index} + if tab_id: + location["tabId"] = tab_id + return {"insertTable": {"location": location, "rows": rows, "columns": columns}} + + +def create_insert_page_break_request( + index: int, tab_id: Optional[str] = None +) -> Dict[str, Any]: + """ + Create an insertPageBreak request for Google Docs API. + + Args: + index: Position to insert page break + tab_id: Optional ID of the tab to target + + Returns: + Dictionary representing the insertPageBreak request + """ + location = {"index": index} + if tab_id: + location["tabId"] = tab_id + return {"insertPageBreak": {"location": location}} + + +def create_insert_doc_tab_request( + title: str, index: int, parent_tab_id: Optional[str] = None +) -> Dict[str, Any]: + """ + Create an addDocumentTab request for Google Docs API. 
+ + Args: + title: Title of the new tab + index: Position to insert the tab + parent_tab_id: Optional ID of the parent tab to nest under + + Returns: + Dictionary representing the addDocumentTab request + """ + tab_properties: Dict[str, Any] = { + "title": title, + "index": index, + } + if parent_tab_id: + tab_properties["parentTabId"] = parent_tab_id + return { + "addDocumentTab": { + "tabProperties": tab_properties, + } + } + + +def create_delete_doc_tab_request(tab_id: str) -> Dict[str, Any]: + """ + Create a deleteDocumentTab request for Google Docs API. + + Args: + tab_id: ID of the tab to delete + + Returns: + Dictionary representing the deleteDocumentTab request + """ + return {"deleteTab": {"tabId": tab_id}} + + +def create_update_doc_tab_request(tab_id: str, title: str) -> Dict[str, Any]: + """ + Create an updateDocumentTab request for Google Docs API. + + Args: + tab_id: ID of the tab to update + title: New title for the tab + + Returns: + Dictionary representing the updateDocumentTab request + """ + return { + "updateDocumentTabProperties": { + "tabProperties": { + "tabId": tab_id, + "title": title, + }, + "fields": "title", + } + } + + +def create_insert_image_request( + index: int, + image_uri: str, + width: int = None, + height: int = None, + tab_id: Optional[str] = None, +) -> Dict[str, Any]: + """ + Create an insertInlineImage request for Google Docs API. 
+ + Args: + index: Position to insert image + image_uri: URI of the image (Drive URL or public URL) + width: Image width in points + height: Image height in points + tab_id: Optional ID of the tab to target + + Returns: + Dictionary representing the insertInlineImage request + """ + location = {"index": index} + if tab_id: + location["tabId"] = tab_id + + request = {"insertInlineImage": {"location": location, "uri": image_uri}} + + # Add size properties if specified + object_size = {} + if width is not None: + object_size["width"] = {"magnitude": width, "unit": "PT"} + if height is not None: + object_size["height"] = {"magnitude": height, "unit": "PT"} + + if object_size: + request["insertInlineImage"]["objectSize"] = object_size + + return request + + +def create_bullet_list_request( + start_index: int, + end_index: int, + list_type: str = "UNORDERED", + nesting_level: int = None, + paragraph_start_indices: Optional[list[int]] = None, + doc_tab_id: Optional[str] = None, +) -> list[Dict[str, Any]]: + """ + Create requests to apply bullet list formatting with optional nesting. + + Google Docs infers list nesting from leading tab characters. To set a nested + level, this helper inserts literal tab characters before each targeted + paragraph, then calls createParagraphBullets. This is a Docs API workaround + and does temporarily mutate content/index positions while the batch executes. + + Args: + start_index: Start of text range to convert to list + end_index: End of text range to convert to list + list_type: Type of list ("UNORDERED" or "ORDERED") + nesting_level: Nesting level (0-8, where 0 is top level). If None or 0, no tabs added. + paragraph_start_indices: Optional paragraph start positions for ranges with + multiple paragraphs. If omitted, only start_index is tab-prefixed. 
+ doc_tab_id: Optional ID of the tab to target + + Returns: + List of request dictionaries (insertText for nesting tabs if needed, + then createParagraphBullets) + """ + bullet_preset = ( + "BULLET_DISC_CIRCLE_SQUARE" + if list_type == "UNORDERED" + else "NUMBERED_DECIMAL_ALPHA_ROMAN" + ) + + # Validate nesting level + if nesting_level is not None: + if not isinstance(nesting_level, int): + raise ValueError("nesting_level must be an integer between 0 and 8") + if nesting_level < 0 or nesting_level > 8: + raise ValueError("nesting_level must be between 0 and 8") + + requests = [] + + # Insert tabs for nesting if needed (nesting_level > 0). + # For multi-paragraph ranges, callers should provide paragraph_start_indices. + if nesting_level and nesting_level > 0: + tabs = "\t" * nesting_level + paragraph_starts = paragraph_start_indices or [start_index] + paragraph_starts = sorted(set(paragraph_starts)) + + if any(not isinstance(idx, int) for idx in paragraph_starts): + raise ValueError("paragraph_start_indices must contain only integers") + + original_start = start_index + original_end = end_index + inserted_char_count = 0 + + for paragraph_start in paragraph_starts: + adjusted_start = paragraph_start + inserted_char_count + requests.append( + create_insert_text_request(adjusted_start, tabs, doc_tab_id) + ) + inserted_char_count += nesting_level + + # Keep createParagraphBullets range aligned to the same logical content. 
+ start_index += ( + sum(1 for idx in paragraph_starts if idx < original_start) * nesting_level + ) + end_index += ( + sum(1 for idx in paragraph_starts if idx < original_end) * nesting_level + ) + + # Create the bullet list + range_obj = {"startIndex": start_index, "endIndex": end_index} + if doc_tab_id: + range_obj["tabId"] = doc_tab_id + + requests.append( + { + "createParagraphBullets": { + "range": range_obj, + "bulletPreset": bullet_preset, + } + } + ) + + return requests + + +def create_delete_bullet_list_request( + start_index: int, + end_index: int, + doc_tab_id: Optional[str] = None, +) -> Dict[str, Any]: + """ + Create a deleteParagraphBullets request to remove bullet/list formatting. + + Args: + start_index: Start of the paragraph range + end_index: End of the paragraph range + doc_tab_id: Optional ID of the tab to target + + Returns: + Dictionary representing the deleteParagraphBullets request + """ + range_obj = {"startIndex": start_index, "endIndex": end_index} + if doc_tab_id: + range_obj["tabId"] = doc_tab_id + + return { + "deleteParagraphBullets": { + "range": range_obj, + } + } + + +def validate_operation(operation: Dict[str, Any]) -> tuple[bool, str]: + """ + Validate a batch operation dictionary. 
+ + Args: + operation: Operation dictionary to validate + + Returns: + Tuple of (is_valid, error_message) + """ + op_type = operation.get("type") + if not op_type: + return False, "Missing 'type' field" + + # Validate required fields for each operation type + required_fields = { + "insert_text": ["index", "text"], + "delete_text": ["start_index", "end_index"], + "replace_text": ["start_index", "end_index", "text"], + "format_text": ["start_index", "end_index"], + "update_paragraph_style": ["start_index", "end_index"], + "insert_table": ["index", "rows", "columns"], + "insert_page_break": ["index"], + "find_replace": ["find_text", "replace_text"], + "create_bullet_list": ["start_index", "end_index"], + "insert_doc_tab": ["title", "index"], + "delete_doc_tab": ["tab_id"], + "update_doc_tab": ["tab_id", "title"], + } + + if op_type not in required_fields: + return False, f"Unsupported operation type: {op_type or 'None'}" + + for field in required_fields[op_type]: + if field not in operation: + return False, f"Missing required field: {field}" + + return True, "" diff --git a/gdocs/docs_markdown.py b/gdocs/docs_markdown.py new file mode 100644 index 0000000..d9c183d --- /dev/null +++ b/gdocs/docs_markdown.py @@ -0,0 +1,344 @@ +""" +Google Docs to Markdown Converter + +Converts Google Docs API JSON responses to clean Markdown, preserving: +- Headings (H1-H6, Title, Subtitle) +- Bold, italic, strikethrough, code, links +- Ordered and unordered lists with nesting +- Checklists with checked/unchecked state +- Tables with header row separators +""" + +from __future__ import annotations + +import logging +from typing import Any + +logger = logging.getLogger(__name__) + +MONO_FONTS = {"Courier New", "Consolas", "Roboto Mono", "Source Code Pro"} + +HEADING_MAP = { + "TITLE": "#", + "SUBTITLE": "##", + "HEADING_1": "#", + "HEADING_2": "##", + "HEADING_3": "###", + "HEADING_4": "####", + "HEADING_5": "#####", + "HEADING_6": "######", +} + + +def convert_doc_to_markdown(doc: 
dict[str, Any]) -> str: + """Convert a Google Docs API document response to markdown. + + Args: + doc: The document JSON from docs.documents.get() + + Returns: + Markdown string + """ + body = doc.get("body", {}) + content = body.get("content", []) + lists_meta = doc.get("lists", {}) + + lines: list[str] = [] + ordered_counters: dict[tuple[str, int], int] = {} + prev_was_list = False + + for element in content: + if "paragraph" in element: + para = element["paragraph"] + text = _convert_paragraph_text(para) + + if not text.strip(): + if prev_was_list: + prev_was_list = False + continue + + bullet = para.get("bullet") + if bullet: + list_id = bullet["listId"] + nesting = bullet.get("nestingLevel", 0) + + if _is_checklist(lists_meta, list_id, nesting): + checked = _is_checked(para) + checkbox = "[x]" if checked else "[ ]" + indent = " " * nesting + # Re-render text without strikethrough for checked items + # to avoid redundant ~~text~~ alongside [x] + cb_text = ( + _convert_paragraph_text(para, skip_strikethrough=True) + if checked + else text + ) + lines.append(f"{indent}- {checkbox} {cb_text}") + elif _is_ordered_list(lists_meta, list_id, nesting): + key = (list_id, nesting) + ordered_counters[key] = ordered_counters.get(key, 0) + 1 + counter = ordered_counters[key] + indent = " " * nesting + lines.append(f"{indent}{counter}. 
{text}") + else: + indent = " " * nesting + lines.append(f"{indent}- {text}") + prev_was_list = True + else: + if prev_was_list: + ordered_counters.clear() + lines.append("") + prev_was_list = False + + style = para.get("paragraphStyle", {}) + named_style = style.get("namedStyleType", "NORMAL_TEXT") + prefix = HEADING_MAP.get(named_style, "") + + if prefix: + lines.append(f"{prefix} {text}") + lines.append("") + else: + lines.append(text) + lines.append("") + + elif "table" in element: + if prev_was_list: + ordered_counters.clear() + lines.append("") + prev_was_list = False + table_md = _convert_table(element["table"]) + lines.append(table_md) + lines.append("") + + result = "\n".join(lines).rstrip("\n") + "\n" + return result + + +def _convert_paragraph_text( + para: dict[str, Any], skip_strikethrough: bool = False +) -> str: + """Convert paragraph elements to inline markdown text.""" + parts: list[str] = [] + for elem in para.get("elements", []): + if "textRun" in elem: + parts.append(_convert_text_run(elem["textRun"], skip_strikethrough)) + return "".join(parts).strip() + + +def _convert_text_run( + text_run: dict[str, Any], skip_strikethrough: bool = False +) -> str: + """Convert a single text run to markdown.""" + content = text_run.get("content", "") + style = text_run.get("textStyle", {}) + + text = content.rstrip("\n") + if not text: + return "" + + return _apply_text_style(text, style, skip_strikethrough) + + +def _apply_text_style( + text: str, style: dict[str, Any], skip_strikethrough: bool = False +) -> str: + """Apply markdown formatting based on text style.""" + link = style.get("link", {}) + url = link.get("url") + + font_family = style.get("weightedFontFamily", {}).get("fontFamily", "") + if font_family in MONO_FONTS: + return f"`{text}`" + + bold = style.get("bold", False) + italic = style.get("italic", False) + strikethrough = style.get("strikethrough", False) + + if bold and italic: + text = f"***{text}***" + elif bold: + text = f"**{text}**" + 
elif italic: + text = f"*{text}*" + + if strikethrough and not skip_strikethrough: + text = f"~~{text}~~" + + if url: + text = f"[{text}]({url})" + + return text + + +def _is_ordered_list(lists_meta: dict[str, Any], list_id: str, nesting: int) -> bool: + """Check if a list at a given nesting level is ordered.""" + list_info = lists_meta.get(list_id, {}) + nesting_levels = list_info.get("listProperties", {}).get("nestingLevels", []) + if nesting < len(nesting_levels): + level = nesting_levels[nesting] + glyph = level.get("glyphType", "") + return glyph not in ("", "GLYPH_TYPE_UNSPECIFIED") + return False + + +def _is_checklist(lists_meta: dict[str, Any], list_id: str, nesting: int) -> bool: + """Check if a list at a given nesting level is a checklist. + + Google Docs checklists are distinguished from regular bullet lists by having + GLYPH_TYPE_UNSPECIFIED with no glyphSymbol — the Docs UI renders interactive + checkboxes rather than a static glyph character. + """ + list_info = lists_meta.get(list_id, {}) + nesting_levels = list_info.get("listProperties", {}).get("nestingLevels", []) + if nesting < len(nesting_levels): + level = nesting_levels[nesting] + glyph_type = level.get("glyphType", "") + has_glyph_symbol = "glyphSymbol" in level + return glyph_type in ("", "GLYPH_TYPE_UNSPECIFIED") and not has_glyph_symbol + return False + + +def _is_checked(para: dict[str, Any]) -> bool: + """Check if a checklist item is checked. + + Google Docs marks checked checklist items by applying strikethrough + formatting to the paragraph text. 
+ """ + for elem in para.get("elements", []): + if "textRun" in elem: + content = elem["textRun"].get("content", "").strip() + if content: + return elem["textRun"].get("textStyle", {}).get("strikethrough", False) + return False + + +def _convert_table(table: dict[str, Any]) -> str: + """Convert a table element to markdown.""" + rows = table.get("tableRows", []) + if not rows: + return "" + + md_rows: list[str] = [] + for i, row in enumerate(rows): + cells: list[str] = [] + for cell in row.get("tableCells", []): + cell_text = _extract_cell_text(cell) + cells.append(cell_text) + md_rows.append("| " + " | ".join(cells) + " |") + + if i == 0: + sep = "| " + " | ".join("---" for _ in cells) + " |" + md_rows.append(sep) + + return "\n".join(md_rows) + + +def _extract_cell_text(cell: dict[str, Any]) -> str: + """Extract text from a table cell.""" + parts: list[str] = [] + for content_elem in cell.get("content", []): + if "paragraph" in content_elem: + text = _convert_paragraph_text(content_elem["paragraph"]) + if text.strip(): + parts.append(text.strip()) + cell_text = " ".join(parts) + return cell_text.replace("|", "\\|") + + +def format_comments_inline(markdown: str, comments: list[dict[str, Any]]) -> str: + """Insert footnote-style comment annotations inline in markdown. + + For each comment, finds the anchor text in the markdown and inserts + a footnote reference. Unmatched comments go to an appendix at the bottom. 
+ """ + if not comments: + return markdown + + footnotes: list[str] = [] + unmatched: list[dict[str, Any]] = [] + + for i, comment in enumerate(comments, 1): + ref = f"[^c{i}]" + anchor = comment.get("anchor_text", "") + + if anchor and anchor in markdown: + markdown = markdown.replace(anchor, anchor + ref, 1) + footnotes.append(_format_footnote(i, comment)) + else: + unmatched.append(comment) + + if footnotes: + markdown = markdown.rstrip("\n") + "\n\n" + "\n".join(footnotes) + "\n" + + if unmatched: + appendix = format_comments_appendix(unmatched) + if appendix.strip(): + markdown = markdown.rstrip("\n") + "\n\n" + appendix + + return markdown + + +def _format_footnote(num: int, comment: dict[str, Any]) -> str: + """Format a single footnote.""" + lines = [f"[^c{num}]: **{comment['author']}**: {comment['content']}"] + for reply in comment.get("replies", []): + lines.append(f" - **{reply['author']}**: {reply['content']}") + return "\n".join(lines) + + +def format_comments_appendix(comments: list[dict[str, Any]]) -> str: + """Format comments as an appendix section with blockquoted anchors.""" + if not comments: + return "" + + lines = ["## Comments", ""] + for comment in comments: + resolved_tag = " *(Resolved)*" if comment.get("resolved") else "" + anchor = comment.get("anchor_text", "") + if anchor: + lines.append(f"> {anchor}") + lines.append("") + lines.append(f"- **{comment['author']}**: {comment['content']}{resolved_tag}") + for reply in comment.get("replies", []): + lines.append(f" - **{reply['author']}**: {reply['content']}") + lines.append("") + + return "\n".join(lines) + + +def parse_drive_comments( + response: dict[str, Any], include_resolved: bool = False +) -> list[dict[str, Any]]: + """Parse Drive API comments response into structured dicts. 
+ + Args: + response: Raw JSON from drive.comments.list() + include_resolved: Whether to include resolved comments + + Returns: + List of comment dicts with keys: author, content, anchor_text, + replies, resolved + """ + results = [] + for comment in response.get("comments", []): + if not include_resolved and comment.get("resolved", False): + continue + + anchor_text = comment.get("quotedFileContent", {}).get("value", "") + replies = [ + { + "author": r.get("author", {}).get("displayName", "Unknown"), + "content": r.get("content", ""), + } + for r in comment.get("replies", []) + ] + results.append( + { + "author": comment.get("author", {}).get("displayName", "Unknown"), + "content": comment.get("content", ""), + "anchor_text": anchor_text, + "replies": replies, + "resolved": comment.get("resolved", False), + } + ) + return results diff --git a/gdocs/docs_structure.py b/gdocs/docs_structure.py new file mode 100644 index 0000000..d57da20 --- /dev/null +++ b/gdocs/docs_structure.py @@ -0,0 +1,357 @@ +""" +Google Docs Document Structure Parsing and Analysis + +This module provides utilities for parsing and analyzing the structure +of Google Docs documents, including finding tables, cells, and other elements. +""" + +import logging +from typing import Any, Optional + +logger = logging.getLogger(__name__) + + +def parse_document_structure(doc_data: dict[str, Any]) -> dict[str, Any]: + """ + Parse the full document structure into a navigable format. 
def _parse_element(element: dict[str, Any]) -> Optional[dict[str, Any]]:
    """
    Parse a single document element.

    Recognizes paragraphs, tables, section breaks and tables of contents.
    Any other kind of element is ignored.

    Args:
        element: Element data from document

    Returns:
        Parsed element information (always carrying "start_index",
        "end_index" and "type"), or None for an unrecognized element
    """
    element_info: dict[str, Any] = {
        "start_index": element.get("startIndex", 0),
        "end_index": element.get("endIndex", 0),
    }

    if "paragraph" in element:
        paragraph = element["paragraph"]
        element_info["type"] = "paragraph"
        element_info["text"] = _extract_paragraph_text(paragraph)
        element_info["style"] = paragraph.get("paragraphStyle", {})

    elif "table" in element:
        table = element["table"]
        table_rows = table.get("tableRows", [])
        element_info["type"] = "table"
        element_info["rows"] = len(table_rows)
        # The column count is read from the first row. Guard the lookup: a
        # "tableRows" list that is present but empty has no first row, and
        # indexing it would raise IndexError.
        element_info["columns"] = (
            len(table_rows[0].get("tableCells", [])) if table_rows else 0
        )
        element_info["cells"] = _parse_table_cells(table)
        element_info["table_style"] = table.get("tableStyle", {})

    elif "sectionBreak" in element:
        element_info["type"] = "section_break"
        element_info["section_style"] = element["sectionBreak"].get("sectionStyle", {})

    elif "tableOfContents" in element:
        element_info["type"] = "table_of_contents"

    else:
        return None

    return element_info


def _parse_table_cells(table: dict[str, Any]) -> list[list[dict[str, Any]]]:
    """
    Parse table cells with their positions and content.

    Args:
        table: Table element data

    Returns:
        2D list (rows, then columns) of cell information dictionaries with
        the keys "row", "column", "start_index", "end_index",
        "insertion_index", "content" and "content_elements"
    """
    cells = []
    for row_idx, row in enumerate(table.get("tableRows", [])):
        row_cells = []
        for col_idx, cell in enumerate(row.get("tableCells", [])):
            content_elements = cell.get("content", [])

            # Fallback insertion point: one past the cell's own start index.
            insertion_index = cell.get("startIndex", 0) + 1

            # Prefer the start index of the leading element of the first
            # paragraph that reports one.
            for element in content_elements:
                if "paragraph" not in element:
                    continue
                para_elements = element["paragraph"].get("elements", [])
                if para_elements and "startIndex" in para_elements[0]:
                    insertion_index = para_elements[0]["startIndex"]
                    break

            row_cells.append(
                {
                    "row": row_idx,
                    "column": col_idx,
                    "start_index": cell.get("startIndex", 0),
                    "end_index": cell.get("endIndex", 0),
                    "insertion_index": insertion_index,  # Where to insert text in this cell
                    "content": _extract_cell_text(cell),
                    "content_elements": content_elements,
                }
            )
        cells.append(row_cells)
    return cells


def _extract_paragraph_text(paragraph: dict[str, Any]) -> str:
    """Extract text from a paragraph element by joining its text runs."""
    return "".join(
        element["textRun"].get("content", "")
        for element in paragraph.get("elements", [])
        if "textRun" in element
    )


def _extract_cell_text(cell: dict[str, Any]) -> str:
    """Extract text content from a table cell by joining its paragraphs."""
    return "".join(
        _extract_paragraph_text(element["paragraph"])
        for element in cell.get("content", [])
        if "paragraph" in element
    )
def get_table_cell_indices(
    doc_data: dict[str, Any], table_index: int = 0
) -> Optional[list[list[tuple[int, int]]]]:
    """
    Get content indices for all cells in a specific table.

    For a cell whose first paragraph begins with a text run, the tuple is
    that run's (startIndex, endIndex). Otherwise the tuple falls back to the
    cell boundaries shrunk by one on each side.

    Args:
        doc_data: Raw document data from Google Docs API
        table_index: Index of the table (0-based)

    Returns:
        2D list of (start_index, end_index) tuples for each cell, or None if table not found
    """
    tables = find_tables(doc_data)

    # Reject negative indices as well: Python would otherwise count from the
    # end of the list (or raise IndexError), contradicting the 0-based contract.
    if not 0 <= table_index < len(tables):
        logger.warning(
            f"Table index {table_index} not found. Document has {len(tables)} tables."
        )
        return None

    table = tables[table_index]
    cell_indices = []

    for row in table["cells"]:
        row_indices = []
        for cell in row:
            # Locate the first paragraph in the cell, if any.
            first_para = next(
                (
                    element["paragraph"]
                    for element in cell.get("content_elements", [])
                    if "paragraph" in element
                ),
                None,
            )
            para_elements = first_para.get("elements") if first_para else None

            if para_elements and "textRun" in para_elements[0]:
                # Use the span of the leading text run of that paragraph.
                first_text_element = para_elements[0]
                start_idx = first_text_element.get(
                    "startIndex", cell["start_index"] + 1
                )
                end_idx = first_text_element.get("endIndex", start_idx + 1)
                row_indices.append((start_idx, end_idx))
                continue

            # Fallback: use cell boundaries with safe margins
            row_indices.append((cell["start_index"] + 1, cell["end_index"] - 1))
        cell_indices.append(row_indices)

    return cell_indices
def get_next_paragraph_index(doc_data: dict[str, Any], after_index: int = 0) -> int:
    """
    Find the next safe position to insert content after a given index.

    Args:
        doc_data: Raw document data from Google Docs API
        after_index: Index after which to find insertion point

    Returns:
        Start index of the first paragraph beginning strictly after
        after_index; otherwise one before the document's total length, or 1
        when the parsed document has no length
    """
    parsed = parse_document_structure(doc_data)

    # Lazily scan the body for paragraphs that begin past the requested index.
    paragraph_starts = (
        item["start_index"]
        for item in parsed["body"]
        if item["type"] == "paragraph" and item["start_index"] > after_index
    )
    next_start = next(paragraph_starts, None)
    if next_start is not None:
        return next_start

    # No later paragraph: fall back to the end of the document.
    doc_length = parsed["total_length"]
    if doc_length > 0:
        return doc_length - 1
    return 1
def build_table_population_requests(
    table_info: Dict[str, Any], data: List[List[str]], bold_headers: bool = True
) -> List[Dict[str, Any]]:
    """
    Build batch requests to populate a table with data.

    Text is only ever inserted, never deleted. Empty cells receive the text
    at their insertion index; cells that already hold text get the new text
    appended just before the cell's end marker. Empty strings in data are
    skipped.

    The returned requests are meant to be sent in order in one batch update.
    Each insertion moves everything after it, so the indices of later
    requests are shifted by the amount of text inserted so far. This assumes
    the cells in table_info are in ascending document order (row by row).

    Args:
        table_info: Table information from document structure including cell indices
        data: 2D array of data to insert into table
        bold_headers: Whether to make the first row bold

    Returns:
        List of request dictionaries for batch update
    """
    requests: List[Dict[str, Any]] = []
    cells = table_info.get("cells", [])

    if not cells:
        logger.warning("No cell information found in table_info")
        return requests

    # Total UTF-16 code units inserted by the requests built so far.
    shift = 0

    for row_idx, row_data in enumerate(data):
        if row_idx >= len(cells):
            logger.warning(
                f"Data has more rows ({len(data)}) than table ({len(cells)})"
            )
            break

        for col_idx, cell_text in enumerate(row_data):
            if col_idx >= len(cells[row_idx]):
                logger.warning(
                    f"Data has more columns ({len(row_data)}) than table row {row_idx} ({len(cells[row_idx])})"
                )
                break

            # Only insert if we have text to insert
            if not cell_text:
                continue

            cell = cells[row_idx][col_idx]

            if cell.get("content", "").strip():
                # Cell has content: append before the cell end marker.
                base_index = cell["end_index"] - 1
            else:
                # Cell is empty (at most whitespace): use its insertion index.
                base_index = cell.get("insertion_index", cell["start_index"] + 1)

            target_index = base_index + shift
            # Docs indices count UTF-16 code units, not Python code points.
            text_units = len(cell_text.encode("utf-16-le")) // 2

            requests.append(
                {
                    "insertText": {
                        "location": {"index": target_index},
                        "text": cell_text,
                    }
                }
            )

            # Apply bold formatting to first row if requested
            if bold_headers and row_idx == 0:
                requests.append(
                    {
                        "updateTextStyle": {
                            "range": {
                                "startIndex": target_index,
                                "endIndex": target_index + text_units,
                            },
                            "textStyle": {"bold": True},
                            "fields": "bold",
                        }
                    }
                )

            shift += text_units

    return requests
def create_table_with_data(
    index: int,
    data: List[List[str]],
    headers: Optional[List[str]] = None,
    bold_headers: bool = True,
) -> List[Dict[str, Any]]:
    """
    Build the request that creates a table sized to hold the given data.

    Only the insertTable request is produced. Filling the cells needs the
    actual cell indices, which are known only after the table exists and the
    document structure has been read again.

    Args:
        index: Position to insert the table
        data: 2D array of table data
        headers: Optional header row (will be prepended to data)
        bold_headers: Whether to make headers bold (accepted for interface
            compatibility; not used, because no text requests are built here)

    Returns:
        List of request dictionaries for batch update

    Raises:
        ValueError: If the data, or its first row, is empty
    """
    # Prepare data with headers if provided
    full_data = [headers] + data if headers else data

    # Normalize the data
    full_data = format_table_data(full_data)

    if not full_data or not full_data[0]:
        raise ValueError("Cannot create table with empty data")

    rows = len(full_data)
    # Size the table for the widest row, so a row longer than the first one
    # still fits instead of overflowing a table that is too narrow.
    cols = max(len(row) for row in full_data)

    return [
        {"insertTable": {"location": {"index": index}, "rows": rows, "columns": cols}}
    ]
+ + Args: + table_start_index: Starting index of the table + style_options: Dictionary of style options + - border_width: Width of borders in points + - border_color: RGB color for borders + - background_color: RGB color for cell backgrounds + - header_background: RGB color for header row background + + Returns: + List of request dictionaries for styling + """ + requests = [] + + # Table cell style update + if any( + k in style_options for k in ["border_width", "border_color", "background_color"] + ): + table_cell_style = {} + fields = [] + + if "border_width" in style_options: + border_width = {"magnitude": style_options["border_width"], "unit": "PT"} + table_cell_style["borderTop"] = {"width": border_width} + table_cell_style["borderBottom"] = {"width": border_width} + table_cell_style["borderLeft"] = {"width": border_width} + table_cell_style["borderRight"] = {"width": border_width} + fields.extend(["borderTop", "borderBottom", "borderLeft", "borderRight"]) + + if "border_color" in style_options: + border_color = {"color": {"rgbColor": style_options["border_color"]}} + if "borderTop" in table_cell_style: + table_cell_style["borderTop"]["color"] = border_color["color"] + table_cell_style["borderBottom"]["color"] = border_color["color"] + table_cell_style["borderLeft"]["color"] = border_color["color"] + table_cell_style["borderRight"]["color"] = border_color["color"] + + if "background_color" in style_options: + table_cell_style["backgroundColor"] = { + "color": {"rgbColor": style_options["background_color"]} + } + fields.append("backgroundColor") + + if table_cell_style and fields: + requests.append( + { + "updateTableCellStyle": { + "tableStartLocation": {"index": table_start_index}, + "tableCellStyle": table_cell_style, + "fields": ",".join(fields), + } + } + ) + + # Header row specific styling + if "header_background" in style_options: + requests.append( + { + "updateTableCellStyle": { + "tableRange": { + "tableCellLocation": { + "tableStartLocation": 
def validate_table_data(data: List[List[str]]) -> Tuple[bool, str]:
    """
    Validates table data format and provides specific error messages for LLMs.

    WHAT THIS CHECKS:
    - Data is a 2D list (list of lists)
    - All rows have consistent column counts
    - Rows contain at least one column
    - No None values
    - Dimensions are within Google Docs limits

    VALID FORMAT EXAMPLE:
    [
        ["Header1", "Header2"],  # Row 0 - 2 columns
        ["Data1", "Data2"],      # Row 1 - 2 columns
        ["Data3", "Data4"]       # Row 2 - 2 columns
    ]

    INVALID FORMATS:
    - [["col1"], ["col1", "col2"]]  # Inconsistent column counts
    - ["col1", "col2"]              # Not 2D (missing inner lists)
    - [["col1", None]]              # Contains None values
    - [] or [[]]                    # Empty data

    Args:
        data: 2D array of data to validate

    Returns:
        Tuple of (is_valid, error_message_with_examples)
    """
    if not data:
        return (
            False,
            "Data is empty. Use format: [['col1', 'col2'], ['row1col1', 'row1col2']]",
        )

    if not isinstance(data, list):
        return (
            False,
            f"Data must be a list, got {type(data).__name__}. Use format: [['col1', 'col2'], ['row1col1', 'row1col2']]",
        )

    if not all(isinstance(row, list) for row in data):
        return (
            False,
            f"Data must be a 2D list (list of lists). Each row must be a list. Check your format: {data}",
        )

    # Check for consistent column count
    col_counts = [len(row) for row in data]
    if len(set(col_counts)) > 1:
        return (
            False,
            f"All rows must have same number of columns. Found: {col_counts}. Fix your data format.",
        )

    rows = len(data)
    cols = col_counts[0]

    # Rows exist but hold no cells (e.g. [[]]): still empty data.
    if cols == 0:
        return (
            False,
            "Rows contain no columns. Use format: [['col1', 'col2'], ['row1col1', 'row1col2']]",
        )

    # Check for None values and report the first offending cell.
    for row_idx, row in enumerate(data):
        for col_idx, cell in enumerate(row):
            if cell is None:
                return (
                    False,
                    f"Cell at row {row_idx}, column {col_idx} is None. Use an empty string '' for empty cells.",
                )

    # Check for reasonable size
    if rows > 1000:
        return False, f"Too many rows ({rows}). Google Docs limit is 1000 rows."

    if cols > 20:
        return False, f"Too many columns ({cols}). Google Docs limit is 20 columns."

    return True, f"Valid table data: {rows}x{cols} table format"
search_docs( + service: Any, + user_google_email: str, + query: str, + page_size: int = 10, +) -> str: + """ + Searches for Google Docs by name using Drive API (mimeType filter). + + Returns: + str: A formatted list of Google Docs matching the search query. + """ + logger.info(f"[search_docs] Email={user_google_email}, Query='{query}'") + + escaped_query = query.replace("'", "\\'") + + response = await asyncio.to_thread( + service.files() + .list( + q=f"name contains '{escaped_query}' and mimeType='application/vnd.google-apps.document' and trashed=false", + pageSize=page_size, + fields="files(id, name, createdTime, modifiedTime, webViewLink)", + supportsAllDrives=True, + includeItemsFromAllDrives=True, + ) + .execute + ) + files = response.get("files", []) + if not files: + return f"No Google Docs found matching '{query}'." + + output = [f"Found {len(files)} Google Docs matching '{query}':"] + for f in files: + output.append( + f"- {f['name']} (ID: {f['id']}) Modified: {f.get('modifiedTime')} Link: {f.get('webViewLink')}" + ) + return "\n".join(output) + + +@server.tool() +@handle_http_errors("get_doc_content", is_read_only=True, service_type="docs") +@require_multiple_services( + [ + { + "service_type": "drive", + "scopes": "drive_read", + "param_name": "drive_service", + }, + {"service_type": "docs", "scopes": "docs_read", "param_name": "docs_service"}, + ] +) +async def get_doc_content( + drive_service: Any, + docs_service: Any, + user_google_email: str, + document_id: str, +) -> str: + """ + Retrieves content of a Google Doc or a Drive file (like .docx) identified by document_id. + - Native Google Docs: Fetches content via Docs API. + - Office files (.docx, etc.) stored in Drive: Downloads via Drive API and extracts text. + + Returns: + str: The document content with metadata header. + """ + logger.info( + f"[get_doc_content] Invoked. 
Document/File ID: '{document_id}' for user '{user_google_email}'" + ) + + # Step 2: Get file metadata from Drive + file_metadata = await asyncio.to_thread( + drive_service.files() + .get( + fileId=document_id, + fields="id, name, mimeType, webViewLink", + supportsAllDrives=True, + ) + .execute + ) + mime_type = file_metadata.get("mimeType", "") + file_name = file_metadata.get("name", "Unknown File") + web_view_link = file_metadata.get("webViewLink", "#") + + logger.info( + f"[get_doc_content] File '{file_name}' (ID: {document_id}) has mimeType: '{mime_type}'" + ) + + body_text = "" # Initialize body_text + + # Step 3: Process based on mimeType + if mime_type == "application/vnd.google-apps.document": + logger.info("[get_doc_content] Processing as native Google Doc.") + doc_data = await asyncio.to_thread( + docs_service.documents() + .get(documentId=document_id, includeTabsContent=True) + .execute + ) + # Tab header format constant + TAB_HEADER_FORMAT = "\n--- TAB: {tab_name} (ID: {tab_id}) ---\n" + + def extract_text_from_elements(elements, tab_name=None, tab_id=None, depth=0): + """Extract text from document elements (paragraphs, tables, etc.)""" + # Prevent infinite recursion by limiting depth + if depth > 5: + return "" + text_lines = [] + if tab_name: + text_lines.append( + TAB_HEADER_FORMAT.format(tab_name=tab_name, tab_id=tab_id) + ) + + for element in elements: + if "paragraph" in element: + paragraph = element.get("paragraph", {}) + para_elements = paragraph.get("elements", []) + current_line_text = "" + for pe in para_elements: + text_run = pe.get("textRun", {}) + if text_run and "content" in text_run: + current_line_text += text_run["content"] + if current_line_text.strip(): + text_lines.append(current_line_text) + elif "table" in element: + # Handle table content + table = element.get("table", {}) + table_rows = table.get("tableRows", []) + for row in table_rows: + row_cells = row.get("tableCells", []) + for cell in row_cells: + cell_content = 
cell.get("content", []) + cell_text = extract_text_from_elements( + cell_content, depth=depth + 1 + ) + if cell_text.strip(): + text_lines.append(cell_text) + return "".join(text_lines) + + def process_tab_hierarchy(tab, level=0): + """Process a tab and its nested child tabs recursively""" + tab_text = "" + + if "documentTab" in tab: + props = tab.get("tabProperties", {}) + tab_title = props.get("title", "Untitled Tab") + tab_id = props.get("tabId", "Unknown ID") + # Add indentation for nested tabs to show hierarchy + if level > 0: + tab_title = " " * level + f"{tab_title}" + tab_body = tab.get("documentTab", {}).get("body", {}).get("content", []) + tab_text += extract_text_from_elements(tab_body, tab_title, tab_id) + + # Process child tabs (nested tabs) + child_tabs = tab.get("childTabs", []) + for child_tab in child_tabs: + tab_text += process_tab_hierarchy(child_tab, level + 1) + + return tab_text + + processed_text_lines = [] + + # Process main document body + body_elements = doc_data.get("body", {}).get("content", []) + main_content = extract_text_from_elements(body_elements) + if main_content.strip(): + processed_text_lines.append(main_content) + + # Process all tabs + tabs = doc_data.get("tabs", []) + for tab in tabs: + tab_content = process_tab_hierarchy(tab) + if tab_content.strip(): + processed_text_lines.append(tab_content) + + body_text = "".join(processed_text_lines) + else: + logger.info( + f"[get_doc_content] Processing as Drive file (e.g., .docx, other). MimeType: {mime_type}" + ) + + export_mime_type_map = { + # Example: "application/vnd.google-apps.spreadsheet"z: "text/csv", + # Native GSuite types that are not Docs would go here if this function + # was intended to export them. For .docx, direct download is used. 
@server.tool()
@handle_http_errors("list_docs_in_folder", is_read_only=True, service_type="docs")
@require_google_service("drive", "drive_read")
async def list_docs_in_folder(
    service: Any, user_google_email: str, folder_id: str = "root", page_size: int = 100
) -> str:
    """
    Lists Google Docs within a specific Drive folder.

    Args:
        service: Drive API client (presumably injected by the
            require_google_service decorator; confirm against its definition)
        user_google_email: User's Google email address
        folder_id: ID of the Drive folder to list
        page_size: Maximum number of files requested from the Drive API

    Returns:
        str: A formatted list of Google Docs in the specified folder.
    """
    logger.info(
        f"[list_docs_in_folder] Invoked. Email: '{user_google_email}', Folder ID: '{folder_id}'"
    )

    # folder_id ends up inside a single-quoted literal of the Drive query, so
    # escape backslashes first and then single quotes. Otherwise a stray
    # quote would break the query or change its meaning.
    escaped_folder_id = folder_id.replace("\\", "\\\\").replace("'", "\\'")

    rsp = await asyncio.to_thread(
        service.files()
        .list(
            q=f"'{escaped_folder_id}' in parents and mimeType='application/vnd.google-apps.document' and trashed=false",
            pageSize=page_size,
            fields="files(id, name, modifiedTime, webViewLink)",
            supportsAllDrives=True,
            includeItemsFromAllDrives=True,
        )
        .execute
    )
    items = rsp.get("files", [])
    if not items:
        return f"No Google Docs found in folder '{folder_id}'."

    out = [f"Found {len(items)} Docs in folder '{folder_id}':"]
    out.extend(
        f"- {f['name']} (ID: {f['id']}) Modified: {f.get('modifiedTime')} Link: {f.get('webViewLink')}"
        for f in items
    )
    return "\n".join(out)
def _docs_index_length(value: str) -> int:
    """Return the length of ``value`` in UTF-16 code units.

    The Docs API measures indices in UTF-16 code units, so a character outside
    the Basic Multilingual Plane (most emoji, for example) advances an index by
    two while ``len`` counts it once.
    """
    return len(value.encode("utf-16-le", errors="surrogatepass")) // 2


@server.tool()
@handle_http_errors("modify_doc_text", service_type="docs")
@require_google_service("docs", "docs_write")
async def modify_doc_text(
    service: Any,
    user_google_email: str,
    document_id: str,
    start_index: int,
    end_index: int = None,
    text: str = None,
    bold: bool = None,
    italic: bool = None,
    underline: bool = None,
    font_size: int = None,
    font_family: str = None,
    text_color: str = None,
    background_color: str = None,
    link_url: str = None,
) -> str:
    """
    Modifies text in a Google Doc - can insert/replace text and/or apply formatting in a single operation.

    Args:
        user_google_email: User's Google email address
        document_id: ID of the document to update
        start_index: Start position for operation (0-based)
        end_index: End position for text replacement/formatting (if not provided with text, text is inserted).
            Required when applying formatting without 'text'.
        text: New text to insert or replace with (optional - can format existing text without changing it)
        bold: Whether to make text bold (True/False/None to leave unchanged)
        italic: Whether to make text italic (True/False/None to leave unchanged)
        underline: Whether to underline text (True/False/None to leave unchanged)
        font_size: Font size in points
        font_family: Font family name (e.g., "Arial", "Times New Roman")
        text_color: Foreground text color (#RRGGBB)
        background_color: Background/highlight color (#RRGGBB)
        link_url: Hyperlink URL (http/https)

    Returns:
        str: Confirmation message with operation details
    """
    formatting = {
        "bold": bold,
        "italic": italic,
        "underline": underline,
        "font_size": font_size,
        "font_family": font_family,
        "text_color": text_color,
        "background_color": background_color,
        "link_url": link_url,
    }
    has_formatting = any(value is not None for value in formatting.values())

    logger.info(
        f"[modify_doc_text] Doc={document_id}, start={start_index}, end={end_index}, text={text is not None}, "
        f"formatting={has_formatting}"
    )

    # Input validation
    validator = ValidationManager()

    is_valid, error_msg = validator.validate_document_id(document_id)
    if not is_valid:
        return f"Error: {error_msg}"

    # Validate that we have something to do
    if text is None and not has_formatting:
        return "Error: Must provide either 'text' to insert/replace, or formatting parameters (bold, italic, underline, font_size, font_family, text_color, background_color, link_url)."

    if has_formatting:
        is_valid, error_msg = validator.validate_text_formatting_params(
            bold,
            italic,
            underline,
            font_size,
            font_family,
            text_color,
            background_color,
            link_url,
        )
        if not is_valid:
            return f"Error: {error_msg}"

        if end_index is None:
            # Formatting existing text needs an explicit range. When text is
            # being inserted, the inserted text itself defines the range, so
            # end_index may be omitted.
            if text is None:
                return "Error: 'end_index' is required when applying formatting."
        else:
            is_valid, error_msg = validator.validate_index_range(
                start_index, end_index
            )
            if not is_valid:
                return f"Error: {error_msg}"

    # Index 0 is the first section break and cannot be written to or deleted,
    # so every operation that targets it is shifted to index 1.
    effective_start = 1 if start_index == 0 else start_index
    # Offsets sent to the API must be counted in UTF-16 code units.
    text_units = _docs_index_length(text) if text is not None else 0
    # A replacement needs at least one deletable position after the effective
    # start; otherwise (e.g. start=0, end=1) it degrades to a plain insertion
    # instead of emitting an empty delete range.
    is_replacement = (
        text is not None and end_index is not None and end_index > effective_start
    )

    requests = []
    operations = []

    # Handle text insertion/replacement
    if text is not None:
        if is_replacement:
            if start_index == 0:
                # Cannot delete at index 0: insert the new text at index 1
                # first, then delete the old text, which has been pushed right
                # by the length of the inserted text.
                requests.append(create_insert_text_request(1, text))
                requests.append(
                    create_delete_range_request(
                        1 + text_units, end_index + text_units
                    )
                )
            else:
                # Normal replacement: delete old text, then insert new text
                requests.append(create_delete_range_request(start_index, end_index))
                requests.append(create_insert_text_request(start_index, text))
            operations.append(f"Replaced text from index {start_index} to {end_index}")
        else:
            requests.append(create_insert_text_request(effective_start, text))
            operations.append(f"Inserted text at index {start_index}")

    # Handle formatting
    if has_formatting:
        format_start = effective_start
        if text is not None:
            # New text always ends up at [effective_start, effective_start + units),
            # including the index-0 replacement case.
            format_end = effective_start + text_units
        else:
            format_end = end_index
            # Formatting-only request whose range collapsed after the shift
            # away from index 0: cover at least one position.
            if format_end <= format_start:
                format_end = format_start + 1

        requests.append(
            create_format_text_request(
                format_start,
                format_end,
                bold,
                italic,
                underline,
                font_size,
                font_family,
                text_color,
                background_color,
                link_url,
            )
        )

        format_details = [
            f"{name}={value}"
            for name, value in formatting.items()
            if value is not None
        ]
        operations.append(
            f"Applied formatting ({', '.join(format_details)}) to range {format_start}-{format_end}"
        )

    await asyncio.to_thread(
        service.documents()
        .batchUpdate(documentId=document_id, body={"requests": requests})
        .execute
    )

    link = f"https://docs.google.com/document/d/{document_id}/edit"
    operation_summary = "; ".join(operations)
    text_info = f" Text length: {len(text)} characters." if text else ""
    return f"{operation_summary} in document {document_id}.{text_info} Link: {link}"
@server.tool()
@handle_http_errors("insert_doc_elements", service_type="docs")
@require_google_service("docs", "docs_write")
async def insert_doc_elements(
    service: Any,
    user_google_email: str,
    document_id: str,
    element_type: str,
    index: int,
    rows: int = None,
    columns: int = None,
    list_type: str = None,
    text: str = None,
) -> str:
    """
    Inserts structural elements like tables, lists, or page breaks into a Google Doc.

    Args:
        user_google_email: User's Google email address
        document_id: ID of the document to update
        element_type: Type of element to insert ("table", "list", "page_break")
        index: Position to insert element (0-based)
        rows: Number of rows for table (required for table)
        columns: Number of columns for table (required for table)
        list_type: Type of list ("UNORDERED", "ORDERED") (required for list)
        text: Initial text content for list items

    Returns:
        str: Confirmation message with insertion details
    """
    logger.info(
        f"[insert_doc_elements] Doc={document_id}, type={element_type}, index={index}"
    )

    # The first section break sits at index 0 and cannot be an insertion
    # point, so a request for 0 is moved to 1.
    if index == 0:
        logger.debug("Adjusting index from 0 to 1 to avoid first section break")
        index = 1

    # Build the request batch and a human-readable label for the element.
    if element_type == "page_break":
        batch = [create_insert_page_break_request(index)]
        label = "page break"
    elif element_type == "table":
        if not (rows and columns):
            return "Error: 'rows' and 'columns' parameters are required for table insertion."
        batch = [create_insert_table_request(index, rows, columns)]
        label = f"table ({rows}x{columns})"
    elif element_type == "list":
        if not list_type:
            return "Error: 'list_type' parameter is required for list insertion ('UNORDERED' or 'ORDERED')."
        item_text = text or "List item"
        # The text goes in first; the bullet request then covers what was inserted.
        batch = [
            create_insert_text_request(index, item_text + "\n"),
            create_bullet_list_request(index, index + len(item_text), list_type),
        ]
        label = f"{list_type.lower()} list"
    else:
        return f"Error: Unsupported element type '{element_type}'. Supported types: 'table', 'list', 'page_break'."

    update_call = service.documents().batchUpdate(
        documentId=document_id, body={"requests": batch}
    )
    await asyncio.to_thread(update_call.execute)

    doc_url = f"https://docs.google.com/document/d/{document_id}/edit"
    return f"Inserted {label} at index {index} in document {document_id}. Link: {doc_url}"
+ + Args: + user_google_email: User's Google email address + document_id: ID of the document to update + image_source: Drive file ID or public image URL + index: Position to insert image (0-based) + width: Image width in points (optional) + height: Image height in points (optional) + + Returns: + str: Confirmation message with insertion details + """ + logger.info( + f"[insert_doc_image] Doc={document_id}, source={image_source}, index={index}" + ) + + # Handle the special case where we can't insert at the first section break + # If index is 0, bump it to 1 to avoid the section break + if index == 0: + logger.debug("Adjusting index from 0 to 1 to avoid first section break") + index = 1 + + # Determine if source is a Drive file ID or URL + is_drive_file = not ( + image_source.startswith("http://") or image_source.startswith("https://") + ) + + if is_drive_file: + # Verify Drive file exists and get metadata + try: + file_metadata = await asyncio.to_thread( + drive_service.files() + .get( + fileId=image_source, + fields="id, name, mimeType", + supportsAllDrives=True, + ) + .execute + ) + mime_type = file_metadata.get("mimeType", "") + if not mime_type.startswith("image/"): + return f"Error: File {image_source} is not an image (MIME type: {mime_type})." 
@server.tool()
@handle_http_errors("update_doc_headers_footers", service_type="docs")
@require_google_service("docs", "docs_write")
async def update_doc_headers_footers(
    service: Any,
    user_google_email: str,
    document_id: str,
    section_type: str,
    content: str,
    header_footer_type: str = "DEFAULT",
) -> str:
    """
    Updates headers or footers in a Google Doc.

    Args:
        user_google_email: User's Google email address
        document_id: ID of the document to update
        section_type: Type of section to update ("header" or "footer")
        content: Text content for the header/footer
        header_footer_type: Type of header/footer ("DEFAULT", "FIRST_PAGE_ONLY", "EVEN_PAGE")

    Returns:
        str: Confirmation message with update details
    """
    logger.info(f"[update_doc_headers_footers] Doc={document_id}, type={section_type}")

    validator = ValidationManager()

    # Checks run in order and stop at the first failure; each entry is only
    # invoked when reached, so a later check never runs after an earlier error.
    checks = (
        (validator.validate_document_id, (document_id,)),
        (
            validator.validate_header_footer_params,
            (section_type, header_footer_type),
        ),
        (validator.validate_text_content, (content,)),
    )
    for check, arguments in checks:
        passed, problem = check(*arguments)
        if not passed:
            return f"Error: {problem}"

    # The manager owns the actual header/footer update logic.
    manager = HeaderFooterManager(service)
    updated, detail = await manager.update_header_footer_content(
        document_id, section_type, content, header_footer_type
    )

    if not updated:
        return f"Error: {detail}"

    doc_url = f"https://docs.google.com/document/d/{document_id}/edit"
    return f"{detail}. Link: {doc_url}"
+ + Supported operation types and their parameters: + + insert_text - required: index (int), text (str) + delete_text - required: start_index (int), end_index (int) + replace_text - required: start_index (int), end_index (int), text (str) + format_text - required: start_index (int), end_index (int) + optional: bold, italic, underline, font_size, font_family, + text_color, background_color, link_url + update_paragraph_style + - required: start_index (int), end_index (int) + optional: heading_level (0-6, 0=normal), alignment + (START/CENTER/END/JUSTIFIED), line_spacing, + indent_first_line, indent_start, indent_end, + space_above, space_below + insert_table - required: index (int), rows (int), columns (int) + insert_page_break- required: index (int) + find_replace - required: find_text (str), replace_text (str) + optional: match_case (bool, default false) + create_bullet_list - required: start_index (int), end_index (int) + optional: list_type ('UNORDERED'|'ORDERED'|'NONE', default UNORDERED), + nesting_level (0-8), paragraph_start_indices (list[int]) + Use list_type='NONE' to remove existing bullet/list formatting + insert_doc_tab - required: title (str), index (int) + optional: parent_tab_id (str) + delete_doc_tab - required: tab_id (str) + update_doc_tab - required: tab_id (str), title (str) + + Example operations: + [ + {"type": "insert_text", "index": 1, "text": "Hello World"}, + {"type": "format_text", "start_index": 1, "end_index": 12, "bold": true}, + {"type": "update_paragraph_style", "start_index": 1, "end_index": 12, + "heading_level": 1, "alignment": "CENTER"}, + {"type": "find_replace", "find_text": "foo", "replace_text": "bar"}, + {"type": "insert_table", "index": 20, "rows": 2, "columns": 3}, + {"type": "insert_doc_tab", "title": "Appendix", "index": 1} + ] + + Returns: + str: Confirmation message with batch operation results + """ + logger.debug(f"[batch_update_doc] Doc={document_id}, operations={len(operations)}") + + # Input validation + validator = 
@server.tool()
@handle_http_errors("inspect_doc_structure", is_read_only=True, service_type="docs")
@require_google_service("docs", "docs_read")
async def inspect_doc_structure(
    service: Any,
    user_google_email: str,
    document_id: str,
    detailed: bool = False,
    tab_id: str = None,
) -> str:
    """
    Essential tool for finding safe insertion points and understanding document structure.

    USE THIS FOR:
    - Finding the correct index for table insertion
    - Understanding document layout before making changes
    - Locating existing tables and their positions
    - Getting document statistics and complexity info
    - Inspecting structure of specific tabs

    CRITICAL FOR TABLE OPERATIONS:
    ALWAYS call this BEFORE creating tables to get a safe insertion index.

    WHAT THE OUTPUT SHOWS:
    - total_elements: Number of document elements
    - total_length: Maximum safe index for insertion
    - tables: Number of existing tables
    - table_details: Position and dimensions of each table
    - tabs: List of available tabs in the document (if no tab_id specified)

    WORKFLOW:
    Step 1: Call this function
    Step 2: Note the "total_length" value
    Step 3: Use an index < total_length for table insertion
    Step 4: Create your table

    Args:
        user_google_email: User's Google email address
        document_id: ID of the document to inspect
        detailed: Whether to return detailed structure information
        tab_id: Optional ID of the tab to inspect. If not provided, inspects main document.

    Returns:
        str: JSON string containing document structure and safe insertion indices
    """
    logger.debug(
        f"[inspect_doc_structure] Doc={document_id}, detailed={detailed}, tab_id={tab_id}"
    )

    # Get the document
    doc = await asyncio.to_thread(
        service.documents().get(documentId=document_id, includeTabsContent=True).execute
    )

    def find_tab(tabs, target_id):
        """Depth-first search of the tab tree for the tab whose tabId matches."""
        for tab in tabs:
            if tab.get("tabProperties", {}).get("tabId") == target_id:
                return tab
            if "childTabs" in tab:
                found = find_tab(tab["childTabs"], target_id)
                if found:
                    return found
        return None

    # Pick the DocumentTab whose content should be analysed (None means the
    # top-level document fields are usable as they are).
    document_tab = None
    if tab_id:
        tab = find_tab(doc.get("tabs", []), tab_id)
        if tab and "documentTab" in tab:
            document_tab = tab["documentTab"]
        elif tab:
            return f"Error: Tab {tab_id} is not a document tab and has no body content."
        else:
            return f"Error: Tab {tab_id} not found in document."
    elif not doc.get("body", {}).get("content"):
        # Per the Docs API documentation, requesting includeTabsContent=True
        # populates Document.tabs instead of the top-level content fields, so
        # the "main document" has to be read from the first tab.
        tabs = doc.get("tabs", [])
        if tabs:
            document_tab = tabs[0].get("documentTab")

    # Create a dummy doc object for analysis tools that expect a full doc
    analysis_doc = doc.copy()
    if document_tab is not None:
        # Overlay the tab-level fields (body, headers, footers, ...) so the
        # copy has the same shape as a document fetched without tabs.
        analysis_doc.update(document_tab)
        analysis_doc["body"] = document_tab.get("body", {})
    else:
        analysis_doc["body"] = doc.get("body", {})

    if detailed:
        # Return full parsed structure
        structure = parse_document_structure(analysis_doc)

        # Simplify for JSON serialization
        result = {
            "title": structure["title"],
            "total_length": structure["total_length"],
            "statistics": {
                "elements": len(structure["body"]),
                "tables": len(structure["tables"]),
                "paragraphs": sum(
                    1 for e in structure["body"] if e.get("type") == "paragraph"
                ),
                "has_headers": bool(structure["headers"]),
                "has_footers": bool(structure["footers"]),
            },
            "elements": [],
        }

        # Add element summaries
        for element in structure["body"]:
            elem_summary = {
                "type": element["type"],
                "start_index": element["start_index"],
                "end_index": element["end_index"],
            }

            if element["type"] == "table":
                elem_summary["rows"] = element["rows"]
                elem_summary["columns"] = element["columns"]
                elem_summary["cell_count"] = len(element.get("cells", []))
            elif element["type"] == "paragraph":
                elem_summary["text_preview"] = element.get("text", "")[:100]

            result["elements"].append(elem_summary)

        # Add table details
        if structure["tables"]:
            result["tables"] = []
            for i, table in enumerate(structure["tables"]):
                table_data = extract_table_as_data(table)
                result["tables"].append(
                    {
                        "index": i,
                        "position": {
                            "start": table["start_index"],
                            "end": table["end_index"],
                        },
                        "dimensions": {
                            "rows": table["rows"],
                            "columns": table["columns"],
                        },
                        "preview": table_data[:3] if table_data else [],  # First 3 rows
                    }
                )

    else:
        # Return basic analysis
        result = analyze_document_complexity(analysis_doc)

        # Add table information
        tables = find_tables(analysis_doc)
        if tables:
            result["table_details"] = [
                {
                    "index": i,
                    "rows": table["rows"],
                    "columns": table["columns"],
                    "start_index": table["start_index"],
                    "end_index": table["end_index"],
                }
                for i, table in enumerate(tables)
            ]

    # Always include available tabs if no tab_id was specified
    if not tab_id:

        def get_tabs_summary(tabs):
            """Reduce the tab tree to title/tab_id pairs, keeping nesting."""
            summary = []
            for tab in tabs:
                props = tab.get("tabProperties", {})
                tab_info = {
                    "title": props.get("title"),
                    "tab_id": props.get("tabId"),
                }
                if "childTabs" in tab:
                    tab_info["child_tabs"] = get_tabs_summary(tab["childTabs"])
                summary.append(tab_info)
            return summary

        result["tabs"] = get_tabs_summary(doc.get("tabs", []))

    if tab_id:
        result["inspected_tab_id"] = tab_id

    link = f"https://docs.google.com/document/d/{document_id}/edit"
    return f"Document structure analysis for {document_id}:\n\n{json.dumps(result, indent=2)}\n\nLink: {link}"
+ + MANDATORY WORKFLOW - DO THESE STEPS IN ORDER: + + Step 1: ALWAYS call inspect_doc_structure first + Step 2: Use the 'total_length' value from inspect_doc_structure as your index + Step 3: Format data as 2D list: [["col1", "col2"], ["row1col1", "row1col2"]] + Step 4: Call this function with the correct index and data + + EXAMPLE DATA FORMAT: + table_data = [ + ["Header1", "Header2", "Header3"], # Row 0 - headers + ["Data1", "Data2", "Data3"], # Row 1 - first data row + ["Data4", "Data5", "Data6"] # Row 2 - second data row + ] + + CRITICAL INDEX REQUIREMENTS: + - NEVER use index values like 1, 2, 10 without calling inspect_doc_structure first + - ALWAYS get index from inspect_doc_structure 'total_length' field + - Index must be a valid insertion point in the document + + DATA FORMAT REQUIREMENTS: + - Must be 2D list of strings only + - Each inner list = one table row + - All rows MUST have same number of columns + - Use empty strings "" for empty cells, never None + - Use debug_table_structure after creation to verify results + + Args: + user_google_email: User's Google email address + document_id: ID of the document to update + table_data: 2D list of strings - EXACT format: [["col1", "col2"], ["row1col1", "row1col2"]] + index: Document position (MANDATORY: get from inspect_doc_structure 'total_length') + bold_headers: Whether to make first row bold (default: true) + tab_id: Optional tab ID to create the table in a specific tab + + Returns: + str: Confirmation with table details and link + """ + logger.debug(f"[create_table_with_data] Doc={document_id}, index={index}") + + # Input validation + validator = ValidationManager() + + is_valid, error_msg = validator.validate_document_id(document_id) + if not is_valid: + return f"ERROR: {error_msg}" + + is_valid, error_msg = validator.validate_table_data(table_data) + if not is_valid: + return f"ERROR: {error_msg}" + + is_valid, error_msg = validator.validate_index(index, "Index") + if not is_valid: + return f"ERROR: 
@server.tool()
@handle_http_errors("debug_table_structure", is_read_only=True, service_type="docs")
@require_google_service("docs", "docs_read")
async def debug_table_structure(
    service: Any,
    user_google_email: str,
    document_id: str,
    table_index: int = 0,
) -> str:
    """
    ESSENTIAL DEBUGGING TOOL - Use this whenever tables don't work as expected.

    USE THIS IMMEDIATELY WHEN:
    - Table population put data in wrong cells
    - You get "table not found" errors
    - Data appears concatenated in first cell
    - Need to understand existing table structure
    - Planning to use populate_existing_table

    WHAT THIS SHOWS YOU:
    - Exact table dimensions (rows × columns)
    - Each cell's position coordinates (row,col)
    - Current content in each cell
    - Insertion indices for each cell
    - Table boundaries and ranges

    HOW TO READ THE OUTPUT:
    - "dimensions": "2x3" = 2 rows, 3 columns
    - "position": "(0,0)" = first row, first column
    - "current_content": What's actually in each cell right now
    - "insertion_index": Where new text would be inserted in that cell

    WORKFLOW INTEGRATION:
    1. After creating table → Use this to verify structure
    2. Before populating → Use this to plan your data format
    3. After population fails → Use this to see what went wrong
    4. When debugging → Compare your data array to actual table structure

    Args:
        user_google_email: User's Google email address
        document_id: ID of the document to inspect
        table_index: Which table to debug (0 = first table, 1 = second table, etc.)

    Returns:
        str: Detailed JSON structure showing table layout, cell positions, and current content
    """
    logger.debug(
        f"[debug_table_structure] Doc={document_id}, table_index={table_index}"
    )

    # Get the document
    doc = await asyncio.to_thread(
        service.documents().get(documentId=document_id).execute
    )

    # Find tables
    tables = find_tables(doc)
    # Reject negative indices as well: Python would otherwise count them from
    # the end of the list (or raise IndexError), contradicting the documented
    # "0 = first table" contract.
    if not 0 <= table_index < len(tables):
        return f"Error: Table index {table_index} not found. Document has {len(tables)} table(s)."

    table_info = tables[table_index]

    # Extract detailed cell information, one inner list per table row
    debug_info = {
        "table_index": table_index,
        "dimensions": f"{table_info['rows']}x{table_info['columns']}",
        "table_range": f"[{table_info['start_index']}-{table_info['end_index']}]",
        "cells": [
            [
                {
                    "position": f"({row_idx},{col_idx})",
                    "range": f"[{cell['start_index']}-{cell['end_index']}]",
                    "insertion_index": cell.get("insertion_index", "N/A"),
                    # repr() keeps whitespace/newlines in the cell text visible
                    "current_content": repr(cell.get("content", "")),
                    "content_elements_count": len(cell.get("content_elements", [])),
                }
                for col_idx, cell in enumerate(row)
            ]
            for row_idx, row in enumerate(table_info["cells"])
        ],
    }

    link = f"https://docs.google.com/document/d/{document_id}/edit"
    return f"Table structure debug for table {table_index}:\n\n{json.dumps(debug_info, indent=2)}\n\nLink: {link}"
+ + Args: + user_google_email: User's Google email address + document_id: ID of the Google Doc to export + pdf_filename: Name for the PDF file (optional - if not provided, uses original name + "_PDF") + folder_id: Drive folder ID to save PDF in (optional - if not provided, saves in root) + + Returns: + str: Confirmation message with PDF file details and links + """ + logger.info( + f"[export_doc_to_pdf] Email={user_google_email}, Doc={document_id}, pdf_filename={pdf_filename}, folder_id={folder_id}" + ) + + # Get file metadata first to validate it's a Google Doc + try: + file_metadata = await asyncio.to_thread( + service.files() + .get( + fileId=document_id, + fields="id, name, mimeType, webViewLink", + supportsAllDrives=True, + ) + .execute + ) + except Exception as e: + return f"Error: Could not access document {document_id}: {str(e)}" + + mime_type = file_metadata.get("mimeType", "") + original_name = file_metadata.get("name", "Unknown Document") + web_view_link = file_metadata.get("webViewLink", "#") + + # Verify it's a Google Doc + if mime_type != "application/vnd.google-apps.document": + return f"Error: File '{original_name}' is not a Google Doc (MIME type: {mime_type}). Only native Google Docs can be exported to PDF." 
+ + logger.info(f"[export_doc_to_pdf] Exporting '{original_name}' to PDF") + + # Export the document as PDF + try: + request_obj = service.files().export_media( + fileId=document_id, mimeType="application/pdf" + ) + + fh = io.BytesIO() + downloader = MediaIoBaseDownload(fh, request_obj) + + done = False + while not done: + _, done = await asyncio.to_thread(downloader.next_chunk) + + pdf_content = fh.getvalue() + pdf_size = len(pdf_content) + + except Exception as e: + return f"Error: Failed to export document to PDF: {str(e)}" + + # Determine PDF filename + if not pdf_filename: + pdf_filename = f"{original_name}_PDF.pdf" + elif not pdf_filename.endswith(".pdf"): + pdf_filename += ".pdf" + + # Upload PDF to Drive + try: + # Reuse the existing BytesIO object by resetting to the beginning + fh.seek(0) + # Create media upload object + media = MediaIoBaseUpload(fh, mimetype="application/pdf", resumable=True) + + # Prepare file metadata for upload + file_metadata = {"name": pdf_filename, "mimeType": "application/pdf"} + + # Add parent folder if specified + if folder_id: + file_metadata["parents"] = [folder_id] + + # Upload the file + uploaded_file = await asyncio.to_thread( + service.files() + .create( + body=file_metadata, + media_body=media, + fields="id, name, webViewLink, parents", + supportsAllDrives=True, + ) + .execute + ) + + pdf_file_id = uploaded_file.get("id") + pdf_web_link = uploaded_file.get("webViewLink", "#") + pdf_parents = uploaded_file.get("parents", []) + + logger.info( + f"[export_doc_to_pdf] Successfully uploaded PDF to Drive: {pdf_file_id}" + ) + + folder_info = "" + if folder_id: + folder_info = f" in folder {folder_id}" + elif pdf_parents: + folder_info = f" in folder {pdf_parents[0]}" + + return f"Successfully exported '{original_name}' to PDF and saved to Drive as '{pdf_filename}' (ID: {pdf_file_id}, {pdf_size:,} bytes){folder_info}. 
async def _get_paragraph_start_indices_in_range(
    service: Any, document_id: str, start_index: int, end_index: int
) -> list[int]:
    """
    Return the start index of every body paragraph overlapping a range.

    The document is fetched with a field mask limited to each body element's
    startIndex, endIndex and paragraph. An element counts when it has a
    "paragraph" key, integer start/end indices, and its span intersects
    [start_index, end_index). If nothing qualifies, the result is
    ``[start_index]`` so the caller always receives at least one index.
    """
    request = service.documents().get(
        documentId=document_id,
        fields="body/content(startIndex,endIndex,paragraph)",
    )
    # execute() blocks, so it is run on a worker thread.
    document = await asyncio.to_thread(request.execute)
    elements = document.get("body", {}).get("content", [])

    def _overlaps_target(element) -> bool:
        """True for paragraph elements whose span intersects the target range."""
        if "paragraph" not in element:
            return False
        first = element.get("startIndex")
        last = element.get("endIndex")
        # Elements without both integer bounds cannot be positioned.
        if not (isinstance(first, int) and isinstance(last, int)):
            return False
        return last > start_index and first < end_index

    starts = [
        element.get("startIndex") for element in elements if _overlaps_target(element)
    ]
    return starts if starts else [start_index]
+ + This tool can apply named heading styles (H1-H6) for semantic document structure, + create bulleted or numbered lists with nested indentation, and customize paragraph + properties like alignment, spacing, and indentation. All operations can be applied + in a single call. + + Args: + user_google_email: User's Google email address + document_id: Document ID to modify + start_index: Start position (1-based) + end_index: End position (exclusive) - should cover the entire paragraph + heading_level: Heading level 0-6 (0 = NORMAL_TEXT, 1 = H1, 2 = H2, etc.) + Use for semantic document structure + alignment: Text alignment - 'START' (left), 'CENTER', 'END' (right), or 'JUSTIFIED' + line_spacing: Line spacing multiplier (1.0 = single, 1.5 = 1.5x, 2.0 = double) + indent_first_line: First line indent in points (e.g., 36 for 0.5 inch) + indent_start: Left/start indent in points + indent_end: Right/end indent in points + space_above: Space above paragraph in points (e.g., 12 for one line) + space_below: Space below paragraph in points + named_style_type: Direct named style type - 'NORMAL_TEXT', 'TITLE', 'SUBTITLE', + 'HEADING_1' through 'HEADING_6'. Mutually exclusive with heading_level. 
+ list_type: Create a list from existing paragraphs ('UNORDERED' for bullets, 'ORDERED' for numbers) + list_nesting_level: Nesting level for lists (0-8, where 0 is top level, default is 0) + Use higher levels for nested/indented list items + + Returns: + str: Confirmation message with formatting details + + Examples: + # Apply H1 heading style + update_paragraph_style(document_id="...", start_index=1, end_index=20, heading_level=1) + + # Create a bulleted list + update_paragraph_style(document_id="...", start_index=1, end_index=50, + list_type="UNORDERED") + + # Create a nested numbered list item + update_paragraph_style(document_id="...", start_index=1, end_index=30, + list_type="ORDERED", list_nesting_level=1) + + # Apply H2 heading with custom spacing + update_paragraph_style(document_id="...", start_index=1, end_index=30, + heading_level=2, space_above=18, space_below=12) + + # Center-align a paragraph with double spacing + update_paragraph_style(document_id="...", start_index=1, end_index=50, + alignment="CENTER", line_spacing=2.0) + """ + logger.info( + f"[update_paragraph_style] Doc={document_id}, Range: {start_index}-{end_index}" + ) + + # Validate range + if start_index < 1: + return "Error: start_index must be >= 1" + if end_index <= start_index: + return "Error: end_index must be greater than start_index" + + # Validate list parameters + list_type_value = list_type + if list_type_value is not None: + # Coerce non-string inputs to string before normalization to avoid AttributeError + if not isinstance(list_type_value, str): + list_type_value = str(list_type_value) + valid_list_types = ["UNORDERED", "ORDERED"] + normalized_list_type = list_type_value.upper() + if normalized_list_type not in valid_list_types: + return f"Error: list_type must be one of: {', '.join(valid_list_types)}" + + list_type_value = normalized_list_type + + if list_nesting_level is not None: + if list_type_value is None: + return "Error: list_nesting_level requires list_type parameter" 
+ if not isinstance(list_nesting_level, int): + return "Error: list_nesting_level must be an integer" + if list_nesting_level < 0 or list_nesting_level > 8: + return "Error: list_nesting_level must be between 0 and 8" + + # Validate named_style_type + if named_style_type is not None and heading_level is not None: + return "Error: heading_level and named_style_type are mutually exclusive; provide only one" + + if named_style_type is not None: + valid_styles = [ + "NORMAL_TEXT", "TITLE", "SUBTITLE", + "HEADING_1", "HEADING_2", "HEADING_3", + "HEADING_4", "HEADING_5", "HEADING_6", + ] + if named_style_type not in valid_styles: + return f"Error: Invalid named_style_type '{named_style_type}'. Must be one of: {', '.join(valid_styles)}" + + # Build paragraph style object + paragraph_style = {} + fields = [] + + # Handle named_style_type (direct named style) + if named_style_type is not None: + paragraph_style["namedStyleType"] = named_style_type + fields.append("namedStyleType") + + # Handle heading level (named style) + elif heading_level is not None: + if heading_level < 0 or heading_level > 6: + return "Error: heading_level must be between 0 (normal text) and 6" + if heading_level == 0: + paragraph_style["namedStyleType"] = "NORMAL_TEXT" + else: + paragraph_style["namedStyleType"] = f"HEADING_{heading_level}" + fields.append("namedStyleType") + + # Handle alignment + if alignment is not None: + valid_alignments = ["START", "CENTER", "END", "JUSTIFIED"] + alignment_upper = alignment.upper() + if alignment_upper not in valid_alignments: + return f"Error: Invalid alignment '{alignment}'. 
Must be one of: {valid_alignments}" + paragraph_style["alignment"] = alignment_upper + fields.append("alignment") + + # Handle line spacing + if line_spacing is not None: + if line_spacing <= 0: + return "Error: line_spacing must be positive" + paragraph_style["lineSpacing"] = line_spacing * 100 # Convert to percentage + fields.append("lineSpacing") + + # Handle indentation + if indent_first_line is not None: + paragraph_style["indentFirstLine"] = { + "magnitude": indent_first_line, + "unit": "PT", + } + fields.append("indentFirstLine") + + if indent_start is not None: + paragraph_style["indentStart"] = {"magnitude": indent_start, "unit": "PT"} + fields.append("indentStart") + + if indent_end is not None: + paragraph_style["indentEnd"] = {"magnitude": indent_end, "unit": "PT"} + fields.append("indentEnd") + + # Handle spacing + if space_above is not None: + paragraph_style["spaceAbove"] = {"magnitude": space_above, "unit": "PT"} + fields.append("spaceAbove") + + if space_below is not None: + paragraph_style["spaceBelow"] = {"magnitude": space_below, "unit": "PT"} + fields.append("spaceBelow") + + # Create batch update requests + requests = [] + + # Add paragraph style update if we have any style changes + if paragraph_style: + requests.append( + { + "updateParagraphStyle": { + "range": {"startIndex": start_index, "endIndex": end_index}, + "paragraphStyle": paragraph_style, + "fields": ",".join(fields), + } + } + ) + + # Add list creation if requested + if list_type_value is not None: + # Default to level 0 if not specified + nesting_level = list_nesting_level if list_nesting_level is not None else 0 + try: + paragraph_start_indices = None + if nesting_level > 0: + paragraph_start_indices = await _get_paragraph_start_indices_in_range( + service, document_id, start_index, end_index + ) + list_requests = create_bullet_list_request( + start_index, + end_index, + list_type_value, + nesting_level, + paragraph_start_indices=paragraph_start_indices, + ) + 
@server.tool()
@handle_http_errors("get_doc_as_markdown", is_read_only=True, service_type="docs")
@require_multiple_services(
    [
        {
            "service_type": "drive",
            "scopes": "drive_read",
            "param_name": "drive_service",
        },
        {"service_type": "docs", "scopes": "docs_read", "param_name": "docs_service"},
    ]
)
async def get_doc_as_markdown(
    drive_service: Any,
    docs_service: Any,
    user_google_email: str,
    document_id: str,
    include_comments: bool = True,
    comment_mode: str = "inline",
    include_resolved: bool = False,
) -> str:
    """
    Reads a Google Doc and returns it as Markdown, optionally with its comments.

    The document is fetched through the Docs API and converted with
    convert_doc_to_markdown. When comments are requested they are fetched
    page by page through the Drive API (including each comment's quoted anchor
    text) and merged into the Markdown.

    Args:
        user_google_email: User's Google email address
        document_id: ID of the Google Doc (or full URL)
        include_comments: Whether to include comments (default: True)
        comment_mode: How to display comments:
            - "inline": Footnote-style references placed at the anchor text location (default)
            - "appendix": All comments grouped at the bottom with blockquoted anchor text
            - "none": No comments included
        include_resolved: Whether to include resolved comments (default: False)

    Returns:
        str: The document content as Markdown, optionally with comments
    """
    # Accept either a bare document ID or a full URL containing "/d/<id>".
    id_in_url = re.search(r"/d/([\w-]+)", document_id)
    if id_in_url is not None:
        document_id = id_in_url.group(1)

    valid_modes = ("inline", "appendix", "none")
    if comment_mode not in valid_modes:
        return f"Error: comment_mode must be one of {valid_modes}, got '{comment_mode}'"

    logger.info(
        f"[get_doc_as_markdown] Doc={document_id}, comments={include_comments}, mode={comment_mode}"
    )

    # Document body comes from the Docs API.
    doc_request = docs_service.documents().get(documentId=document_id)
    document = await asyncio.to_thread(doc_request.execute)
    markdown = convert_doc_to_markdown(document)

    wants_comments = include_comments and comment_mode != "none"
    if not wants_comments:
        return markdown

    # Comments come from the Drive API; keep requesting pages until no
    # nextPageToken is returned.
    comment_fields = (
        "comments(id,content,author,createdTime,modifiedTime,"
        "resolved,quotedFileContent,"
        "replies(id,content,author,createdTime,modifiedTime)),"
        "nextPageToken"
    )
    collected = []
    next_token = None
    more_pages = True
    while more_pages:
        listing = drive_service.comments().list(
            fileId=document_id,
            fields=comment_fields,
            includeDeleted=False,
            pageToken=next_token,
        )
        page = await asyncio.to_thread(listing.execute)
        collected.extend(page.get("comments", []))
        next_token = page.get("nextPageToken")
        more_pages = bool(next_token)

    parsed = parse_drive_comments(
        {"comments": collected}, include_resolved=include_resolved
    )
    if not parsed:
        return markdown

    if comment_mode != "inline":
        appendix_text = format_comments_appendix(parsed)
        return markdown.rstrip("\n") + "\n\n" + appendix_text
    return format_comments_inline(markdown, parsed)
@server.tool()
@handle_http_errors("delete_doc_tab", service_type="docs")
@require_google_service("docs", "docs_write")
async def delete_doc_tab(
    service: Any,
    user_google_email: str,
    document_id: str,
    tab_id: str,
) -> str:
    """
    Removes a single tab, identified by its tab ID, from a Google Doc.

    Args:
        user_google_email: User's Google email address
        document_id: ID of the document to update
        tab_id: ID of the tab to delete (use inspect_doc_structure to find tab IDs)

    Returns:
        str: Confirmation message with document link
    """
    logger.info(f"[delete_doc_tab] Doc={document_id}, tab_id='{tab_id}'")

    # One batchUpdate carrying exactly one delete-tab request.
    payload = {"requests": [create_delete_doc_tab_request(tab_id)]}
    update_call = service.documents().batchUpdate(documentId=document_id, body=payload)
    await asyncio.to_thread(update_call.execute)

    doc_url = f"https://docs.google.com/document/d/{document_id}/edit"
    return f"Deleted tab '{tab_id}' from document {document_id}. Link: {doc_url}"
Link: {link}" + ) + + +# Create comment management tools for documents +_comment_tools = create_comment_tools("document", "document_id") + +# Extract and register the functions +list_document_comments = _comment_tools["list_comments"] +manage_document_comment = _comment_tools["manage_comment"] diff --git a/gdocs/managers/__init__.py b/gdocs/managers/__init__.py new file mode 100644 index 0000000..3e8f679 --- /dev/null +++ b/gdocs/managers/__init__.py @@ -0,0 +1,18 @@ +""" +Google Docs Operation Managers + +This package provides high-level manager classes for complex Google Docs operations, +extracting business logic from the main tools module to improve maintainability. +""" + +from .table_operation_manager import TableOperationManager +from .header_footer_manager import HeaderFooterManager +from .validation_manager import ValidationManager +from .batch_operation_manager import BatchOperationManager + +__all__ = [ + "TableOperationManager", + "HeaderFooterManager", + "ValidationManager", + "BatchOperationManager", +] diff --git a/gdocs/managers/batch_operation_manager.py b/gdocs/managers/batch_operation_manager.py new file mode 100644 index 0000000..c0d5368 --- /dev/null +++ b/gdocs/managers/batch_operation_manager.py @@ -0,0 +1,534 @@ +""" +Batch Operation Manager + +This module provides high-level batch operation management for Google Docs, +extracting complex validation and request building logic. 
async def execute_batch_operations(
    self, document_id: str, operations: list[dict[str, Any]]
) -> tuple[bool, str, dict[str, Any]]:
    """
    Run a list of document operations as one batchUpdate call.

    The operations are validated and converted to API requests, sent in a
    single batch, and summarised. Any exception raised along the way is
    logged and reported through the returned tuple instead of propagating.

    Args:
        document_id: ID of the document to update
        operations: List of operation dictionaries

    Returns:
        Tuple of (success, message, metadata). ``metadata`` is empty on
        failure; on success it holds the operation/request/reply counts, the
        first five operation descriptions and, when tabs were created, a
        ``created_tabs`` list.
    """
    logger.info(f"Executing batch operations on document {document_id}")
    logger.info(f"Operations count: {len(operations)}")

    if not operations:
        failure = "No operations provided. Please provide at least one operation."
        return False, failure, {}

    try:
        built_requests, descriptions = await self._validate_and_build_requests(
            operations
        )
        if not built_requests:
            return False, "No valid requests could be built from operations", {}

        api_result = await self._execute_batch_requests(document_id, built_requests)

        metadata = {
            "operations_count": len(operations),
            "requests_count": len(built_requests),
            "replies_count": len(api_result.get("replies", [])),
            # Only the first five descriptions are kept in the metadata.
            "operation_summary": descriptions[:5],
        }

        # Tab IDs assigned by the API are surfaced so callers can address the new tabs.
        new_tabs = self._extract_created_tabs(api_result)
        if new_tabs:
            metadata["created_tabs"] = new_tabs

        overview = self._build_operation_summary(descriptions)
        message = f"Successfully executed {len(operations)} operations ({overview})"
        if new_tabs:
            listed = ", ".join(
                f"'{tab['title']}' (tab_id: {tab['tab_id']})" for tab in new_tabs
            )
            message = f"{message}. Created tabs: {listed}"

        return True, message, metadata

    except Exception as e:
        logger.error(f"Failed to execute batch operations: {str(e)}")
        return False, f"Batch operation failed: {str(e)}", {}
def _build_operation_request(
    self, op: dict[str, Any], op_type: str
) -> Tuple[Union[Dict[str, Any], List[Dict[str, Any]]], str]:
    """
    Translate one operation dictionary into Docs API request(s).

    Args:
        op: Operation dictionary. The keys that are read depend on
            ``op_type``; an optional ``tab_id`` key is forwarded to the
            request builders that accept one.
        op_type: Operation type (for example ``"insert_text"``).

    Returns:
        Tuple of (request, description). ``request`` is a single request
        dict, except for ``replace_text`` where it is a two-element list
        (delete first, then insert).

    Raises:
        KeyError: A field required by ``op_type`` is missing from ``op``.
        ValueError: ``op_type`` is unsupported, ``list_type`` is invalid, or
            a ``format_text`` / ``update_paragraph_style`` operation produced
            no request (no options were supplied).
    """
    tab_id = op.get("tab_id")

    if op_type == "insert_text":
        request = create_insert_text_request(op["index"], op["text"], tab_id)
        description = f"insert text at {op['index']}"

    elif op_type == "delete_text":
        request = create_delete_range_request(
            op["start_index"], op["end_index"], tab_id
        )
        description = f"delete text {op['start_index']}-{op['end_index']}"

    elif op_type == "replace_text":
        # Replace is delete + insert (must be done in this order)
        delete_request = create_delete_range_request(
            op["start_index"], op["end_index"], tab_id
        )
        insert_request = create_insert_text_request(
            op["start_index"], op["text"], tab_id
        )
        # Return both requests as a list
        request = [delete_request, insert_request]
        description = f"replace text {op['start_index']}-{op['end_index']} with '{op['text'][:20]}{'...' if len(op['text']) > 20 else ''}'"

    elif op_type == "format_text":
        request = create_format_text_request(
            op["start_index"],
            op["end_index"],
            op.get("bold"),
            op.get("italic"),
            op.get("underline"),
            op.get("font_size"),
            op.get("font_family"),
            op.get("text_color"),
            op.get("background_color"),
            op.get("link_url"),
            tab_id,
        )

        if not request:
            raise ValueError("No formatting options provided")

        # Build format description from the options that were actually set
        format_changes = []
        for param, name in [
            ("bold", "bold"),
            ("italic", "italic"),
            ("underline", "underline"),
            ("font_size", "font size"),
            ("font_family", "font family"),
            ("text_color", "text color"),
            ("background_color", "background color"),
            ("link_url", "link"),
        ]:
            if op.get(param) is not None:
                value = f"{op[param]}pt" if param == "font_size" else op[param]
                format_changes.append(f"{name}: {value}")

        description = f"format text {op['start_index']}-{op['end_index']} ({', '.join(format_changes)})"

    elif op_type == "update_paragraph_style":
        request = create_update_paragraph_style_request(
            op["start_index"],
            op["end_index"],
            op.get("heading_level"),
            op.get("alignment"),
            op.get("line_spacing"),
            op.get("indent_first_line"),
            op.get("indent_start"),
            op.get("indent_end"),
            op.get("space_above"),
            op.get("space_below"),
            tab_id,
            op.get("named_style_type"),
        )

        if not request:
            raise ValueError("No paragraph style options provided")

        # Options whose values are rendered with a "pt" suffix.
        _PT_PARAMS = {
            "indent_first_line",
            "indent_start",
            "indent_end",
            "space_above",
            "space_below",
        }
        # Options with a custom rendering in the description.
        _SUFFIX = {
            "heading_level": lambda v: f"H{v}",
            "line_spacing": lambda v: f"{v}x",
        }

        style_changes = []
        for param, name in [
            ("heading_level", "heading"),
            # named_style_type is forwarded to the request builder above, so it
            # must be reported too; otherwise an operation that sets only a
            # named style is described with an empty "()" summary.
            ("named_style_type", "named style"),
            ("alignment", "alignment"),
            ("line_spacing", "line spacing"),
            ("indent_first_line", "first line indent"),
            ("indent_start", "start indent"),
            ("indent_end", "end indent"),
            ("space_above", "space above"),
            ("space_below", "space below"),
        ]:
            if op.get(param) is not None:
                raw = op[param]
                fmt = _SUFFIX.get(param)
                if fmt:
                    value = fmt(raw)
                elif param in _PT_PARAMS:
                    value = f"{raw}pt"
                else:
                    value = raw
                style_changes.append(f"{name}: {value}")

        description = f"paragraph style {op['start_index']}-{op['end_index']} ({', '.join(style_changes)})"

    elif op_type == "insert_table":
        request = create_insert_table_request(
            op["index"], op["rows"], op["columns"], tab_id
        )
        description = f"insert {op['rows']}x{op['columns']} table at {op['index']}"

    elif op_type == "insert_page_break":
        request = create_insert_page_break_request(op["index"], tab_id)
        description = f"insert page break at {op['index']}"

    elif op_type == "find_replace":
        request = create_find_replace_request(
            op["find_text"], op["replace_text"], op.get("match_case", False), tab_id
        )
        description = f"find/replace '{op['find_text']}' → '{op['replace_text']}'"

    elif op_type == "create_bullet_list":
        list_type = op.get("list_type", "UNORDERED")
        if list_type not in ("UNORDERED", "ORDERED", "NONE"):
            raise ValueError(
                f"Invalid list_type '{list_type}'. Must be 'UNORDERED', 'ORDERED', or 'NONE'"
            )
        if list_type == "NONE":
            # "NONE" means: strip existing bullets instead of creating a list.
            request = create_delete_bullet_list_request(
                op["start_index"], op["end_index"], tab_id
            )
            description = f"remove bullets {op['start_index']}-{op['end_index']}"
        else:
            request = create_bullet_list_request(
                op["start_index"],
                op["end_index"],
                list_type,
                op.get("nesting_level"),
                op.get("paragraph_start_indices"),
                tab_id,
            )
            style = "bulleted" if list_type == "UNORDERED" else "numbered"
            description = (
                f"create {style} list {op['start_index']}-{op['end_index']}"
            )
            if op.get("nesting_level"):
                description += f" (nesting level {op['nesting_level']})"

    elif op_type == "insert_doc_tab":
        request = create_insert_doc_tab_request(
            op["title"], op["index"], op.get("parent_tab_id")
        )
        description = f"insert tab '{op['title']}' at {op['index']}"
        if op.get("parent_tab_id"):
            description += f" under parent tab {op['parent_tab_id']}"

    elif op_type == "delete_doc_tab":
        request = create_delete_doc_tab_request(op["tab_id"])
        description = f"delete tab '{op['tab_id']}'"

    elif op_type == "update_doc_tab":
        request = create_update_doc_tab_request(op["tab_id"], op["title"])
        description = f"rename tab '{op['tab_id']}' to '{op['title']}'"

    else:
        supported_types = [
            "insert_text",
            "delete_text",
            "replace_text",
            "format_text",
            "update_paragraph_style",
            "insert_table",
            "insert_page_break",
            "find_replace",
            "create_bullet_list",
            "insert_doc_tab",
            "delete_doc_tab",
            "update_doc_tab",
        ]
        raise ValueError(
            f"Unsupported operation type '{op_type}'. Supported: {', '.join(supported_types)}"
        )

    return request, description
+ + Args: + document_id: Document ID + requests: List of API requests + + Returns: + API response + """ + return await asyncio.to_thread( + self.service.documents() + .batchUpdate(documentId=document_id, body={"requests": requests}) + .execute + ) + + def _extract_created_tabs(self, result: dict[str, Any]) -> list[dict[str, str]]: + """ + Extract tab IDs from insert_doc_tab replies in the batchUpdate response. + + Args: + result: The batchUpdate API response + + Returns: + List of dicts with tab_id and title for each created tab + """ + created_tabs = [] + for reply in result.get("replies", []): + if "createDocumentTab" in reply: + props = reply["createDocumentTab"].get("tabProperties", {}) + tab_id = props.get("tabId") + title = props.get("title", "") + if tab_id: + created_tabs.append({"tab_id": tab_id, "title": title}) + return created_tabs + + def _build_operation_summary(self, operation_descriptions: list[str]) -> str: + """ + Build a concise summary of operations performed. + + Args: + operation_descriptions: List of operation descriptions + + Returns: + Summary string + """ + if not operation_descriptions: + return "no operations" + + summary_items = operation_descriptions[:3] # Show first 3 operations + summary = ", ".join(summary_items) + + if len(operation_descriptions) > 3: + remaining = len(operation_descriptions) - 3 + summary += f" and {remaining} more operation{'s' if remaining > 1 else ''}" + + return summary + + def get_supported_operations(self) -> dict[str, Any]: + """ + Get information about supported batch operations. 
+ + Returns: + Dictionary with supported operation types and their required parameters + """ + return { + "supported_operations": { + "insert_text": { + "required": ["index", "text"], + "description": "Insert text at specified index", + }, + "delete_text": { + "required": ["start_index", "end_index"], + "description": "Delete text in specified range", + }, + "replace_text": { + "required": ["start_index", "end_index", "text"], + "description": "Replace text in range with new text", + }, + "format_text": { + "required": ["start_index", "end_index"], + "optional": [ + "bold", + "italic", + "underline", + "font_size", + "font_family", + "text_color", + "background_color", + "link_url", + ], + "description": "Apply formatting to text range", + }, + "update_paragraph_style": { + "required": ["start_index", "end_index"], + "optional": [ + "heading_level", + "alignment", + "line_spacing", + "indent_first_line", + "indent_start", + "indent_end", + "space_above", + "space_below", + "named_style_type", + ], + "description": "Apply paragraph-level styling (headings, alignment, spacing, indentation)", + }, + "insert_table": { + "required": ["index", "rows", "columns"], + "description": "Insert table at specified index", + }, + "insert_page_break": { + "required": ["index"], + "description": "Insert page break at specified index", + }, + "find_replace": { + "required": ["find_text", "replace_text"], + "optional": ["match_case"], + "description": "Find and replace text throughout document", + }, + "create_bullet_list": { + "required": ["start_index", "end_index"], + "optional": [ + "list_type", + "nesting_level", + "paragraph_start_indices", + ], + "description": "Apply or remove native bullet/numbered list formatting (list_type: UNORDERED, ORDERED, or NONE to remove; nesting_level: 0-8)", + }, + "insert_doc_tab": { + "required": ["title", "index"], + "description": "Insert a new document tab with given title at specified index", + }, + "delete_doc_tab": { + "required": 
async def update_header_footer_content(
    self,
    document_id: str,
    section_type: str,
    content: str,
    header_footer_type: str = "DEFAULT",
) -> tuple[bool, str]:
    """
    Replace the text of a header or footer in a document.

    Both selector arguments are validated first. The document is then
    fetched, the matching section located, and its content replaced. Any
    exception raised during those steps is logged and reported through the
    returned tuple instead of propagating.

    Args:
        document_id: ID of the document to update
        section_type: Type of section ("header" or "footer")
        content: New content for the section
        header_footer_type: Type of header/footer ("DEFAULT", "FIRST_PAGE_ONLY", "EVEN_PAGE")

    Returns:
        Tuple of (success, message)
    """
    logger.info(f"Updating {section_type} in document {document_id}")

    allowed_sections = ("header", "footer")
    if section_type not in allowed_sections:
        return False, "section_type must be 'header' or 'footer'"

    allowed_kinds = ("DEFAULT", "FIRST_PAGE_ONLY", "EVEN_PAGE")
    if header_footer_type not in allowed_kinds:
        problem = "header_footer_type must be 'DEFAULT', 'FIRST_PAGE_ONLY', or 'EVEN_PAGE'"
        return False, problem

    try:
        document = await self._get_document(document_id)

        # The section ID is returned alongside the section but is not needed here.
        located, _section_id = await self._find_target_section(
            document, section_type, header_footer_type
        )
        if not located:
            missing = f"No {section_type} found in document. Please create a {section_type} first in Google Docs."
            return False, missing

        replaced = await self._replace_section_content(document_id, located, content)
        if not replaced:
            return (
                False,
                f"Could not find content structure in {section_type} to update",
            )
        return True, f"Updated {section_type} content in document {document_id}"

    except Exception as e:
        logger.error(f"Failed to update {section_type}: {str(e)}")
        return False, f"Failed to update {section_type}: {str(e)}"
+ + Args: + doc: Document data + section_type: "header" or "footer" + header_footer_type: Type of header/footer + + Returns: + Tuple of (section_data, section_id) or (None, None) if not found + """ + if section_type == "header": + sections = doc.get("headers", {}) + else: + sections = doc.get("footers", {}) + + # Try to match section based on header_footer_type + # Google Docs API typically uses section IDs that correspond to types + + # First, try to find an exact match based on common patterns + for section_id, section_data in sections.items(): + # Check if section_data contains type information + if "type" in section_data and section_data["type"] == header_footer_type: + return section_data, section_id + + # If no exact match, try pattern matching on section ID + # Google Docs often uses predictable section ID patterns + target_patterns = { + "DEFAULT": ["default", "kix"], # DEFAULT headers often have these patterns + "FIRST_PAGE": ["first", "firstpage"], + "EVEN_PAGE": ["even", "evenpage"], + "FIRST_PAGE_ONLY": ["first", "firstpage"], # Legacy support + } + + patterns = target_patterns.get(header_footer_type, []) + for pattern in patterns: + for section_id, section_data in sections.items(): + if pattern.lower() in section_id.lower(): + return section_data, section_id + + # If still no match, return the first available section as fallback + # This maintains backward compatibility + for section_id, section_data in sections.items(): + return section_data, section_id + + return None, None + + async def _replace_section_content( + self, document_id: str, section: dict[str, Any], new_content: str + ) -> bool: + """ + Replace the content in a header or footer section. 
+ + Args: + document_id: Document ID + section: Section data containing content elements + new_content: New content to insert + + Returns: + True if successful, False otherwise + """ + content_elements = section.get("content", []) + if not content_elements: + return False + + # Find the first paragraph to replace content + first_para = self._find_first_paragraph(content_elements) + if not first_para: + return False + + # Calculate content range + start_index = first_para.get("startIndex", 0) + end_index = first_para.get("endIndex", 0) + + # Build requests to replace content + requests = [] + + # Delete existing content if any (preserve paragraph structure) + if end_index > start_index: + requests.append( + { + "deleteContentRange": { + "range": { + "startIndex": start_index, + "endIndex": end_index - 1, # Keep the paragraph end marker + } + } + } + ) + + # Insert new content + requests.append( + {"insertText": {"location": {"index": start_index}, "text": new_content}} + ) + + try: + await asyncio.to_thread( + self.service.documents() + .batchUpdate(documentId=document_id, body={"requests": requests}) + .execute + ) + return True + + except Exception as e: + logger.error(f"Failed to replace section content: {str(e)}") + return False + + def _find_first_paragraph( + self, content_elements: list[dict[str, Any]] + ) -> Optional[dict[str, Any]]: + """Find the first paragraph element in content.""" + for element in content_elements: + if "paragraph" in element: + return element + return None + + async def get_header_footer_info(self, document_id: str) -> dict[str, Any]: + """ + Get information about all headers and footers in the document. 
+ + Args: + document_id: Document ID + + Returns: + Dictionary with header and footer information + """ + try: + doc = await self._get_document(document_id) + + headers_info = {} + for header_id, header_data in doc.get("headers", {}).items(): + headers_info[header_id] = self._extract_section_info(header_data) + + footers_info = {} + for footer_id, footer_data in doc.get("footers", {}).items(): + footers_info[footer_id] = self._extract_section_info(footer_data) + + return { + "headers": headers_info, + "footers": footers_info, + "has_headers": bool(headers_info), + "has_footers": bool(footers_info), + } + + except Exception as e: + logger.error(f"Failed to get header/footer info: {str(e)}") + return {"error": str(e)} + + def _extract_section_info(self, section_data: dict[str, Any]) -> dict[str, Any]: + """Extract useful information from a header/footer section.""" + content_elements = section_data.get("content", []) + + # Extract text content + text_content = "" + for element in content_elements: + if "paragraph" in element: + para = element["paragraph"] + for para_element in para.get("elements", []): + if "textRun" in para_element: + text_content += para_element["textRun"].get("content", "") + + return { + "content_preview": text_content[:100] if text_content else "(empty)", + "element_count": len(content_elements), + "start_index": content_elements[0].get("startIndex", 0) + if content_elements + else 0, + "end_index": content_elements[-1].get("endIndex", 0) + if content_elements + else 0, + } + + async def create_header_footer( + self, document_id: str, section_type: str, header_footer_type: str = "DEFAULT" + ) -> tuple[bool, str]: + """ + Create a new header or footer section. 
+ + Args: + document_id: Document ID + section_type: "header" or "footer" + header_footer_type: Type of header/footer ("DEFAULT", "FIRST_PAGE", or "EVEN_PAGE") + + Returns: + Tuple of (success, message) + """ + if section_type not in ["header", "footer"]: + return False, "section_type must be 'header' or 'footer'" + + # Map our type names to API type names + type_mapping = { + "DEFAULT": "DEFAULT", + "FIRST_PAGE": "FIRST_PAGE", + "EVEN_PAGE": "EVEN_PAGE", + "FIRST_PAGE_ONLY": "FIRST_PAGE", # Support legacy name + } + + api_type = type_mapping.get(header_footer_type, header_footer_type) + if api_type not in ["DEFAULT", "FIRST_PAGE", "EVEN_PAGE"]: + return ( + False, + "header_footer_type must be 'DEFAULT', 'FIRST_PAGE', or 'EVEN_PAGE'", + ) + + try: + # Build the request + request = {"type": api_type} + + # Create the appropriate request type + if section_type == "header": + batch_request = {"createHeader": request} + else: + batch_request = {"createFooter": request} + + # Execute the request + await asyncio.to_thread( + self.service.documents() + .batchUpdate(documentId=document_id, body={"requests": [batch_request]}) + .execute + ) + + return True, f"Successfully created {section_type} with type {api_type}" + + except Exception as e: + error_msg = str(e) + if "already exists" in error_msg.lower(): + return ( + False, + f"A {section_type} of type {api_type} already exists in the document", + ) + return False, f"Failed to create {section_type}: {error_msg}" diff --git a/gdocs/managers/table_operation_manager.py b/gdocs/managers/table_operation_manager.py new file mode 100644 index 0000000..d28aa90 --- /dev/null +++ b/gdocs/managers/table_operation_manager.py @@ -0,0 +1,405 @@ +""" +Table Operation Manager + +This module provides high-level table operations that orchestrate +multiple Google Docs API calls for complex table manipulations. 
class TableOperationManager:
    """
    High-level manager for Google Docs table operations.

    Handles complex multi-step table operations including:
    - Creating tables with data population
    - Populating existing tables
    - Managing cell-by-cell operations with proper index refreshing
    """

    def __init__(self, service):
        """
        Initialize the table operation manager.

        Args:
            service: Google Docs API service instance
        """
        self.service = service

    @staticmethod
    def _in_tab(target: Dict[str, Any], tab_id: str = None) -> Dict[str, Any]:
        """Return a Location/Range dict, adding "tabId" when a tab is targeted."""
        return {**target, "tabId": tab_id} if tab_id else target

    @staticmethod
    def _utf16_len(text: str) -> int:
        """Length of ``text`` in UTF-16 code units, the unit of Docs indexes."""
        return len(text.encode("utf-16-le", "surrogatepass")) // 2

    @staticmethod
    def _locate_new_table(tables: List[Dict[str, Any]], index: int) -> int:
        """
        Return the list position of the table just inserted at ``index``.

        The inserted table starts right after the requested index, so it is
        the table with the smallest start at or beyond that index.
        NOTE(review): relies on find_tables() entries exposing an integer
        "start_index"; when that key is absent the last table is used, which
        is what the previous implementation always did.
        """
        candidates = [
            (table["start_index"], position)
            for position, table in enumerate(tables)
            if isinstance(table.get("start_index"), int)
            and table["start_index"] >= index
        ]
        if candidates:
            return min(candidates)[1]
        return len(tables) - 1

    async def create_and_populate_table(
        self,
        document_id: str,
        table_data: List[List[str]],
        index: int,
        bold_headers: bool = True,
        tab_id: str = None,
    ) -> Tuple[bool, str, Dict[str, Any]]:
        """
        Creates a table and populates it with data in a reliable multi-step process.

        Args:
            document_id: ID of the document to update
            table_data: 2D list of strings for table content
            index: Position to insert the table
            bold_headers: Whether to make the first row bold
            tab_id: Optional tab ID for targeting a specific tab

        Returns:
            Tuple of (success, message, metadata)
        """
        # Validate before touching table_data so bad input yields a message,
        # not an exception from len().
        is_valid, error_msg = validate_table_data(table_data)
        if not is_valid:
            return False, f"Invalid table data: {error_msg}", {}

        rows = len(table_data)
        cols = len(table_data[0])
        logger.debug(f"Creating table at index {index}, dimensions: {rows}x{cols}")

        try:
            # Step 1: Create empty table
            await self._create_empty_table(document_id, index, rows, cols, tab_id)

            # Step 2: Re-read the document to learn where the table landed
            fresh_tables = await self._get_document_tables(document_id, tab_id)
            if not fresh_tables:
                return False, "Could not find table after creation", {}
            table_position = self._locate_new_table(fresh_tables, index)

            # Step 3: Populate each cell, refreshing indexes as text is added
            population_count = await self._populate_table_cells(
                document_id, table_data, bold_headers, tab_id, table_position
            )

            metadata = {
                "rows": rows,
                "columns": cols,
                "populated_cells": population_count,
                "table_index": table_position,
            }

            return (
                True,
                f"Successfully created {rows}x{cols} table and populated {population_count} cells",
                metadata,
            )

        except Exception as e:
            logger.error(f"Failed to create and populate table: {str(e)}")
            return False, f"Table creation failed: {str(e)}", {}

    async def _create_empty_table(
        self, document_id: str, index: int, rows: int, cols: int, tab_id: str = None
    ) -> None:
        """Create an empty table at the specified index."""
        logger.debug(f"Creating {rows}x{cols} table at index {index}")

        await asyncio.to_thread(
            self.service.documents()
            .batchUpdate(
                documentId=document_id,
                body={
                    "requests": [create_insert_table_request(index, rows, cols, tab_id)]
                },
            )
            .execute
        )

    async def _get_document_tables(
        self, document_id: str, tab_id: str = None
    ) -> List[Dict[str, Any]]:
        """
        Get fresh document structure and extract table information.

        The document is requested with tab content included. The body handed
        to find_tables() is that of the tab named by ``tab_id``; without a
        ``tab_id`` the first tab is used when the response carries no
        top-level "body".
        """
        doc = await asyncio.to_thread(
            self.service.documents()
            .get(documentId=document_id, includeTabsContent=True)
            .execute
        )

        tabs = doc.get("tabs", [])
        tab = None
        if tab_id:
            tab = self._find_tab(tabs, tab_id)
        elif "body" not in doc and tabs:
            tab = tabs[0]

        if tab and "documentTab" in tab:
            doc = doc.copy()
            doc["body"] = tab["documentTab"].get("body", {})

        return find_tables(doc)

    @staticmethod
    def _find_tab(tabs: list, target_id: str):
        """Recursively find a tab (searching child tabs too) by ID, or None."""
        for tab in tabs:
            if tab.get("tabProperties", {}).get("tabId") == target_id:
                return tab
            if "childTabs" in tab:
                found = TableOperationManager._find_tab(tab["childTabs"], target_id)
                if found:
                    return found
        return None

    async def _populate_table_cells(
        self,
        document_id: str,
        table_data: List[List[str]],
        bold_headers: bool,
        tab_id: str = None,
        table_index: int = -1,
    ) -> int:
        """
        Populate table cells with data, refreshing structure after each insertion.

        Inserting text shifts every later index, so each cell is written via
        _populate_single_cell(), which re-reads the document first. Empty
        cells are skipped. Failures are logged and do not stop the run.

        Args:
            table_index: List position of the table to fill (default: last)

        Returns:
            Number of cells successfully populated
        """
        population_count = 0

        for row_idx, row_data in enumerate(table_data):
            logger.debug(f"Processing row {row_idx}: {len(row_data)} cells")

            for col_idx, cell_text in enumerate(row_data):
                if not cell_text:  # Skip empty cells
                    continue

                try:
                    success = await self._populate_single_cell(
                        document_id,
                        row_idx,
                        col_idx,
                        cell_text,
                        bold_headers and row_idx == 0,
                        tab_id,
                        table_index,
                    )
                except Exception as e:
                    logger.error(
                        f"Error populating cell ({row_idx},{col_idx}): {str(e)}"
                    )
                    continue

                if success:
                    population_count += 1
                    logger.debug(f"Populated cell ({row_idx},{col_idx})")
                else:
                    logger.warning(f"Failed to populate cell ({row_idx},{col_idx})")

        return population_count

    async def _populate_single_cell(
        self,
        document_id: str,
        row_idx: int,
        col_idx: int,
        cell_text: str,
        apply_bold: bool = False,
        tab_id: str = None,
        table_index: int = -1,
    ) -> bool:
        """
        Populate a single cell with text, with optional bold formatting.

        Args:
            table_index: List position of the table to write to (default: last)

        Returns True if successful, False otherwise.
        """
        try:
            # Get fresh table structure to avoid index shifting issues
            tables = await self._get_document_tables(document_id, tab_id)
            if not tables:
                return False
            if not -len(tables) <= table_index < len(tables):
                logger.error(f"Table index {table_index} out of range")
                return False

            cells = tables[table_index].get("cells", [])
            if row_idx >= len(cells) or col_idx >= len(cells[row_idx]):
                logger.error(f"Cell ({row_idx},{col_idx}) out of bounds")
                return False

            insertion_index = cells[row_idx][col_idx].get("insertion_index")
            if insertion_index is None:
                logger.warning(f"No insertion_index for cell ({row_idx},{col_idx})")
                return False

            await asyncio.to_thread(
                self.service.documents()
                .batchUpdate(
                    documentId=document_id,
                    body={
                        "requests": [
                            {
                                "insertText": {
                                    # tabId keeps the write in the tab the
                                    # indexes were read from.
                                    "location": self._in_tab(
                                        {"index": insertion_index}, tab_id
                                    ),
                                    "text": cell_text,
                                }
                            }
                        ]
                    },
                )
                .execute
            )

            if apply_bold:
                await self._apply_bold_formatting(
                    document_id,
                    insertion_index,
                    insertion_index + self._utf16_len(cell_text),
                    tab_id,
                )

            return True

        except Exception as e:
            logger.error(f"Failed to populate single cell: {str(e)}")
            return False

    async def _apply_bold_formatting(
        self, document_id: str, start_index: int, end_index: int, tab_id: str = None
    ) -> None:
        """Apply bold formatting to a text range, optionally inside a tab."""
        await asyncio.to_thread(
            self.service.documents()
            .batchUpdate(
                documentId=document_id,
                body={
                    "requests": [
                        {
                            "updateTextStyle": {
                                "range": self._in_tab(
                                    {
                                        "startIndex": start_index,
                                        "endIndex": end_index,
                                    },
                                    tab_id,
                                ),
                                "textStyle": {"bold": True},
                                "fields": "bold",
                            }
                        }
                    ]
                },
            )
            .execute
        )

    async def populate_existing_table(
        self,
        document_id: str,
        table_index: int,
        table_data: List[List[str]],
        clear_existing: bool = False,
        tab_id: str = None,
    ) -> Tuple[bool, str, Dict[str, Any]]:
        """
        Populate an existing table with data.

        Args:
            document_id: ID of the document
            table_index: Index of the table to populate (0-based)
            table_data: 2D list of data to insert
            clear_existing: Accepted for interface compatibility; this
                implementation does not act on it and always appends text to
                each cell's existing content
            tab_id: Optional tab ID for targeting a specific tab

        Returns:
            Tuple of (success, message, metadata)
        """
        try:
            tables = await self._get_document_tables(document_id, tab_id)
            # Negative values would silently count from the end of the list.
            if not 0 <= table_index < len(tables):
                return (
                    False,
                    f"Table index {table_index} not found. Document has {len(tables)} tables",
                    {},
                )

            table_info = tables[table_index]

            table_rows = table_info["rows"]
            table_cols = table_info["columns"]
            data_rows = len(table_data)
            # Widest row, so ragged data cannot slip past the size check.
            data_cols = max((len(row) for row in table_data), default=0)

            if data_rows > table_rows or data_cols > table_cols:
                return (
                    False,
                    f"Data ({data_rows}x{data_cols}) exceeds table dimensions ({table_rows}x{table_cols})",
                    {},
                )

            population_count = await self._populate_existing_table_cells(
                document_id, table_index, table_data, tab_id
            )

            metadata = {
                "table_index": table_index,
                "populated_cells": population_count,
                "table_dimensions": f"{table_rows}x{table_cols}",
                "data_dimensions": f"{data_rows}x{data_cols}",
            }

            return (
                True,
                f"Successfully populated {population_count} cells in existing table",
                metadata,
            )

        except Exception as e:
            logger.error(f"Failed to populate existing table: {str(e)}")
            return False, f"Failed to populate existing table: {str(e)}", {}

    async def _populate_existing_table_cells(
        self,
        document_id: str,
        table_index: int,
        table_data: List[List[str]],
        tab_id: str = None,
    ) -> int:
        """
        Append text to cells of an existing table.

        The table structure is re-read before every insertion. Empty cells and
        cells outside the table are skipped. Insert failures are logged.

        Returns:
            Number of cells successfully populated
        """
        population_count = 0

        for row_idx, row_data in enumerate(table_data):
            for col_idx, cell_text in enumerate(row_data):
                if not cell_text:
                    continue

                tables = await self._get_document_tables(document_id, tab_id)
                if table_index >= len(tables):
                    # The table is gone; no later cell can succeed either.
                    return population_count

                cells = tables[table_index].get("cells", [])
                if row_idx >= len(cells) or col_idx >= len(cells[row_idx]):
                    continue

                # Append before the cell's end marker.
                cell_end = cells[row_idx][col_idx]["end_index"] - 1

                try:
                    await asyncio.to_thread(
                        self.service.documents()
                        .batchUpdate(
                            documentId=document_id,
                            body={
                                "requests": [
                                    {
                                        "insertText": {
                                            "location": self._in_tab(
                                                {"index": cell_end}, tab_id
                                            ),
                                            "text": cell_text,
                                        }
                                    }
                                ]
                            },
                        )
                        .execute
                    )
                    population_count += 1

                except Exception as e:
                    logger.error(
                        f"Failed to populate existing cell ({row_idx},{col_idx}): {str(e)}"
                    )

        return population_count
+ """ + + def __init__(self): + """Initialize the validation manager.""" + self.validation_rules = self._setup_validation_rules() + + def _setup_validation_rules(self) -> Dict[str, Any]: + """Setup validation rules and constraints.""" + return { + "table_max_rows": 1000, + "table_max_columns": 20, + "document_id_pattern": r"^[a-zA-Z0-9-_]+$", + "max_text_length": 1000000, # 1MB text limit + "font_size_range": (1, 400), # Google Docs font size limits + "valid_header_footer_types": ["DEFAULT", "FIRST_PAGE_ONLY", "EVEN_PAGE"], + "valid_section_types": ["header", "footer"], + "valid_list_types": ["UNORDERED", "ORDERED"], + "valid_element_types": ["table", "list", "page_break"], + "valid_alignments": ["START", "CENTER", "END", "JUSTIFIED"], + "heading_level_range": (0, 6), + } + + def validate_document_id(self, document_id: str) -> Tuple[bool, str]: + """ + Validate Google Docs document ID format. + + Args: + document_id: Document ID to validate + + Returns: + Tuple of (is_valid, error_message) + """ + if not document_id: + return False, "Document ID cannot be empty" + + if not isinstance(document_id, str): + return ( + False, + f"Document ID must be a string, got {type(document_id).__name__}", + ) + + # Basic length check (Google Docs IDs are typically 40+ characters) + if len(document_id) < 20: + return False, "Document ID appears too short to be valid" + + return True, "" + + def validate_table_data(self, table_data: List[List[str]]) -> Tuple[bool, str]: + """ + Comprehensive validation for table data format. + + This extracts and centralizes table validation logic from multiple functions. + + Args: + table_data: 2D array of data to validate + + Returns: + Tuple of (is_valid, detailed_error_message) + """ + if not table_data: + return ( + False, + "Table data cannot be empty. Required format: [['col1', 'col2'], ['row1col1', 'row1col2']]", + ) + + if not isinstance(table_data, list): + return ( + False, + f"Table data must be a list, got {type(table_data).__name__}. 
Required format: [['col1', 'col2'], ['row1col1', 'row1col2']]", + ) + + # Check if it's a 2D list + if not all(isinstance(row, list) for row in table_data): + non_list_rows = [ + i for i, row in enumerate(table_data) if not isinstance(row, list) + ] + return ( + False, + f"All rows must be lists. Rows {non_list_rows} are not lists. Required format: [['col1', 'col2'], ['row1col1', 'row1col2']]", + ) + + # Check for empty rows + if any(len(row) == 0 for row in table_data): + empty_rows = [i for i, row in enumerate(table_data) if len(row) == 0] + return ( + False, + f"Rows cannot be empty. Empty rows found at indices: {empty_rows}", + ) + + # Check column consistency + col_counts = [len(row) for row in table_data] + if len(set(col_counts)) > 1: + return ( + False, + f"All rows must have the same number of columns. Found column counts: {col_counts}. Fix your data structure.", + ) + + rows = len(table_data) + cols = col_counts[0] + + # Check dimension limits + if rows > self.validation_rules["table_max_rows"]: + return ( + False, + f"Too many rows ({rows}). Maximum allowed: {self.validation_rules['table_max_rows']}", + ) + + if cols > self.validation_rules["table_max_columns"]: + return ( + False, + f"Too many columns ({cols}). Maximum allowed: {self.validation_rules['table_max_columns']}", + ) + + # Check cell content types + for row_idx, row in enumerate(table_data): + for col_idx, cell in enumerate(row): + if cell is None: + return ( + False, + f"Cell ({row_idx},{col_idx}) is None. All cells must be strings, use empty string '' for empty cells.", + ) + + if not isinstance(cell, str): + return ( + False, + f"Cell ({row_idx},{col_idx}) is {type(cell).__name__}, not string. All cells must be strings. 
Value: {repr(cell)}", + ) + + return True, f"Valid table data: {rows}×{cols} table format" + + def validate_text_formatting_params( + self, + bold: Optional[bool] = None, + italic: Optional[bool] = None, + underline: Optional[bool] = None, + font_size: Optional[int] = None, + font_family: Optional[str] = None, + text_color: Optional[str] = None, + background_color: Optional[str] = None, + link_url: Optional[str] = None, + ) -> Tuple[bool, str]: + """ + Validate text formatting parameters. + + Args: + bold: Bold setting + italic: Italic setting + underline: Underline setting + font_size: Font size in points + font_family: Font family name + text_color: Text color in "#RRGGBB" format + background_color: Background color in "#RRGGBB" format + link_url: Hyperlink URL (http/https) + + Returns: + Tuple of (is_valid, error_message) + """ + # Check if at least one formatting option is provided + formatting_params = [ + bold, + italic, + underline, + font_size, + font_family, + text_color, + background_color, + link_url, + ] + if all(param is None for param in formatting_params): + return ( + False, + "At least one formatting parameter must be provided (bold, italic, underline, font_size, font_family, text_color, background_color, or link_url)", + ) + + # Validate boolean parameters + for param, name in [ + (bold, "bold"), + (italic, "italic"), + (underline, "underline"), + ]: + if param is not None and not isinstance(param, bool): + return ( + False, + f"{name} parameter must be boolean (True/False), got {type(param).__name__}", + ) + + # Validate font size + if font_size is not None: + if not isinstance(font_size, int): + return ( + False, + f"font_size must be an integer, got {type(font_size).__name__}", + ) + + min_size, max_size = self.validation_rules["font_size_range"] + if not (min_size <= font_size <= max_size): + return ( + False, + f"font_size must be between {min_size} and {max_size} points, got {font_size}", + ) + + # Validate font family + if font_family is 
def validate_link_url(self, link_url: Optional[str]) -> Tuple[bool, str]:
    """
    Validate a hyperlink URL.

    ``None`` means "no link requested" and is valid. Otherwise the value must
    be a non-blank string with an ``http`` or ``https`` scheme and a host.

    Args:
        link_url: URL to validate, or None

    Returns:
        Tuple of (is_valid, error_message)
    """
    if link_url is None:
        return True, ""

    if not isinstance(link_url, str):
        return False, f"link_url must be a string, got {type(link_url).__name__}"

    if not link_url.strip():
        return False, "link_url cannot be empty"

    try:
        parsed = urlparse(link_url)
    except ValueError:
        # urlparse raises on a malformed host (e.g. an unbalanced IPv6
        # bracket); report it as a validation failure like the other checks.
        return False, "link_url must include a valid host"

    if parsed.scheme not in ("http", "https"):
        return False, "link_url must start with http:// or https://"

    if not parsed.netloc:
        return False, "link_url must include a valid host"

    return True, ""
+ + Args: + heading_level: Heading level 0-6 (0 = NORMAL_TEXT, 1-6 = HEADING_N) + alignment: Text alignment - 'START', 'CENTER', 'END', or 'JUSTIFIED' + line_spacing: Line spacing multiplier (must be positive) + indent_first_line: First line indent in points + indent_start: Left/start indent in points + indent_end: Right/end indent in points + space_above: Space above paragraph in points + space_below: Space below paragraph in points + named_style_type: Direct named style (TITLE, SUBTITLE, HEADING_1..6, NORMAL_TEXT) + + Returns: + Tuple of (is_valid, error_message) + """ + style_params = [ + heading_level, + alignment, + line_spacing, + indent_first_line, + indent_start, + indent_end, + space_above, + space_below, + named_style_type, + ] + if all(param is None for param in style_params): + return ( + False, + "At least one paragraph style parameter must be provided (heading_level, alignment, line_spacing, indent_first_line, indent_start, indent_end, space_above, space_below, or named_style_type)", + ) + + if heading_level is not None and named_style_type is not None: + return ( + False, + "heading_level and named_style_type are mutually exclusive; provide only one", + ) + + if named_style_type is not None: + valid_styles = [ + "NORMAL_TEXT", + "TITLE", + "SUBTITLE", + "HEADING_1", + "HEADING_2", + "HEADING_3", + "HEADING_4", + "HEADING_5", + "HEADING_6", + ] + if named_style_type not in valid_styles: + return ( + False, + f"Invalid named_style_type '{named_style_type}'. 
Must be one of: {', '.join(valid_styles)}", + ) + + if heading_level is not None: + if not isinstance(heading_level, int): + return ( + False, + f"heading_level must be an integer, got {type(heading_level).__name__}", + ) + min_level, max_level = self.validation_rules["heading_level_range"] + if not (min_level <= heading_level <= max_level): + return ( + False, + f"heading_level must be between {min_level} and {max_level}, got {heading_level}", + ) + + if alignment is not None: + if not isinstance(alignment, str): + return ( + False, + f"alignment must be a string, got {type(alignment).__name__}", + ) + valid = self.validation_rules["valid_alignments"] + if alignment.upper() not in valid: + return ( + False, + f"alignment must be one of: {', '.join(valid)}, got '{alignment}'", + ) + + if line_spacing is not None: + if not isinstance(line_spacing, (int, float)): + return ( + False, + f"line_spacing must be a number, got {type(line_spacing).__name__}", + ) + if line_spacing <= 0: + return False, "line_spacing must be positive" + + for param, name in [ + (indent_first_line, "indent_first_line"), + (indent_start, "indent_start"), + (indent_end, "indent_end"), + (space_above, "space_above"), + (space_below, "space_below"), + ]: + if param is not None: + if not isinstance(param, (int, float)): + return ( + False, + f"{name} must be a number, got {type(param).__name__}", + ) + # indent_first_line may be negative (hanging indent) + if name != "indent_first_line" and param < 0: + return False, f"{name} must be non-negative, got {param}" + + return True, "" + + def validate_color_param( + self, color: Optional[str], param_name: str + ) -> Tuple[bool, str]: + """Validate color parameters (hex string "#RRGGBB").""" + if color is None: + return True, "" + + if not isinstance(color, str): + return False, f"{param_name} must be a hex string like '#RRGGBB'" + + if len(color) != 7 or not color.startswith("#"): + return False, f"{param_name} must be a hex string like '#RRGGBB'" + + 
def validate_index(self, index: int, context: str = "Index") -> Tuple[bool, str]:
    """
    Validate a single document index.

    Args:
        index: Index to validate; must be a non-negative integer
        context: Label used at the start of error messages

    Returns:
        Tuple of (is_valid, error_message)
    """
    # bool is a subclass of int, so True/False would otherwise pass as 1/0.
    if isinstance(index, bool) or not isinstance(index, int):
        return False, f"{context} must be an integer, got {type(index).__name__}"

    if index < 0:
        return (
            False,
            f"{context} {index} is negative. You MUST call inspect_doc_structure first to get the proper insertion index.",
        )

    return True, ""
( + False, + f"end_index ({end_index}) exceeds document length ({document_length})", + ) + + return True, "" + + def validate_element_insertion_params( + self, element_type: str, index: int, **kwargs + ) -> Tuple[bool, str]: + """ + Validate parameters for element insertion. + + Args: + element_type: Type of element to insert + index: Insertion index + **kwargs: Additional parameters specific to element type + + Returns: + Tuple of (is_valid, error_message) + """ + # Validate element type + if element_type not in self.validation_rules["valid_element_types"]: + valid_types = ", ".join(self.validation_rules["valid_element_types"]) + return ( + False, + f"Invalid element_type '{element_type}'. Must be one of: {valid_types}", + ) + + # Validate index + if not isinstance(index, int) or index < 0: + return False, f"index must be a non-negative integer, got {index}" + + # Validate element-specific parameters + if element_type == "table": + rows = kwargs.get("rows") + columns = kwargs.get("columns") + + if not rows or not columns: + return False, "Table insertion requires 'rows' and 'columns' parameters" + + if not isinstance(rows, int) or not isinstance(columns, int): + return False, "Table rows and columns must be integers" + + if rows <= 0 or columns <= 0: + return False, "Table rows and columns must be positive integers" + + if rows > self.validation_rules["table_max_rows"]: + return ( + False, + f"Too many rows ({rows}). Maximum: {self.validation_rules['table_max_rows']}", + ) + + if columns > self.validation_rules["table_max_columns"]: + return ( + False, + f"Too many columns ({columns}). 
Maximum: {self.validation_rules['table_max_columns']}", + ) + + elif element_type == "list": + list_type = kwargs.get("list_type") + + if not list_type: + return False, "List insertion requires 'list_type' parameter" + + if list_type not in self.validation_rules["valid_list_types"]: + valid_types = ", ".join(self.validation_rules["valid_list_types"]) + return ( + False, + f"Invalid list_type '{list_type}'. Must be one of: {valid_types}", + ) + + return True, "" + + def validate_header_footer_params( + self, section_type: str, header_footer_type: str = "DEFAULT" + ) -> Tuple[bool, str]: + """ + Validate header/footer operation parameters. + + Args: + section_type: Type of section ("header" or "footer") + header_footer_type: Specific header/footer type + + Returns: + Tuple of (is_valid, error_message) + """ + if section_type not in self.validation_rules["valid_section_types"]: + valid_types = ", ".join(self.validation_rules["valid_section_types"]) + return ( + False, + f"section_type must be one of: {valid_types}, got '{section_type}'", + ) + + if header_footer_type not in self.validation_rules["valid_header_footer_types"]: + valid_types = ", ".join(self.validation_rules["valid_header_footer_types"]) + return ( + False, + f"header_footer_type must be one of: {valid_types}, got '{header_footer_type}'", + ) + + return True, "" + + def validate_batch_operations( + self, operations: List[Dict[str, Any]] + ) -> Tuple[bool, str]: + """ + Validate a list of batch operations. 
+ + Args: + operations: List of operation dictionaries + + Returns: + Tuple of (is_valid, error_message) + """ + if not operations: + return False, "Operations list cannot be empty" + + if not isinstance(operations, list): + return False, f"Operations must be a list, got {type(operations).__name__}" + + # Validate each operation + for i, op in enumerate(operations): + if not isinstance(op, dict): + return ( + False, + f"Operation {i + 1} must be a dictionary, got {type(op).__name__}", + ) + + if "type" not in op: + return False, f"Operation {i + 1} missing required 'type' field" + + # Validate required fields for the operation type + is_valid, error_msg = validate_operation(op) + if not is_valid: + return False, f"Operation {i + 1}: {error_msg}" + + op_type = op["type"] + + if op_type == "format_text": + is_valid, error_msg = self.validate_text_formatting_params( + op.get("bold"), + op.get("italic"), + op.get("underline"), + op.get("font_size"), + op.get("font_family"), + op.get("text_color"), + op.get("background_color"), + op.get("link_url"), + ) + if not is_valid: + return False, f"Operation {i + 1} (format_text): {error_msg}" + + is_valid, error_msg = self.validate_index_range( + op["start_index"], op["end_index"] + ) + if not is_valid: + return False, f"Operation {i + 1} (format_text): {error_msg}" + + elif op_type == "update_paragraph_style": + is_valid, error_msg = self.validate_paragraph_style_params( + op.get("heading_level"), + op.get("alignment"), + op.get("line_spacing"), + op.get("indent_first_line"), + op.get("indent_start"), + op.get("indent_end"), + op.get("space_above"), + op.get("space_below"), + op.get("named_style_type"), + ) + if not is_valid: + return ( + False, + f"Operation {i + 1} (update_paragraph_style): {error_msg}", + ) + + is_valid, error_msg = self.validate_index_range( + op["start_index"], op["end_index"] + ) + if not is_valid: + return ( + False, + f"Operation {i + 1} (update_paragraph_style): {error_msg}", + ) + + return True, "" + 
def validate_text_content(
    self, text: str, max_length: Optional[int] = None
) -> Tuple[bool, str]:
    """
    Validate text content for insertion.

    Args:
        text: Text to validate
        max_length: Maximum allowed length. When None, the validator's
            configured "max_text_length" rule is used.

    Returns:
        Tuple of (is_valid, error_message)
    """
    if not isinstance(text, str):
        return False, f"Text must be a string, got {type(text).__name__}"

    # Compare against None explicitly: an explicit max_length of 0 is a real
    # limit and must not silently fall back to the default.
    if max_length is None:
        max_len = self.validation_rules["max_text_length"]
    else:
        max_len = max_length

    if len(text) > max_len:
        return False, f"Text too long ({len(text)} characters). Maximum: {max_len}"

    return True, ""


def get_validation_summary(self) -> Dict[str, Any]:
    """
    Get a summary of all validation rules and constraints.

    The returned "constraints" entry is a deep copy, so callers may modify
    the summary without altering the validator's live rules.

    Returns:
        Dictionary containing validation rules
    """
    import copy  # local import: only needed here

    return {
        "constraints": copy.deepcopy(self.validation_rules),
        "supported_operations": {
            "table_operations": ["create_table", "populate_table"],
            "text_operations": [
                "insert_text",
                "format_text",
                "find_replace",
                "update_paragraph_style",
            ],
            "element_operations": [
                "insert_table",
                "insert_list",
                "insert_page_break",
            ],
            "header_footer_operations": ["update_header", "update_footer"],
        },
        "data_formats": {
            "table_data": "2D list of strings: [['col1', 'col2'], ['row1col1', 'row1col2']]",
            "text_formatting": "Optional boolean/integer parameters for styling",
            "document_indices": "Non-negative integers for position specification",
        },
    }
import asyncio  # used by the shortcut-resolution coroutines later in this module
import re
from datetime import datetime
from typing import List, Dict, Any, Optional, Tuple

VALID_SHARE_ROLES = {"reader", "commenter", "writer"}
VALID_SHARE_TYPES = {"user", "group", "domain", "anyone"}


def check_public_link_permission(permissions: List[Dict[str, Any]]) -> bool:
    """
    Check if file has 'anyone with the link' permission.

    Args:
        permissions: List of permission objects from Google Drive API

    Returns:
        bool: True if file has public link sharing enabled
    """
    # Reuse the shared role set so this check cannot drift from the share validators.
    return any(
        p.get("type") == "anyone" and p.get("role") in VALID_SHARE_ROLES
        for p in permissions
    )


def format_public_sharing_error(file_name: str, file_id: str) -> str:
    """
    Format error message for files without public sharing.

    Args:
        file_name: Name of the file
        file_id: Google Drive file ID

    Returns:
        str: Formatted error message
    """
    return (
        f"❌ Permission Error: '{file_name}' not shared publicly. "
        f"Set 'Anyone with the link' → 'Viewer' in Google Drive sharing. "
        f"File: https://drive.google.com/file/d/{file_id}/view"
    )


def get_drive_image_url(file_id: str) -> str:
    """
    Get the correct Drive URL format for publicly shared images.

    Args:
        file_id: Google Drive file ID

    Returns:
        str: URL for embedding Drive images
    """
    return f"https://drive.google.com/uc?export=view&id={file_id}"


def validate_share_role(role: str) -> None:
    """
    Validate that the role is valid for sharing.

    Args:
        role: The permission role to validate

    Raises:
        ValueError: If role is not reader, commenter, or writer
    """
    if role not in VALID_SHARE_ROLES:
        raise ValueError(
            f"Invalid role '{role}'. Must be one of: {', '.join(sorted(VALID_SHARE_ROLES))}"
        )


def validate_share_type(share_type: str) -> None:
    """
    Validate that the share type is valid.

    Args:
        share_type: The type of sharing to validate

    Raises:
        ValueError: If share_type is not user, group, domain, or anyone
    """
    if share_type not in VALID_SHARE_TYPES:
        raise ValueError(
            f"Invalid share_type '{share_type}'. Must be one of: {', '.join(sorted(VALID_SHARE_TYPES))}"
        )


RFC3339_PATTERN = re.compile(
    r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d+)?(Z|[+-]\d{2}:\d{2})$"
)


def validate_expiration_time(expiration_time: str) -> None:
    """
    Validate that expiration_time is in RFC 3339 format.

    Besides the textual shape, the date/time fields and any numeric UTC
    offset must denote real values (e.g. '2025-02-30T00:00:00Z' is rejected).

    Args:
        expiration_time: The expiration time string to validate

    Raises:
        ValueError: If expiration_time is not valid RFC 3339 format
    """
    # fullmatch: with match(), the pattern's `$` would accept a trailing newline.
    match = RFC3339_PATTERN.fullmatch(expiration_time)
    is_valid = match is not None

    if is_valid:
        try:
            # The first 19 characters are always 'YYYY-MM-DDTHH:MM:SS' here.
            datetime.strptime(expiration_time[:19], "%Y-%m-%dT%H:%M:%S")
        except ValueError:
            is_valid = False

    if is_valid:
        offset = match.group(2)
        if offset != "Z":
            # offset looks like '+HH:MM' / '-HH:MM'
            is_valid = int(offset[1:3]) <= 23 and int(offset[4:6]) <= 59

    if not is_valid:
        raise ValueError(
            f"Invalid expiration_time '{expiration_time}'. "
            "Must be RFC 3339 format (e.g., '2025-01-15T00:00:00Z')"
        )


def format_permission_info(permission: Dict[str, Any]) -> str:
    """
    Format a permission object for display.

    Args:
        permission: Permission object from Google Drive API

    Returns:
        str: Human-readable permission description with ID, followed by
        " | "-separated extras (expiration, first inherited-from source)
        when present
    """
    perm_type = permission.get("type", "unknown")
    role = permission.get("role", "unknown")
    perm_id = permission.get("id", "")

    if perm_type == "anyone":
        base = f"Anyone with the link ({role}) [id: {perm_id}]"
    elif perm_type == "user":
        email = permission.get("emailAddress", "unknown")
        base = f"User: {email} ({role}) [id: {perm_id}]"
    elif perm_type == "group":
        email = permission.get("emailAddress", "unknown")
        base = f"Group: {email} ({role}) [id: {perm_id}]"
    elif perm_type == "domain":
        domain = permission.get("domain", "unknown")
        base = f"Domain: {domain} ({role}) [id: {perm_id}]"
    else:
        base = f"{perm_type} ({role}) [id: {perm_id}]"

    extras = []
    if permission.get("expirationTime"):
        extras.append(f"expires: {permission['expirationTime']}")

    # Only the first inherited detail is reported.
    for detail in permission.get("permissionDetails", []) or []:
        if detail.get("inherited") and detail.get("inheritedFrom"):
            extras.append(f"inherited from: {detail['inheritedFrom']}")
            break

    if extras:
        return f"{base} | {', '.join(extras)}"
    return base


# Precompiled regex patterns for Drive query detection
DRIVE_QUERY_PATTERNS = [
    re.compile(r'\b\w+\s*(=|!=|>|<)\s*[\'"].*?[\'"]', re.IGNORECASE),  # field = 'value'
    re.compile(r"\b\w+\s*(=|!=|>|<)\s*\d+", re.IGNORECASE),  # field = number
    re.compile(r"\bcontains\b", re.IGNORECASE),  # contains operator
    re.compile(r"\bin\s+parents\b", re.IGNORECASE),  # in parents
    re.compile(r"\bhas\s*\{", re.IGNORECASE),  # has {properties}
    re.compile(r"\btrashed\s*=\s*(true|false)\b", re.IGNORECASE),  # trashed=true/false
    re.compile(r"\bstarred\s*=\s*(true|false)\b", re.IGNORECASE),  # starred=true/false
    re.compile(
        r'[\'"][^\'"]+[\'"]\s+in\s+parents', re.IGNORECASE
    ),  # 'parentId' in parents
    re.compile(r"\bfullText\s+contains\b", re.IGNORECASE),  # fullText contains
    re.compile(r"\bname\s*(=|contains)\b", re.IGNORECASE),  # name = or name contains
    re.compile(r"\bmimeType\s*(=|!=)\b", re.IGNORECASE),  # mimeType operators
]


def build_drive_list_params(
    query: str,
    page_size: int,
    drive_id: Optional[str] = None,
    include_items_from_all_drives: bool = True,
    corpora: Optional[str] = None,
    page_token: Optional[str] = None,
    detailed: bool = True,
) -> Dict[str, Any]:
    """
    Helper function to build common list parameters for Drive API calls.

    Args:
        query: The search query string
        page_size: Maximum number of items to return
        drive_id: Optional shared drive ID
        include_items_from_all_drives: Whether to include items from all drives
        corpora: Optional corpus specification
        page_token: Optional page token for pagination (from a previous nextPageToken)
        detailed: Whether to request size, modifiedTime, and webViewLink fields.
                  Defaults to True to preserve existing behavior.

    Returns:
        Dictionary of parameters for Drive API list calls
    """
    if detailed:
        fields = "nextPageToken, files(id, name, mimeType, webViewLink, iconLink, modifiedTime, size)"
    else:
        fields = "nextPageToken, files(id, name, mimeType)"

    list_params: Dict[str, Any] = {
        "q": query,
        "pageSize": page_size,
        "fields": fields,
        "supportsAllDrives": True,
        "includeItemsFromAllDrives": include_items_from_all_drives,
    }

    if page_token:
        list_params["pageToken"] = page_token

    if drive_id:
        list_params["driveId"] = drive_id
        # A drive-scoped listing defaults to the 'drive' corpus.
        list_params["corpora"] = corpora if corpora else "drive"
    elif corpora:
        list_params["corpora"] = corpora

    return list_params


SHORTCUT_MIME_TYPE = "application/vnd.google-apps.shortcut"
FOLDER_MIME_TYPE = "application/vnd.google-apps.folder"

# RFC 6838 token-style MIME type validation (safe for Drive query interpolation).
MIME_TYPE_PATTERN = re.compile(r"^[A-Za-z0-9!#$&^_.+-]+/[A-Za-z0-9!#$&^_.+-]+$")

# Mapping from friendly type names to Google Drive MIME types.
# Raw MIME type strings (containing '/') are always accepted as-is.
FILE_TYPE_MIME_MAP: Dict[str, str] = {
    "folder": "application/vnd.google-apps.folder",
    "folders": "application/vnd.google-apps.folder",
    "document": "application/vnd.google-apps.document",
    "doc": "application/vnd.google-apps.document",
    "documents": "application/vnd.google-apps.document",
    "docs": "application/vnd.google-apps.document",
    "spreadsheet": "application/vnd.google-apps.spreadsheet",
    "sheet": "application/vnd.google-apps.spreadsheet",
    "spreadsheets": "application/vnd.google-apps.spreadsheet",
    "sheets": "application/vnd.google-apps.spreadsheet",
    "presentation": "application/vnd.google-apps.presentation",
    "presentations": "application/vnd.google-apps.presentation",
    "slide": "application/vnd.google-apps.presentation",
    "slides": "application/vnd.google-apps.presentation",
    "form": "application/vnd.google-apps.form",
    "forms": "application/vnd.google-apps.form",
    "drawing": "application/vnd.google-apps.drawing",
    "drawings": "application/vnd.google-apps.drawing",
    "pdf": "application/pdf",
    "pdfs": "application/pdf",
    "shortcut": "application/vnd.google-apps.shortcut",
    "shortcuts": "application/vnd.google-apps.shortcut",
    "script": "application/vnd.google-apps.script",
    "scripts": "application/vnd.google-apps.script",
    "site": "application/vnd.google-apps.site",
    "sites": "application/vnd.google-apps.site",
    "jam": "application/vnd.google-apps.jam",
    "jamboard": "application/vnd.google-apps.jam",
    "jamboards": "application/vnd.google-apps.jam",
}


def resolve_file_type_mime(file_type: str) -> str:
    """
    Resolve a friendly file type name or raw MIME type string to a Drive MIME type.

    If `file_type` contains '/' it is treated as a raw MIME type: it is
    lower-cased, checked against MIME_TYPE_PATTERN, and returned.
    Otherwise it is looked up (case-insensitively) in FILE_TYPE_MIME_MAP.
    Surrounding whitespace is ignored in both cases.

    Args:
        file_type: A friendly name ('folder', 'document', 'pdf', …) or a raw MIME
                   type string ('application/vnd.google-apps.document', …).

    Returns:
        str: The resolved MIME type string.

    Raises:
        ValueError: If the value is empty, is a malformed MIME type, or is not
                    a recognised friendly name.
    """
    normalized = file_type.strip()
    if not normalized:
        raise ValueError("file_type cannot be empty.")

    lowered = normalized.lower()

    if "/" in normalized:
        # Raw MIME type: validate strictly because the value is interpolated
        # into a Drive query string by callers.
        if not MIME_TYPE_PATTERN.fullmatch(lowered):
            raise ValueError(
                f"Invalid MIME type '{file_type}'. Expected format like 'application/pdf'."
            )
        return lowered

    try:
        return FILE_TYPE_MIME_MAP[lowered]
    except KeyError:
        valid = ", ".join(sorted(FILE_TYPE_MIME_MAP.keys()))
        raise ValueError(
            f"Unknown file_type '{file_type}'. Pass a MIME type directly (e.g. "
            f"'application/pdf') or use one of the friendly names: {valid}"
        ) from None


BASE_SHORTCUT_FIELDS = (
    "id, mimeType, parents, shortcutDetails(targetId, targetMimeType)"
)


async def resolve_drive_item(
    service,
    file_id: str,
    *,
    extra_fields: Optional[str] = None,
    max_depth: int = 5,
) -> Tuple[str, Dict[str, Any]]:
    """
    Resolve a Drive shortcut so downstream callers operate on the real item.

    Each hop fetches the item's metadata (BASE_SHORTCUT_FIELDS plus any
    `extra_fields`) and follows `shortcutDetails.targetId` while the item's
    mimeType is the shortcut type.

    Args:
        service: Drive API service object exposing files().get(...).execute.
        file_id: ID of the file or shortcut to resolve.
        extra_fields: Optional comma-separated field list appended to the
            metadata request.
        max_depth: Maximum number of shortcut hops to follow.

    Returns:
        Tuple of (resolved_file_id, metadata of the resolved item).

    Raises:
        Exception: If a shortcut has no target, if the shortcut chain loops
            back to an ID already visited, or if it exceeds max_depth hops.
    """
    fields = BASE_SHORTCUT_FIELDS
    if extra_fields:
        fields = f"{fields}, {extra_fields}"

    current_id = file_id
    visited = {file_id}  # IDs already fetched, for explicit loop detection
    hops = 0

    while True:
        metadata = await asyncio.to_thread(
            service.files()
            .get(fileId=current_id, fields=fields, supportsAllDrives=True)
            .execute
        )
        if metadata.get("mimeType") != SHORTCUT_MIME_TYPE:
            return current_id, metadata

        target_id = (metadata.get("shortcutDetails") or {}).get("targetId")
        if not target_id:
            raise Exception(f"Shortcut '{current_id}' is missing target details.")

        # Fail fast on a cycle instead of burning API calls until max_depth.
        if target_id in visited:
            raise Exception(
                f"Shortcut loop detected while resolving '{file_id}': "
                f"'{current_id}' points back to '{target_id}'."
            )

        hops += 1
        if hops > max_depth:
            raise Exception(
                f"Shortcut resolution exceeded {max_depth} hops starting from '{file_id}'."
            )

        visited.add(target_id)
        current_id = target_id


async def resolve_folder_id(
    service,
    folder_id: str,
    *,
    max_depth: int = 5,
) -> str:
    """
    Resolve a folder ID that might be a shortcut and ensure the final target is a folder.

    Args:
        service: Drive API service object passed through to resolve_drive_item.
        folder_id: ID of a folder, or of a shortcut expected to lead to one.
        max_depth: Maximum number of shortcut hops to follow.

    Returns:
        str: The ID of the resolved folder.

    Raises:
        Exception: If the resolved item's mimeType is not the Drive folder type
            (or if shortcut resolution itself fails).
    """
    resolved_id, metadata = await resolve_drive_item(
        service,
        folder_id,
        max_depth=max_depth,
    )
    mime_type = metadata.get("mimeType")
    if mime_type != FOLDER_MIME_TYPE:
        raise Exception(
            f"Resolved ID '{resolved_id}' (from '{folder_id}') is not a folder; mimeType={mime_type}."
        )
    return resolved_id
+) + +logger = logging.getLogger(__name__) + +DOWNLOAD_CHUNK_SIZE_BYTES = 256 * 1024 # 256 KB +UPLOAD_CHUNK_SIZE_BYTES = 5 * 1024 * 1024 # 5 MB (Google recommended minimum) +MAX_DOWNLOAD_BYTES = 2 * 1024 * 1024 * 1024 # 2 GB safety limit for URL downloads + + +@server.tool() +@handle_http_errors("search_drive_files", is_read_only=True, service_type="drive") +@require_google_service("drive", "drive_read") +async def search_drive_files( + service, + user_google_email: str, + query: str, + page_size: int = 10, + page_token: Optional[str] = None, + drive_id: Optional[str] = None, + include_items_from_all_drives: bool = True, + corpora: Optional[str] = None, + file_type: Optional[str] = None, + detailed: bool = True, +) -> str: + """ + Searches for files and folders within a user's Google Drive, including shared drives. + + Args: + user_google_email (str): The user's Google email address. Required. + query (str): The search query string. Supports Google Drive search operators. + page_size (int): The maximum number of files to return. Defaults to 10. + page_token (Optional[str]): Page token from a previous response's nextPageToken to retrieve the next page of results. + drive_id (Optional[str]): ID of the shared drive to search. If None, behavior depends on `corpora` and `include_items_from_all_drives`. + include_items_from_all_drives (bool): Whether shared drive items should be included in results. Defaults to True. This is effective when not specifying a `drive_id`. + corpora (Optional[str]): Bodies of items to query (e.g., 'user', 'domain', 'drive', 'allDrives'). + If 'drive_id' is specified and 'corpora' is None, it defaults to 'drive'. + Otherwise, Drive API default behavior applies. Prefer 'user' or 'drive' over 'allDrives' for efficiency. + file_type (Optional[str]): Restrict results to a specific file type. 
Accepts a friendly + name ('folder', 'document'/'doc', 'spreadsheet'/'sheet', + 'presentation'/'slides', 'form', 'drawing', 'pdf', 'shortcut', + 'script', 'site', 'jam'/'jamboard') or any raw MIME type + string (e.g. 'application/pdf'). Defaults to None (all types). + detailed (bool): Whether to include size, modified time, and link in results. Defaults to True. + + Returns: + str: A formatted list of found files/folders with their details (ID, name, type, and optionally size, modified time, link). + Includes a nextPageToken line when more results are available. + """ + logger.info( + f"[search_drive_files] Invoked. Email: '{user_google_email}', Query: '{query}', file_type: '{file_type}'" + ) + + # Check if the query looks like a structured Drive query or free text + # Look for Drive API operators and structured query patterns + is_structured_query = any(pattern.search(query) for pattern in DRIVE_QUERY_PATTERNS) + + if is_structured_query: + final_query = query + logger.info( + f"[search_drive_files] Using structured query as-is: '{final_query}'" + ) + else: + # For free text queries, wrap in fullText contains + escaped_query = query.replace("'", "\\'") + final_query = f"fullText contains '{escaped_query}'" + logger.info( + f"[search_drive_files] Reformatting free text query '{query}' to '{final_query}'" + ) + + if file_type is not None: + mime = resolve_file_type_mime(file_type) + final_query = f"({final_query}) and mimeType = '{mime}'" + logger.info(f"[search_drive_files] Added mimeType filter: '{mime}'") + + list_params = build_drive_list_params( + query=final_query, + page_size=page_size, + drive_id=drive_id, + include_items_from_all_drives=include_items_from_all_drives, + corpora=corpora, + page_token=page_token, + detailed=detailed, + ) + + results = await asyncio.to_thread(service.files().list(**list_params).execute) + files = results.get("files", []) + if not files: + return f"No files found for '{query}'." 
+ + next_token = results.get("nextPageToken") + header = f"Found {len(files)} files for {user_google_email} matching '{query}':" + formatted_files_text_parts = [header] + for item in files: + if detailed: + size_str = f", Size: {item.get('size', 'N/A')}" if "size" in item else "" + formatted_files_text_parts.append( + f'- Name: "{item["name"]}" (ID: {item["id"]}, Type: {item["mimeType"]}{size_str}, Modified: {item.get("modifiedTime", "N/A")}) Link: {item.get("webViewLink", "#")}' + ) + else: + formatted_files_text_parts.append( + f'- Name: "{item["name"]}" (ID: {item["id"]}, Type: {item["mimeType"]})' + ) + if next_token: + formatted_files_text_parts.append(f"nextPageToken: {next_token}") + text_output = "\n".join(formatted_files_text_parts) + return text_output + + +@server.tool() +@handle_http_errors("get_drive_file_content", is_read_only=True, service_type="drive") +@require_google_service("drive", "drive_read") +async def get_drive_file_content( + service, + user_google_email: str, + file_id: str, +) -> str: + """ + Retrieves the content of a specific Google Drive file by ID, supporting files in shared drives. + + • Native Google Docs, Sheets, Slides → exported as text / CSV. + • Office files (.docx, .xlsx, .pptx) → unzipped & parsed with std-lib to + extract readable text. + • Any other file → downloaded; tries UTF-8 decode, else notes binary. + + Args: + user_google_email: The user’s Google email address. + file_id: Drive file ID. + + Returns: + str: The file content as plain text with metadata header. + """ + logger.info(f"[get_drive_file_content] Invoked. 
File ID: '{file_id}'") + + resolved_file_id, file_metadata = await resolve_drive_item( + service, + file_id, + extra_fields="name, webViewLink", + ) + file_id = resolved_file_id + mime_type = file_metadata.get("mimeType", "") + file_name = file_metadata.get("name", "Unknown File") + export_mime_type = { + "application/vnd.google-apps.document": "text/plain", + "application/vnd.google-apps.spreadsheet": "text/csv", + "application/vnd.google-apps.presentation": "text/plain", + }.get(mime_type) + + request_obj = ( + service.files().export_media(fileId=file_id, mimeType=export_mime_type) + if export_mime_type + else service.files().get_media(fileId=file_id) + ) + fh = io.BytesIO() + downloader = MediaIoBaseDownload(fh, request_obj) + loop = asyncio.get_event_loop() + done = False + while not done: + status, done = await loop.run_in_executor(None, downloader.next_chunk) + + file_content_bytes = fh.getvalue() + + # Attempt Office XML extraction only for actual Office XML files + office_mime_types = { + "application/vnd.openxmlformats-officedocument.wordprocessingml.document", + "application/vnd.openxmlformats-officedocument.presentationml.presentation", + "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", + } + + if mime_type in office_mime_types: + office_text = extract_office_xml_text(file_content_bytes, mime_type) + if office_text: + body_text = office_text + else: + # Fallback: try UTF-8; otherwise flag binary + try: + body_text = file_content_bytes.decode("utf-8") + except UnicodeDecodeError: + body_text = ( + f"[Binary or unsupported text encoding for mimeType '{mime_type}' - " + f"{len(file_content_bytes)} bytes]" + ) + else: + # For non-Office files (including Google native files), try UTF-8 decode directly + try: + body_text = file_content_bytes.decode("utf-8") + except UnicodeDecodeError: + body_text = ( + f"[Binary or unsupported text encoding for mimeType '{mime_type}' - " + f"{len(file_content_bytes)} bytes]" + ) + + # Assemble response + 
header = ( + f'File: "{file_name}" (ID: {file_id}, Type: {mime_type})\n' + f"Link: {file_metadata.get('webViewLink', '#')}\n\n--- CONTENT ---\n" + ) + return header + body_text + + +@server.tool() +@handle_http_errors( + "get_drive_file_download_url", is_read_only=True, service_type="drive" +) +@require_google_service("drive", "drive_read") +async def get_drive_file_download_url( + service, + user_google_email: str, + file_id: str, + export_format: Optional[str] = None, +) -> str: + """ + Downloads a Google Drive file and saves it to local disk. + + In stdio mode, returns the local file path for direct access. + In HTTP mode, returns a temporary download URL (valid for 1 hour). + + For Google native files (Docs, Sheets, Slides), exports to a useful format: + - Google Docs -> PDF (default) or DOCX if export_format='docx' + - Google Sheets -> XLSX (default), PDF if export_format='pdf', or CSV if export_format='csv' + - Google Slides -> PDF (default) or PPTX if export_format='pptx' + + For other files, downloads the original file format. + + Args: + user_google_email: The user's Google email address. Required. + file_id: The Google Drive file ID to download. + export_format: Optional export format for Google native files. + Options: 'pdf', 'docx', 'xlsx', 'csv', 'pptx'. + If not specified, uses sensible defaults (PDF for Docs/Slides, XLSX for Sheets). + For Sheets: supports 'csv', 'pdf', or 'xlsx' (default). + + Returns: + str: File metadata with either a local file path or download URL. + """ + logger.info( + f"[get_drive_file_download_url] Invoked. 
File ID: '{file_id}', Export format: {export_format}" + ) + + # Resolve shortcuts and get file metadata + resolved_file_id, file_metadata = await resolve_drive_item( + service, + file_id, + extra_fields="name, webViewLink, mimeType", + ) + file_id = resolved_file_id + mime_type = file_metadata.get("mimeType", "") + file_name = file_metadata.get("name", "Unknown File") + + # Determine export format for Google native files + export_mime_type = None + output_filename = file_name + output_mime_type = mime_type + + if mime_type == "application/vnd.google-apps.document": + # Google Docs + if export_format == "docx": + export_mime_type = "application/vnd.openxmlformats-officedocument.wordprocessingml.document" + output_mime_type = export_mime_type + if not output_filename.endswith(".docx"): + output_filename = f"{Path(output_filename).stem}.docx" + else: + # Default to PDF + export_mime_type = "application/pdf" + output_mime_type = export_mime_type + if not output_filename.endswith(".pdf"): + output_filename = f"{Path(output_filename).stem}.pdf" + + elif mime_type == "application/vnd.google-apps.spreadsheet": + # Google Sheets + if export_format == "csv": + export_mime_type = "text/csv" + output_mime_type = export_mime_type + if not output_filename.endswith(".csv"): + output_filename = f"{Path(output_filename).stem}.csv" + elif export_format == "pdf": + export_mime_type = "application/pdf" + output_mime_type = export_mime_type + if not output_filename.endswith(".pdf"): + output_filename = f"{Path(output_filename).stem}.pdf" + else: + # Default to XLSX + export_mime_type = ( + "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" + ) + output_mime_type = export_mime_type + if not output_filename.endswith(".xlsx"): + output_filename = f"{Path(output_filename).stem}.xlsx" + + elif mime_type == "application/vnd.google-apps.presentation": + # Google Slides + if export_format == "pptx": + export_mime_type = 
"application/vnd.openxmlformats-officedocument.presentationml.presentation" + output_mime_type = export_mime_type + if not output_filename.endswith(".pptx"): + output_filename = f"{Path(output_filename).stem}.pptx" + else: + # Default to PDF + export_mime_type = "application/pdf" + output_mime_type = export_mime_type + if not output_filename.endswith(".pdf"): + output_filename = f"{Path(output_filename).stem}.pdf" + + # Download the file + request_obj = ( + service.files().export_media(fileId=file_id, mimeType=export_mime_type) + if export_mime_type + else service.files().get_media(fileId=file_id) + ) + + fh = io.BytesIO() + downloader = MediaIoBaseDownload(fh, request_obj) + loop = asyncio.get_event_loop() + done = False + while not done: + status, done = await loop.run_in_executor(None, downloader.next_chunk) + + file_content_bytes = fh.getvalue() + size_bytes = len(file_content_bytes) + size_kb = size_bytes / 1024 if size_bytes else 0 + + # Check if we're in stateless mode (can't save files) + if is_stateless_mode(): + result_lines = [ + "File downloaded successfully!", + f"File: {file_name}", + f"File ID: {file_id}", + f"Size: {size_kb:.1f} KB ({size_bytes} bytes)", + f"MIME Type: {output_mime_type}", + "\n⚠️ Stateless mode: File storage disabled.", + "\nBase64-encoded content (first 100 characters shown):", + f"{base64.b64encode(file_content_bytes[:100]).decode('utf-8')}...", + ] + logger.info( + f"[get_drive_file_download_url] Successfully downloaded {size_kb:.1f} KB file (stateless mode)" + ) + return "\n".join(result_lines) + + # Save file to local disk and return file path + try: + storage = get_attachment_storage() + + # Encode bytes to base64 (as expected by AttachmentStorage) + base64_data = base64.urlsafe_b64encode(file_content_bytes).decode("utf-8") + + # Save attachment to local disk + result = storage.save_attachment( + base64_data=base64_data, + filename=output_filename, + mime_type=output_mime_type, + ) + + result_lines = [ + "File downloaded 
@server.tool()
@handle_http_errors("list_drive_items", is_read_only=True, service_type="drive")
@require_google_service("drive", "drive_read")
async def list_drive_items(
    service,
    user_google_email: str,
    folder_id: str = "root",
    page_size: int = 100,
    page_token: Optional[str] = None,
    drive_id: Optional[str] = None,
    include_items_from_all_drives: bool = True,
    corpora: Optional[str] = None,
    file_type: Optional[str] = None,
    detailed: bool = True,
) -> str:
    """
    Lists files and folders, supporting shared drives.
    If `drive_id` is specified, lists items within that shared drive. `folder_id` is then relative to that drive (or use drive_id as folder_id for root).
    If `drive_id` is not specified, lists items from user's "My Drive" and accessible shared drives (if `include_items_from_all_drives` is True).

    Args:
        user_google_email (str): The user's Google email address. Required.
        folder_id (str): The ID of the Google Drive folder. Defaults to 'root'. For a shared drive, this can be the shared drive's ID to list its root, or a folder ID within that shared drive.
        page_size (int): The maximum number of items to return. Defaults to 100.
        page_token (Optional[str]): Page token from a previous response's nextPageToken to retrieve the next page of results.
        drive_id (Optional[str]): ID of the shared drive. If provided, the listing is scoped to this drive.
        include_items_from_all_drives (bool): Whether items from all accessible shared drives should be included if `drive_id` is not set. Defaults to True.
        corpora (Optional[str]): Corpus to query ('user', 'drive', 'allDrives'). If `drive_id` is set and `corpora` is None, 'drive' is used. If None and no `drive_id`, API defaults apply.
        file_type (Optional[str]): Restrict results to a specific file type. Accepts a friendly
            name ('folder', 'document'/'doc', 'spreadsheet'/'sheet',
            'presentation'/'slides', 'form', 'drawing', 'pdf', 'shortcut',
            'script', 'site', 'jam'/'jamboard') or any raw MIME type
            string (e.g. 'application/pdf'). Defaults to None (all types).
        detailed (bool): Whether to include size, modified time, and link in results. Defaults to True.

    Returns:
        str: A formatted list of files/folders in the specified folder.
            Includes a nextPageToken line when more results are available.
    """
    logger.info(
        f"[list_drive_items] Invoked. Email: '{user_google_email}', Folder ID: '{folder_id}', File Type: '{file_type}'"
    )

    # Build the Drive query: direct children of the folder, excluding trash.
    parent_id = await resolve_folder_id(service, folder_id)
    query = f"'{parent_id}' in parents and trashed=false"
    if file_type is not None:
        mime = resolve_file_type_mime(file_type)
        query = f"({query}) and mimeType = '{mime}'"
        logger.info(f"[list_drive_items] Added mimeType filter: '{mime}'")

    request_kwargs = build_drive_list_params(
        query=query,
        page_size=page_size,
        drive_id=drive_id,
        include_items_from_all_drives=include_items_from_all_drives,
        corpora=corpora,
        page_token=page_token,
        detailed=detailed,
    )

    # The Google client is blocking; run the call off the event loop.
    response = await asyncio.to_thread(service.files().list(**request_kwargs).execute)
    entries = response.get("files", [])
    if not entries:
        return f"No items found in folder '{folder_id}'."

    def _render(entry) -> str:
        """Format one listing line, verbose or compact depending on `detailed`."""
        if not detailed:
            return f'- Name: "{entry["name"]}" (ID: {entry["id"]}, Type: {entry["mimeType"]})'
        size_str = f", Size: {entry.get('size', 'N/A')}" if "size" in entry else ""
        return f'- Name: "{entry["name"]}" (ID: {entry["id"]}, Type: {entry["mimeType"]}{size_str}, Modified: {entry.get("modifiedTime", "N/A")}) Link: {entry.get("webViewLink", "#")}'

    output_lines = [
        f"Found {len(entries)} items in folder '{folder_id}' for {user_google_email}:"
    ]
    output_lines.extend(_render(entry) for entry in entries)

    next_token = response.get("nextPageToken")
    if next_token:
        output_lines.append(f"nextPageToken: {next_token}")
    return "\n".join(output_lines)
async def _create_drive_folder_impl(
    service,
    user_google_email: str,
    folder_name: str,
    parent_folder_id: str = "root",
) -> str:
    """Internal implementation for create_drive_folder. Used by tests.

    Resolves ``parent_folder_id``, creates a folder-typed Drive file under it
    and returns a human-readable confirmation containing name, ID and link.
    """
    parent_id = await resolve_folder_id(service, parent_folder_id)
    create_request = service.files().create(
        body={
            "name": folder_name,
            "parents": [parent_id],
            "mimeType": FOLDER_MIME_TYPE,
        },
        fields="id, name, webViewLink",
        supportsAllDrives=True,
    )
    # Blocking Google client call; keep it off the event loop.
    folder = await asyncio.to_thread(create_request.execute)

    shown_name = folder.get("name", folder_name)
    shown_id = folder.get("id", "N/A")
    link = folder.get("webViewLink", "")
    return (
        f"Successfully created folder '{shown_name}' (ID: {shown_id}) "
        f"in folder '{parent_folder_id}' for {user_google_email}. Link: {link}"
    )


@server.tool()
@handle_http_errors("create_drive_folder", service_type="drive")
@require_google_service("drive", "drive_file")
async def create_drive_folder(
    service,
    user_google_email: str,
    folder_name: str,
    parent_folder_id: str = "root",
) -> str:
    """
    Creates a new folder in Google Drive, supporting creation within shared drives.

    Args:
        user_google_email (str): The user's Google email address. Required.
        folder_name (str): The name for the new folder.
        parent_folder_id (str): The ID of the parent folder. Defaults to 'root'.
            For shared drives, use a folder ID within that shared drive.

    Returns:
        str: Confirmation message with folder name, ID, and link.
    """
    logger.info(
        f"[create_drive_folder] Invoked. Email: '{user_google_email}', Folder: '{folder_name}', Parent: '{parent_folder_id}'"
    )
    # Thin tool wrapper: all the work lives in the undecorated implementation.
    confirmation = await _create_drive_folder_impl(
        service, user_google_email, folder_name, parent_folder_id
    )
    return confirmation
async def _upload_new_drive_file(
    service, file_metadata: dict, stream, mime_type: str
) -> dict:
    """
    Upload ``stream`` to Drive as a new file and return the created-file resource.

    Shared by every content source of create_drive_file so the resumable-upload
    settings and the requested response fields stay identical across them.

    Args:
        service: Authenticated Drive API client.
        file_metadata: Drive file resource body (name, parents, mimeType).
        stream: Seekable binary file-like object positioned at the start.
        mime_type: MIME type declared for the uploaded bytes.
    """
    media = MediaIoBaseUpload(
        stream,
        mimetype=mime_type,
        resumable=True,
        chunksize=UPLOAD_CHUNK_SIZE_BYTES,
    )
    # The Google client is blocking; run the upload off the event loop.
    return await asyncio.to_thread(
        service.files()
        .create(
            body=file_metadata,
            media_body=media,
            fields="id, name, webViewLink",
            supportsAllDrives=True,
        )
        .execute
    )


@server.tool()
@handle_http_errors("create_drive_file", service_type="drive")
@require_google_service("drive", "drive_file")
async def create_drive_file(
    service,
    user_google_email: str,
    file_name: str,
    content: Optional[str] = None,  # Now explicitly Optional
    folder_id: str = "root",
    mime_type: str = "text/plain",
    fileUrl: Optional[str] = None,  # Now explicitly Optional
) -> str:
    """
    Creates a new file in Google Drive, supporting creation within shared drives.
    Accepts either direct content or a fileUrl to fetch the content from.

    Args:
        user_google_email (str): The user's Google email address. Required.
        file_name (str): The name for the new file.
        content (Optional[str]): If provided, the content to write to the file.
        folder_id (str): The ID of the parent folder. Defaults to 'root'. For shared drives, this must be a folder ID within the shared drive.
        mime_type (str): The MIME type of the file. Defaults to 'text/plain'.
        fileUrl (Optional[str]): If provided, fetches the file content from this URL. Supports file://, http://, and https:// protocols.

    Returns:
        str: Confirmation message of the successful file creation with file link.
    """
    logger.info(
        f"[create_drive_file] Invoked. Email: '{user_google_email}', File Name: {file_name}, Folder ID: {folder_id}, fileUrl: {fileUrl}"
    )

    # Create folder (no content or media_body). Prefer create_drive_folder for new code.
    if mime_type == FOLDER_MIME_TYPE:
        return await _create_drive_folder_impl(
            service, user_google_email, file_name, folder_id
        )

    # Truthiness rather than `is None`: an empty-string fileUrl is not a usable
    # source, and letting it through would skip every branch below and leave
    # `created_file` unbound.
    if content is None and not fileUrl:
        raise Exception("You must provide either 'content' or 'fileUrl'.")

    resolved_folder_id = await resolve_folder_id(service, folder_id)
    file_metadata = {
        "name": file_name,
        "parents": [resolved_folder_id],
        "mimeType": mime_type,
    }

    # Prefer fileUrl if both are provided
    if fileUrl:
        logger.info(f"[create_drive_file] Fetching file from URL: {fileUrl}")
        parsed_url = urlparse(fileUrl)

        if parsed_url.scheme == "file":
            # Handle file:// URL - read from local filesystem
            logger.info(
                "[create_drive_file] Detected file:// URL, reading from local filesystem"
            )
            running_streamable = get_transport_mode() == "streamable-http"
            if running_streamable:
                logger.warning(
                    "[create_drive_file] file:// URL requested while server runs in streamable-http mode. Ensure the file path is accessible to the server (e.g., Docker volume) or use an HTTP(S) URL."
                )

            # Convert file:// URL to a cross-platform local path; a non-localhost
            # netloc is folded back in as a UNC-style //host/path.
            raw_path = parsed_url.path or ""
            netloc = parsed_url.netloc
            if netloc and netloc.lower() != "localhost":
                raw_path = f"//{netloc}{raw_path}"
            file_path = url2pathname(raw_path)

            # Validate path safety and verify file exists
            path_obj = validate_file_path(file_path)
            if not path_obj.exists():
                extra = (
                    " The server is running via streamable-http, so file:// URLs must point to files inside the container or remote host."
                    if running_streamable
                    else ""
                )
                raise Exception(f"Local file does not exist: {file_path}.{extra}")
            if not path_obj.is_file():
                extra = (
                    " In streamable-http/Docker deployments, mount the file into the container or provide an HTTP(S) URL."
                    if running_streamable
                    else ""
                )
                raise Exception(f"Path is not a file: {file_path}.{extra}")

            logger.info(f"[create_drive_file] Reading local file: {file_path}")
            file_data = await asyncio.to_thread(path_obj.read_bytes)
            logger.info(
                f"[create_drive_file] Read {len(file_data)} bytes from local file"
            )

            logger.info("[create_drive_file] Starting upload to Google Drive...")
            created_file = await _upload_new_drive_file(
                service, file_metadata, io.BytesIO(file_data), mime_type
            )

        elif parsed_url.scheme in ("http", "https"):
            # when running in stateless mode, deployment may not have access to local file system
            if is_stateless_mode():
                resp = await _ssrf_safe_fetch(fileUrl)
                if resp.status_code != 200:
                    raise Exception(
                        f"Failed to fetch file from URL: {fileUrl} (status {resp.status_code})"
                    )
                file_data = resp.content
                # Same cap the streaming branch enforces, so stateless mode
                # cannot be used to push oversized payloads to Drive.
                if len(file_data) > MAX_DOWNLOAD_BYTES:
                    raise Exception(
                        f"Download exceeded {MAX_DOWNLOAD_BYTES} byte limit"
                    )

                # Try to get MIME type from Content-Type header
                content_type = resp.headers.get("Content-Type")
                if content_type and content_type != "application/octet-stream":
                    mime_type = content_type
                    file_metadata["mimeType"] = content_type
                    logger.info(
                        f"[create_drive_file] Using MIME type from Content-Type header: {content_type}"
                    )

                created_file = await _upload_new_drive_file(
                    service, file_metadata, io.BytesIO(file_data), mime_type
                )
            else:
                # Stream download to temp file with SSRF protection, then upload.
                # The upload must happen inside the `with`: the temp file is
                # deleted as soon as the context exits.
                with NamedTemporaryFile() as temp_file:
                    total_bytes = 0
                    content_type = None

                    async with _ssrf_safe_stream(fileUrl) as resp:
                        if resp.status_code != 200:
                            raise Exception(
                                f"Failed to fetch file from URL: {fileUrl} "
                                f"(status {resp.status_code})"
                            )

                        content_type = resp.headers.get("Content-Type")

                        async for chunk in resp.aiter_bytes(
                            chunk_size=DOWNLOAD_CHUNK_SIZE_BYTES
                        ):
                            total_bytes += len(chunk)
                            if total_bytes > MAX_DOWNLOAD_BYTES:
                                raise Exception(
                                    f"Download exceeded {MAX_DOWNLOAD_BYTES} byte limit"
                                )
                            await asyncio.to_thread(temp_file.write, chunk)

                    logger.info(
                        f"[create_drive_file] Downloaded {total_bytes} bytes "
                        f"from URL before upload."
                    )

                    if content_type and content_type != "application/octet-stream":
                        mime_type = content_type
                        file_metadata["mimeType"] = mime_type
                        logger.info(
                            f"[create_drive_file] Using MIME type from "
                            f"Content-Type header: {mime_type}"
                        )

                    # Reset file pointer to beginning for upload
                    temp_file.seek(0)

                    logger.info(
                        "[create_drive_file] Starting upload to Google Drive..."
                    )
                    created_file = await _upload_new_drive_file(
                        service, file_metadata, temp_file, mime_type
                    )
        else:
            if not parsed_url.scheme:
                raise Exception(
                    "fileUrl is missing a URL scheme. Use file://, http://, or https://."
                )
            raise Exception(
                f"Unsupported URL scheme '{parsed_url.scheme}'. Only file://, http://, and https:// are supported."
            )
    else:
        # The validation above guarantees `content` is not None here.
        created_file = await _upload_new_drive_file(
            service, file_metadata, io.BytesIO(content.encode("utf-8")), mime_type
        )

    link = created_file.get("webViewLink", "No link available")
    confirmation_message = f"Successfully created file '{created_file.get('name', file_name)}' (ID: {created_file.get('id', 'N/A')}) in folder '{folder_id}' for {user_google_email}. Link: {link}"
    logger.info(f"Successfully created file. Link: {link}")
    return confirmation_message


# Mapping of file extensions to source MIME types for Google Docs conversion
GOOGLE_DOCS_IMPORT_FORMATS = {
    ".md": "text/markdown",
    ".markdown": "text/markdown",
    ".txt": "text/plain",
    ".text": "text/plain",
    ".html": "text/html",
    ".htm": "text/html",
    ".docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    ".doc": "application/msword",
    ".rtf": "application/rtf",
    ".odt": "application/vnd.oasis.opendocument.text",
}

GOOGLE_DOCS_MIME_TYPE = "application/vnd.google-apps.document"
+ """ + if not hostname: + raise ValueError("Invalid URL: no hostname") + + # Block localhost variants + if hostname.lower() in ("localhost", "127.0.0.1", "::1", "0.0.0.0"): + raise ValueError("URLs pointing to localhost are not allowed") + + # Resolve hostname using getaddrinfo (handles both IPv4 and IPv6) + try: + addr_infos = socket.getaddrinfo(hostname, None) + except socket.gaierror as e: + raise ValueError( + f"Cannot resolve hostname '{hostname}': {e}. " + "Refusing request (fail-closed)." + ) + + if not addr_infos: + raise ValueError(f"No addresses found for hostname: {hostname}") + + resolved_ips: list[str] = [] + seen_ips: set[str] = set() + for _family, _type, _proto, _canonname, sockaddr in addr_infos: + ip_str = sockaddr[0] + ip = ipaddress.ip_address(ip_str) + if not ip.is_global: + raise ValueError( + f"URLs pointing to private/internal networks are not allowed: " + f"{hostname} resolves to {ip_str}" + ) + if ip_str not in seen_ips: + seen_ips.add(ip_str) + resolved_ips.append(ip_str) + + return resolved_ips + + +def _validate_url_not_internal(url: str) -> list[str]: + """ + Validate that a URL doesn't point to internal/private networks (SSRF protection). + + Returns: + list[str]: Validated resolved IP addresses for the hostname. + + Raises: + ValueError: If URL points to localhost or private IP ranges. 
+ """ + parsed = urlparse(url) + return _resolve_and_validate_host(parsed.hostname) + + +def _format_host_header(hostname: str, scheme: str, port: Optional[int]) -> str: + """Format the Host header value for IPv4/IPv6 hostnames.""" + host_value = hostname + if ":" in host_value and not host_value.startswith("["): + host_value = f"[{host_value}]" + + is_default_port = (scheme == "http" and (port is None or port == 80)) or ( + scheme == "https" and (port is None or port == 443) + ) + if not is_default_port and port is not None: + host_value = f"{host_value}:{port}" + return host_value + + +def _build_pinned_url(parsed_url, ip_address_str: str) -> str: + """Build a URL that targets a resolved IP while preserving path/query.""" + pinned_host = ip_address_str + if ":" in pinned_host and not pinned_host.startswith("["): + pinned_host = f"[{pinned_host}]" + + userinfo = "" + if parsed_url.username is not None: + userinfo = parsed_url.username + if parsed_url.password is not None: + userinfo += f":{parsed_url.password}" + userinfo += "@" + + port_part = f":{parsed_url.port}" if parsed_url.port is not None else "" + netloc = f"{userinfo}{pinned_host}{port_part}" + + path = parsed_url.path or "/" + return urlunparse( + ( + parsed_url.scheme, + netloc, + path, + parsed_url.params, + parsed_url.query, + parsed_url.fragment, + ) + ) + + +async def _fetch_url_with_pinned_ip(url: str) -> httpx.Response: + """ + Fetch URL content by connecting to a validated, pre-resolved IP address. + + This prevents DNS rebinding between validation and the outbound connection. 
async def _fetch_url_with_pinned_ip(url: str) -> httpx.Response:
    """
    Fetch URL content by connecting to a validated, pre-resolved IP address.

    This prevents DNS rebinding between validation and the outbound connection:
    the request is sent to the IP that passed validation, while the original
    hostname is supplied through the Host header and the ``sni_hostname``
    request extension. Redirects are not followed here.

    Each validated IP is tried in turn; the first response obtained is returned.

    Raises:
        ValueError: If the scheme is not http/https, the hostname is missing,
            or the host fails the internal-network validation.
        Exception: If every validated IP fails with an ``httpx.HTTPError``
            (the last such error is attached as the cause).
    """
    target = urlparse(url)
    if target.scheme not in ("http", "https"):
        raise ValueError(f"Only http:// and https:// are supported: {url}")
    if not target.hostname:
        raise ValueError(f"Invalid URL: missing hostname ({url})")

    candidate_ips = _validate_url_not_internal(url)
    request_headers = {
        "Host": _format_host_header(target.hostname, target.scheme, target.port)
    }
    request_extensions = {"sni_hostname": target.hostname}

    most_recent_failure: Optional[Exception] = None
    for candidate_ip in candidate_ips:
        ip_url = _build_pinned_url(target, candidate_ip)
        try:
            async with httpx.AsyncClient(
                follow_redirects=False, trust_env=False
            ) as client:
                outgoing = client.build_request(
                    "GET",
                    ip_url,
                    headers=request_headers,
                    extensions=request_extensions,
                )
                return await client.send(outgoing)
        except httpx.HTTPError as exc:
            # Remember the failure and fall through to the next address.
            most_recent_failure = exc
            logger.warning(
                f"[ssrf_safe_fetch] Failed request via resolved IP {candidate_ip} for host "
                f"{target.hostname}: {exc}"
            )

    raise Exception(
        f"Failed to fetch URL after trying {len(candidate_ips)} validated IP(s): {url}"
    ) from most_recent_failure
async def _ssrf_safe_fetch(url: str, *, stream: bool = False) -> httpx.Response:
    """
    Fetch a URL with SSRF protection that covers redirects and DNS rebinding.

    Every hop (the initial URL and each redirect target) is fetched through
    ``_fetch_url_with_pinned_ip``, which validates the host against
    private/internal networks. Redirects are followed manually, up to 10 hops.

    Args:
        url: The URL to fetch.
        stream: Not supported by this function; passing True raises ValueError.
            Use ``_ssrf_safe_stream`` for streaming downloads.

    Returns:
        httpx.Response: The first non-redirect response in the chain.

    Raises:
        ValueError: If ``stream`` is True, if a redirect targets a non-http(s)
            scheme, or if any URL in the chain fails host validation.
        Exception: If a redirect has no Location header, the redirect limit is
            exceeded, or the HTTP request fails.
    """
    if stream:
        raise ValueError("Streaming mode is not supported by _ssrf_safe_fetch.")

    max_redirects = 10
    current_url = url

    for _ in range(max_redirects):
        # Each hop goes through the pinned-IP fetch, which validates the host.
        resp = await _fetch_url_with_pinned_ip(current_url)

        if resp.status_code in (301, 302, 303, 307, 308):
            location = resp.headers.get("location")
            if not location:
                raise Exception(f"Redirect with no Location header from {current_url}")

            # Resolve relative redirects against the current URL
            location = urljoin(current_url, location)

            # Reject scheme changes to anything other than http(s) up front.
            redirect_parsed = urlparse(location)
            if redirect_parsed.scheme not in ("http", "https"):
                raise ValueError(
                    f"Redirect to disallowed scheme: {redirect_parsed.scheme}"
                )

            current_url = location
            continue

        return resp

    raise Exception(f"Too many redirects (max {max_redirects}) fetching {url}")


@asynccontextmanager
async def _ssrf_safe_stream(url: str) -> AsyncIterator[httpx.Response]:
    """
    SSRF-safe streaming fetch: validates each redirect target against private
    networks, then streams the final response body without buffering it all
    in memory.

    The response and the client that produced it are closed when the context
    exits. Intermediate redirect responses and clients for failed attempts are
    closed before moving on.

    Usage::

        async with _ssrf_safe_stream(file_url) as resp:
            async for chunk in resp.aiter_bytes(chunk_size=DOWNLOAD_CHUNK_SIZE_BYTES):
                ...

    Raises:
        ValueError: If a URL in the chain is not http(s), has no hostname, or
            fails host validation.
        Exception: If all validated IPs fail, a redirect has no Location
            header, or the redirect limit (10) is exceeded.
    """
    max_redirects = 10
    current_url = url

    # Resolve redirects manually so every hop is SSRF-validated
    for _ in range(max_redirects):
        parsed = urlparse(current_url)
        if parsed.scheme not in ("http", "https"):
            raise ValueError(f"Only http:// and https:// are supported: {current_url}")
        if not parsed.hostname:
            raise ValueError(f"Invalid URL: missing hostname ({current_url})")

        resolved_ips = _validate_url_not_internal(current_url)
        host_header = _format_host_header(parsed.hostname, parsed.scheme, parsed.port)

        last_error: Optional[Exception] = None
        resp: Optional[httpx.Response] = None
        for resolved_ip in resolved_ips:
            pinned_url = _build_pinned_url(parsed, resolved_ip)
            # One client per attempt; on success it is deliberately left open
            # because the streamed response still needs it.
            client = httpx.AsyncClient(follow_redirects=False, trust_env=False)
            try:
                request = client.build_request(
                    "GET",
                    pinned_url,
                    headers={"Host": host_header},
                    extensions={"sni_hostname": parsed.hostname},
                )
                resp = await client.send(request, stream=True)
                break
            except httpx.HTTPError as exc:
                # Transport/protocol failure: release this client, try next IP.
                last_error = exc
                await client.aclose()
                logger.warning(
                    f"[ssrf_safe_stream] Failed via IP {resolved_ip} for "
                    f"{parsed.hostname}: {exc}"
                )
            except Exception:
                # Anything unexpected: do not leak the client, then propagate.
                await client.aclose()
                raise

        if resp is None:
            raise Exception(
                f"Failed to fetch URL after trying {len(resolved_ips)} validated IP(s): "
                f"{current_url}"
            ) from last_error

        if resp.status_code in (301, 302, 303, 307, 308):
            location = resp.headers.get("location")
            # Done with this hop: close it before validating the next target.
            await resp.aclose()
            await client.aclose()
            if not location:
                raise Exception(f"Redirect with no Location header from {current_url}")
            location = urljoin(current_url, location)
            redirect_parsed = urlparse(location)
            if redirect_parsed.scheme not in ("http", "https"):
                raise ValueError(
                    f"Redirect to disallowed scheme: {redirect_parsed.scheme}"
                )
            current_url = location
            continue

        # Non-redirect — yield the streaming response
        try:
            yield resp
        finally:
            # Runs whether the caller's block finished normally or raised.
            await resp.aclose()
            await client.aclose()
        return

    raise Exception(f"Too many redirects (max {max_redirects}) fetching {url}")
return + + raise Exception(f"Too many redirects (max {max_redirects}) fetching {url}") + + +def _detect_source_format(file_name: str, content: Optional[str] = None) -> str: + """ + Detect the source MIME type based on file extension. + Falls back to text/plain if unknown. + """ + ext = Path(file_name).suffix.lower() + if ext in GOOGLE_DOCS_IMPORT_FORMATS: + return GOOGLE_DOCS_IMPORT_FORMATS[ext] + + # If content is provided and looks like markdown, use markdown + if content and (content.startswith("#") or "```" in content or "**" in content): + return "text/markdown" + + return "text/plain" + + +@server.tool() +@handle_http_errors("import_to_google_doc", service_type="drive") +@require_google_service("drive", "drive_file") +async def import_to_google_doc( + service, + user_google_email: str, + file_name: str, + content: Optional[str] = None, + file_path: Optional[str] = None, + file_url: Optional[str] = None, + source_format: Optional[str] = None, + folder_id: str = "root", +) -> str: + """ + Imports a file (Markdown, DOCX, TXT, HTML, RTF, ODT) into Google Docs format with automatic conversion. + + Google Drive automatically converts the source file to native Google Docs format, + preserving formatting like headings, lists, bold, italic, etc. + + Args: + user_google_email (str): The user's Google email address. Required. + file_name (str): The name for the new Google Doc (extension will be ignored). + content (Optional[str]): Text content for text-based formats (MD, TXT, HTML). + file_path (Optional[str]): Local file path for binary formats (DOCX, ODT). Supports file:// URLs. + file_url (Optional[str]): Remote URL to fetch the file from (http/https). + source_format (Optional[str]): Source format hint ('md', 'markdown', 'docx', 'txt', 'html', 'rtf', 'odt'). + Auto-detected from file_name extension if not provided. + folder_id (str): The ID of the parent folder. Defaults to 'root'. + + Returns: + str: Confirmation message with the new Google Doc link. 
@server.tool()
@handle_http_errors("import_to_google_doc", service_type="drive")
@require_google_service("drive", "drive_file")
async def import_to_google_doc(
    service,
    user_google_email: str,
    file_name: str,
    content: Optional[str] = None,
    file_path: Optional[str] = None,
    file_url: Optional[str] = None,
    source_format: Optional[str] = None,
    folder_id: str = "root",
) -> str:
    """
    Imports a file (Markdown, DOCX, TXT, HTML, RTF, ODT) into Google Docs format with automatic conversion.

    Google Drive automatically converts the source file to native Google Docs format,
    preserving formatting like headings, lists, bold, italic, etc.

    Args:
        user_google_email (str): The user's Google email address. Required.
        file_name (str): The name for the new Google Doc (extension will be ignored).
        content (Optional[str]): Text content for text-based formats (MD, TXT, HTML).
        file_path (Optional[str]): Local file path for binary formats (DOCX, ODT). Supports file:// URLs.
        file_url (Optional[str]): Remote URL to fetch the file from (http/https).
        source_format (Optional[str]): Source format hint ('md', 'markdown', 'docx', 'txt', 'html', 'rtf', 'odt').
            Auto-detected from the source path/URL extension, then from the
            file_name extension, if not provided.
        folder_id (str): The ID of the parent folder. Defaults to 'root'.

    Returns:
        str: Confirmation message with the new Google Doc link.

    Examples:
        # Import markdown content directly
        import_to_google_doc(file_name="My Doc.md", content="# Title\\n\\nHello **world**")

        # Import a local DOCX file
        import_to_google_doc(file_name="Report", file_path="/path/to/report.docx")

        # Import from URL
        import_to_google_doc(file_name="Remote Doc", file_url="https://example.com/doc.md")
    """
    logger.info(
        f"[import_to_google_doc] Invoked. Email: '{user_google_email}', "
        f"File Name: '{file_name}', Source Format: '{source_format}', Folder ID: '{folder_id}'"
    )

    # Validate inputs
    source_count = sum(1 for x in [content, file_path, file_url] if x is not None)
    if source_count == 0:
        raise ValueError(
            "You must provide one of: 'content', 'file_path', or 'file_url'."
        )
    if source_count > 1:
        raise ValueError("Provide only one of: 'content', 'file_path', or 'file_url'.")

    def _auto_detect(source_name: str) -> str:
        """Prefer the real source's extension; otherwise fall back to file_name."""
        if Path(source_name).suffix.lower() in GOOGLE_DOCS_IMPORT_FORMATS:
            return _detect_source_format(source_name)
        return _detect_source_format(file_name, content)

    # Determine source MIME type
    if source_format:
        # Normalize format hint
        format_key = f".{source_format.lower().lstrip('.')}"
        if format_key in GOOGLE_DOCS_IMPORT_FORMATS:
            source_mime_type = GOOGLE_DOCS_IMPORT_FORMATS[format_key]
        else:
            raise ValueError(
                f"Unsupported source_format: '{source_format}'. "
                f"Supported: {', '.join(ext.lstrip('.') for ext in GOOGLE_DOCS_IMPORT_FORMATS.keys())}"
            )
    elif file_url is not None:
        # Look at the URL path only: a query string or fragment would otherwise
        # end up inside the "extension" and defeat detection.
        source_mime_type = _auto_detect(urlparse(file_url).path)
    elif file_path is not None:
        source_mime_type = _auto_detect(file_path)
    else:
        source_mime_type = _detect_source_format(file_name, content)

    logger.info(f"[import_to_google_doc] Detected source MIME type: {source_mime_type}")

    # Clean up file name (remove extension since it becomes a Google Doc)
    doc_name = Path(file_name).stem if Path(file_name).suffix else file_name

    # Resolve folder
    resolved_folder_id = await resolve_folder_id(service, folder_id)

    # File metadata - destination is Google Docs format
    file_metadata = {
        "name": doc_name,
        "parents": [resolved_folder_id],
        "mimeType": GOOGLE_DOCS_MIME_TYPE,  # Target format = Google Docs
    }

    file_data: bytes

    # Handle content (string input for text formats)
    if content is not None:
        file_data = content.encode("utf-8")
        logger.info(f"[import_to_google_doc] Using content: {len(file_data)} bytes")

    # Handle file_path (local file)
    elif file_path is not None:
        parsed_url = urlparse(file_path)

        # Handle file:// URL format
        if parsed_url.scheme == "file":
            raw_path = parsed_url.path or ""
            netloc = parsed_url.netloc
            if netloc and netloc.lower() != "localhost":
                raw_path = f"//{netloc}{raw_path}"
            actual_path = url2pathname(raw_path)
        elif parsed_url.scheme == "":
            # Regular path
            actual_path = file_path
        else:
            raise ValueError(
                f"file_path should be a local path or file:// URL, got: {file_path}"
            )

        path_obj = validate_file_path(actual_path)
        if not path_obj.exists():
            raise FileNotFoundError(f"File not found: {actual_path}")
        if not path_obj.is_file():
            raise ValueError(f"Path is not a file: {actual_path}")

        file_data = await asyncio.to_thread(path_obj.read_bytes)
        logger.info(f"[import_to_google_doc] Read local file: {len(file_data)} bytes")

        # Re-detect format from actual file if not specified
        if not source_format:
            source_mime_type = _auto_detect(actual_path)
            logger.info(
                f"[import_to_google_doc] Re-detected from path: {source_mime_type}"
            )

    # Handle file_url (remote file)
    elif file_url is not None:
        parsed_url = urlparse(file_url)
        if parsed_url.scheme not in ("http", "https"):
            raise ValueError(f"file_url must be http:// or https://, got: {file_url}")

        # SSRF protection: block internal/private network URLs and validate redirects
        resp = await _ssrf_safe_fetch(file_url)
        if resp.status_code != 200:
            raise Exception(
                f"Failed to fetch file from URL: {file_url} (status {resp.status_code})"
            )
        file_data = resp.content

        logger.info(
            f"[import_to_google_doc] Downloaded from URL: {len(file_data)} bytes"
        )

        # Re-detect format from URL if not specified
        if not source_format:
            source_mime_type = _auto_detect(parsed_url.path)
            logger.info(
                f"[import_to_google_doc] Re-detected from URL: {source_mime_type}"
            )

    # Upload with conversion
    media = MediaIoBaseUpload(
        io.BytesIO(file_data),
        mimetype=source_mime_type,  # Source format
        resumable=True,
        chunksize=UPLOAD_CHUNK_SIZE_BYTES,
    )

    logger.info(
        f"[import_to_google_doc] Uploading to Google Drive with conversion: "
        f"{source_mime_type} → {GOOGLE_DOCS_MIME_TYPE}"
    )

    created_file = await asyncio.to_thread(
        service.files()
        .create(
            body=file_metadata,
            media_body=media,
            fields="id, name, webViewLink, mimeType",
            supportsAllDrives=True,
        )
        .execute
    )

    # Drive reports the resulting type; anything else means no conversion happened.
    result_mime = created_file.get("mimeType", "unknown")
    if result_mime != GOOGLE_DOCS_MIME_TYPE:
        logger.warning(
            f"[import_to_google_doc] Conversion may have failed. "
            f"Expected {GOOGLE_DOCS_MIME_TYPE}, got {result_mime}"
        )

    link = created_file.get("webViewLink", "No link available")
    doc_id = created_file.get("id", "N/A")

    confirmation = (
        f"✅ Successfully imported '{doc_name}' as Google Doc\n"
        f" Document ID: {doc_id}\n"
        f" Source format: {source_mime_type}\n"
        f" Folder: {folder_id}\n"
        f" Link: {link}"
    )

    logger.info(f"[import_to_google_doc] Success. Link: {link}")
    return confirmation
@server.tool()
@handle_http_errors(
    "get_drive_file_permissions", is_read_only=True, service_type="drive"
)
@require_google_service("drive", "drive_read")
async def get_drive_file_permissions(
    service,
    user_google_email: str,
    file_id: str,
) -> str:
    """
    Gets detailed metadata about a Google Drive file including sharing permissions.

    Args:
        user_google_email (str): The user's Google email address. Required.
        file_id (str): The ID of the file to check permissions for.

    Returns:
        str: Detailed file metadata including sharing status and URLs.
    """
    logger.info(
        f"[get_drive_file_permissions] Checking file {file_id} for {user_google_email}"
    )

    file_id, _ = await resolve_drive_item(service, file_id)

    try:
        # Get comprehensive file metadata including permissions with details
        get_request = service.files().get(
            fileId=file_id,
            fields="id, name, mimeType, size, modifiedTime, owners, "
            "permissions(id, type, role, emailAddress, domain, expirationTime, permissionDetails), "
            "webViewLink, webContentLink, shared, sharingUser, viewersCanCopyContent",
            supportsAllDrives=True,
        )
        meta = await asyncio.to_thread(get_request.execute)

        # Header section: basic file facts and the overall shared flag.
        report = [
            f"File: {meta.get('name', 'Unknown')}",
            f"ID: {file_id}",
            f"Type: {meta.get('mimeType', 'Unknown')}",
            f"Size: {meta.get('size', 'N/A')} bytes",
            f"Modified: {meta.get('modifiedTime', 'N/A')}",
            "",
            "Sharing Status:",
            f" Shared: {meta.get('shared', False)}",
        ]

        # Add sharing user if available
        sharer = meta.get("sharingUser")
        if sharer:
            report.append(
                f" Shared by: {sharer.get('displayName', 'Unknown')} ({sharer.get('emailAddress', 'Unknown')})"
            )

        # Process permissions
        permissions = meta.get("permissions", [])
        if not permissions:
            report.append(" No additional permissions (private file)")
        else:
            report.append(f" Number of permissions: {len(permissions)}")
            report.append(" Permissions:")
            report.extend(f" - {format_permission_info(perm)}" for perm in permissions)

        # Add URLs
        report += ["", "URLs:", f" View Link: {meta.get('webViewLink', 'N/A')}"]

        # webContentLink is only available for files that can be downloaded
        direct_link = meta.get("webContentLink")
        if direct_link:
            report.append(f" Direct Download Link: {direct_link}")

        if check_public_link_permission(permissions):
            report += [
                "",
                "✅ This file is shared with 'Anyone with the link' - it can be inserted into Google Docs",
            ]
        else:
            report += [
                "",
                "❌ This file is NOT shared with 'Anyone with the link' - it cannot be inserted into Google Docs",
                " To fix: Right-click the file in Google Drive → Share → Anyone with the link → Viewer",
            ]

        return "\n".join(report)

    except Exception as e:
        # Failures are reported back to the caller as text rather than raised.
        logger.error(f"Error getting file permissions: {e}")
        return f"Error getting file permissions: {e}"
+ """ + logger.info(f"[check_drive_file_public_access] Searching for {file_name}") + + # Search for the file + escaped_name = file_name.replace("'", "\\'") + query = f"name = '{escaped_name}'" + + list_params = { + "q": query, + "pageSize": 10, + "fields": "files(id, name, mimeType, webViewLink)", + "supportsAllDrives": True, + "includeItemsFromAllDrives": True, + } + + results = await asyncio.to_thread(service.files().list(**list_params).execute) + + files = results.get("files", []) + if not files: + return f"No file found with name '{file_name}'" + + if len(files) > 1: + output_parts = [f"Found {len(files)} files with name '{file_name}':"] + for f in files: + output_parts.append(f" - {f['name']} (ID: {f['id']})") + output_parts.append("\nChecking the first file...") + output_parts.append("") + else: + output_parts = [] + + # Check permissions for the first file + file_id = files[0]["id"] + resolved_file_id, _ = await resolve_drive_item(service, file_id) + file_id = resolved_file_id + + # Get detailed permissions + file_metadata = await asyncio.to_thread( + service.files() + .get( + fileId=file_id, + fields="id, name, mimeType, permissions, webViewLink, webContentLink, shared", + supportsAllDrives=True, + ) + .execute + ) + + permissions = file_metadata.get("permissions", []) + + has_public_link = check_public_link_permission(permissions) + + output_parts.extend( + [ + f"File: {file_metadata['name']}", + f"ID: {file_id}", + f"Type: {file_metadata['mimeType']}", + f"Shared: {file_metadata.get('shared', False)}", + "", + ] + ) + + if has_public_link: + output_parts.extend( + [ + "✅ PUBLIC ACCESS ENABLED - This file can be inserted into Google Docs", + f"Use with insert_doc_image_url: {get_drive_image_url(file_id)}", + ] + ) + else: + output_parts.extend( + [ + "❌ NO PUBLIC ACCESS - Cannot insert into Google Docs", + "Fix: Drive → Share → 'Anyone with the link' → 'Viewer'", + ] + ) + + return "\n".join(output_parts) + + +@server.tool() 
@server.tool()
@handle_http_errors("update_drive_file", is_read_only=False, service_type="drive")
@require_google_service("drive", "drive_file")
async def update_drive_file(
    service,
    user_google_email: str,
    file_id: str,
    # File metadata updates
    name: Optional[str] = None,
    description: Optional[str] = None,
    mime_type: Optional[str] = None,
    # Folder organization
    add_parents: Optional[str] = None,  # Comma-separated folder IDs to add
    remove_parents: Optional[str] = None,  # Comma-separated folder IDs to remove
    # File status
    starred: Optional[bool] = None,
    trashed: Optional[bool] = None,
    # Sharing and permissions
    writers_can_share: Optional[bool] = None,
    copy_requires_writer_permission: Optional[bool] = None,
    # Custom properties
    properties: Optional[dict] = None,  # User-visible custom properties
) -> str:
    """
    Updates metadata and properties of a Google Drive file.

    Only arguments that are not None are sent to the API; everything else is
    left untouched. The current metadata is read first so the reply can
    describe what actually changed.

    Args:
        user_google_email (str): The user's Google email address. Required.
        file_id (str): The ID of the file to update. Required.
        name (Optional[str]): New name for the file.
        description (Optional[str]): New description for the file.
        mime_type (Optional[str]): New MIME type (note: changing type may require content upload).
        add_parents (Optional[str]): Comma-separated folder IDs to add as parents.
        remove_parents (Optional[str]): Comma-separated folder IDs to remove from parents.
        starred (Optional[bool]): Whether to star/unstar the file.
        trashed (Optional[bool]): Whether to move file to/from trash.
        writers_can_share (Optional[bool]): Whether editors can share the file.
        copy_requires_writer_permission (Optional[bool]): Whether copying requires writer permission.
        properties (Optional[dict]): Custom key-value properties for the file.

    Returns:
        str: Confirmation message with details of the updates applied.
    """
    logger.info(f"[update_drive_file] Updating file {file_id} for {user_google_email}")

    current_file_fields = (
        "name, description, mimeType, parents, starred, trashed, webViewLink, "
        "writersCanShare, copyRequiresWriterPermission, properties"
    )
    resolved_file_id, current_file = await resolve_drive_item(
        service,
        file_id,
        extra_fields=current_file_fields,
    )
    file_id = resolved_file_id

    # Build the update body with only the fields the caller specified.
    requested_fields = {
        "name": name,
        "description": description,
        "mimeType": mime_type,
        "starred": starred,
        "trashed": trashed,
        "writersCanShare": writers_can_share,
        "copyRequiresWriterPermission": copy_requires_writer_permission,
        "properties": properties,
    }
    update_body = {
        key: value for key, value in requested_fields.items() if value is not None
    }

    async def _resolve_parent_arguments(parent_arg: Optional[str]) -> Optional[str]:
        """Resolve each comma-separated folder reference and re-join the results."""
        if not parent_arg:
            return None
        parent_ids = [part.strip() for part in parent_arg.split(",") if part.strip()]
        if not parent_ids:
            return None

        resolved_ids = []
        for parent in parent_ids:
            resolved_parent = await resolve_folder_id(service, parent)
            resolved_ids.append(resolved_parent)
        return ",".join(resolved_ids)

    resolved_add_parents = await _resolve_parent_arguments(add_parents)
    resolved_remove_parents = await _resolve_parent_arguments(remove_parents)

    # Parent changes travel as query parameters, not in the request body.
    query_params = {
        "fileId": file_id,
        "supportsAllDrives": True,
        "fields": "id, name, description, mimeType, parents, starred, trashed, webViewLink, writersCanShare, copyRequiresWriterPermission, properties",
    }

    if resolved_add_parents:
        query_params["addParents"] = resolved_add_parents
    if resolved_remove_parents:
        query_params["removeParents"] = resolved_remove_parents

    # Only include body if there are updates
    if update_body:
        query_params["body"] = update_body

    # Perform the update
    updated_file = await asyncio.to_thread(
        service.files().update(**query_params).execute
    )

    # Fall back to the pre-update name without indexing, so a metadata dict
    # that lacks "name" cannot raise KeyError after a successful update.
    display_name = updated_file.get("name", current_file.get("name", "Unknown"))
    output_parts = [f"✅ Successfully updated file: {display_name}"]
    output_parts.append(f" File ID: {file_id}")

    # Report what changed
    changes = []
    if name is not None and name != current_file.get("name"):
        changes.append(f" • Name: '{current_file.get('name')}' → '{name}'")
    if description is not None:
        old_desc_value = current_file.get("description")
        # Treat None and "" as the same "empty" description.
        if (old_desc_value or "") != (description or ""):
            old_desc_display = (
                old_desc_value if old_desc_value not in (None, "") else "(empty)"
            )
            new_desc_display = description if description != "" else "(empty)"
            changes.append(f" • Description: {old_desc_display} → {new_desc_display}")
    # A MIME type change is sent to the API above, so it must be reported too;
    # otherwise the reply would claim that nothing changed.
    if mime_type is not None and mime_type != current_file.get("mimeType"):
        changes.append(
            f" • MIME type: '{current_file.get('mimeType')}' → '{mime_type}'"
        )
    if add_parents:
        changes.append(f" • Added to folder(s): {add_parents}")
    if remove_parents:
        changes.append(f" • Removed from folder(s): {remove_parents}")
    if starred is not None and starred != current_file.get("starred"):
        star_status = "starred" if starred else "unstarred"
        changes.append(f" • File {star_status}")
    if trashed is not None and trashed != current_file.get("trashed"):
        trash_status = "moved to trash" if trashed else "restored from trash"
        changes.append(f" • File {trash_status}")
    if writers_can_share is not None and writers_can_share != current_file.get(
        "writersCanShare"
    ):
        share_status = "can" if writers_can_share else "cannot"
        changes.append(f" • Writers {share_status} share the file")
    if (
        copy_requires_writer_permission is not None
        and copy_requires_writer_permission
        != current_file.get("copyRequiresWriterPermission")
    ):
        copy_status = (
            "requires" if copy_requires_writer_permission else "doesn't require"
        )
        changes.append(f" • Copying {copy_status} writer permission")
    if properties:
        changes.append(f" • Updated custom properties: {properties}")

    if changes:
        output_parts.append("")
        output_parts.append("Changes applied:")
        output_parts.extend(changes)
    else:
        output_parts.append(" (No changes were made)")

    output_parts.append("")
    output_parts.append(f"View file: {updated_file.get('webViewLink', '#')}")

    return "\n".join(output_parts)
@server.tool()
@handle_http_errors("get_drive_shareable_link", is_read_only=True, service_type="drive")
@require_google_service("drive", "drive_read")
async def get_drive_shareable_link(
    service,
    user_google_email: str,
    file_id: str,
) -> str:
    """
    Returns the view/download links and current permissions of a Drive item.

    Args:
        user_google_email (str): The user's Google email address. Required.
        file_id (str): The ID of the file or folder to get the shareable link for. Required.

    Returns:
        str: The shareable links and current sharing status.
    """
    logger.info(
        f"[get_drive_shareable_link] Invoked. Email: '{user_google_email}', File ID: '{file_id}'"
    )

    file_id, _ = await resolve_drive_item(service, file_id)

    # Build the request on this thread; only the blocking execute() call is
    # handed to the worker thread.
    metadata_request = service.files().get(
        fileId=file_id,
        fields="id, name, mimeType, webViewLink, webContentLink, shared, "
        "permissions(id, type, role, emailAddress, domain, expirationTime)",
        supportsAllDrives=True,
    )
    details = await asyncio.to_thread(metadata_request.execute)

    report = [
        f"File: {details.get('name', 'Unknown')}",
        f"ID: {file_id}",
        f"Type: {details.get('mimeType', 'Unknown')}",
        f"Shared: {details.get('shared', False)}",
        "",
        "Links:",
        f" View: {details.get('webViewLink', 'N/A')}",
    ]

    # The download link is only reported when the API returned one.
    download_url = details.get("webContentLink")
    if download_url:
        report.append(f" Download: {download_url}")

    granted = details.get("permissions", [])
    if granted:
        report += ["", "Current permissions:"]
        report += [f" - {format_permission_info(entry)}" for entry in granted]

    return "\n".join(report)
@server.tool()
@handle_http_errors("manage_drive_access", is_read_only=False, service_type="drive")
@require_google_service("drive", "drive_file")
async def manage_drive_access(
    service,
    user_google_email: str,
    file_id: str,
    action: str,
    share_with: Optional[str] = None,
    role: Optional[str] = None,
    share_type: str = "user",
    permission_id: Optional[str] = None,
    recipients: Optional[List[Dict[str, Any]]] = None,
    send_notification: bool = True,
    email_message: Optional[str] = None,
    expiration_time: Optional[str] = None,
    allow_file_discovery: Optional[bool] = None,
    new_owner_email: Optional[str] = None,
    move_to_new_owners_root: bool = False,
) -> str:
    """
    Consolidated tool for managing Google Drive file and folder access permissions.

    Supports granting, batch-granting, updating, revoking permissions, and
    transferring file ownership -- all through a single entry point.

    Args:
        user_google_email (str): The user's Google email address. Required.
        file_id (str): The ID of the file or folder. Required.
        action (str): The access management action to perform. Required. One of:
            - "grant": Share with a single user, group, domain, or anyone.
            - "grant_batch": Share with multiple recipients in one call.
            - "update": Modify an existing permission (role or expiration).
            - "revoke": Remove an existing permission.
            - "transfer_owner": Transfer file ownership to another user.
        share_with (Optional[str]): Email address (user/group), domain name (domain),
            or omit for 'anyone'. Used by "grant".
        role (Optional[str]): Permission role -- 'reader', 'commenter', or 'writer'.
            Used by "grant" (defaults to 'reader') and "update".
        share_type (str): Type of sharing -- 'user', 'group', 'domain', or 'anyone'.
            Used by "grant". Defaults to 'user'.
        permission_id (Optional[str]): The permission ID to modify or remove.
            Required for "update" and "revoke" actions.
        recipients (Optional[List[Dict[str, Any]]]): List of recipient objects for
            "grant_batch". Each should have: email (str), role (str, optional),
            share_type (str, optional), expiration_time (str, optional). For domain
            shares use 'domain' field instead of 'email'.
        send_notification (bool): Whether to send notification emails. Defaults to True.
            Used by "grant" and "grant_batch".
        email_message (Optional[str]): Custom notification email message.
            Used by "grant" and "grant_batch".
        expiration_time (Optional[str]): Expiration in RFC 3339 format
            (e.g., "2025-01-15T00:00:00Z"). Used by "grant" and "update".
        allow_file_discovery (Optional[bool]): For 'domain'/'anyone' shares, whether
            the file appears in search. Used by "grant".
        new_owner_email (Optional[str]): Email of the new owner.
            Required for "transfer_owner".
        move_to_new_owners_root (bool): Move file to the new owner's My Drive root.
            Defaults to False. Used by "transfer_owner".

    Returns:
        str: Confirmation with details of the permission change applied.

    Raises:
        ValueError: If `action` is not one of the supported actions, or if an
            argument required by the chosen action is missing. Errors raised by
            the validate_* helpers in "grant" and "update" also propagate
            (the batch path treats them as ValueError).
    """
    # Reject unknown actions before logging or touching the API.
    valid_actions = ("grant", "grant_batch", "update", "revoke", "transfer_owner")
    if action not in valid_actions:
        raise ValueError(
            f"Invalid action '{action}'. Must be one of: {', '.join(valid_actions)}"
        )

    logger.info(
        f"[manage_drive_access] Invoked. Email: '{user_google_email}', "
        f"File ID: '{file_id}', Action: '{action}'"
    )

    # --- grant: share with a single recipient ---
    if action == "grant":
        effective_role = role or "reader"
        validate_share_role(effective_role)
        validate_share_type(share_type)

        # All argument validation happens before the file is resolved, so bad
        # input never costs an API round trip.
        if share_type in ("user", "group") and not share_with:
            raise ValueError(f"share_with is required for share_type '{share_type}'")
        if share_type == "domain" and not share_with:
            raise ValueError(
                "share_with (domain name) is required for share_type 'domain'"
            )

        resolved_file_id, file_metadata = await resolve_drive_item(
            service, file_id, extra_fields="name, webViewLink"
        )
        file_id = resolved_file_id

        permission_body: Dict[str, Any] = {
            "type": share_type,
            "role": effective_role,
        }
        # 'anyone' shares carry neither an email address nor a domain.
        if share_type in ("user", "group"):
            permission_body["emailAddress"] = share_with
        elif share_type == "domain":
            permission_body["domain"] = share_with

        if expiration_time:
            validate_expiration_time(expiration_time)
            permission_body["expirationTime"] = expiration_time

        if share_type in ("domain", "anyone") and allow_file_discovery is not None:
            permission_body["allowFileDiscovery"] = allow_file_discovery

        create_params: Dict[str, Any] = {
            "fileId": file_id,
            "body": permission_body,
            "supportsAllDrives": True,
            "fields": "id, type, role, emailAddress, domain, expirationTime",
        }
        # Notification options are only attached for user/group recipients.
        if share_type in ("user", "group"):
            create_params["sendNotificationEmail"] = send_notification
            if email_message:
                create_params["emailMessage"] = email_message

        created_permission = await asyncio.to_thread(
            service.permissions().create(**create_params).execute
        )

        return "\n".join(
            [
                f"Successfully shared '{file_metadata.get('name', 'Unknown')}'",
                "",
                "Permission created:",
                f" - {format_permission_info(created_permission)}",
                "",
                f"View link: {file_metadata.get('webViewLink', 'N/A')}",
            ]
        )

    # --- grant_batch: share with multiple recipients ---
    if action == "grant_batch":
        if not recipients:
            raise ValueError("recipients list is required for 'grant_batch' action")

        resolved_file_id, file_metadata = await resolve_drive_item(
            service, file_id, extra_fields="name, webViewLink"
        )
        file_id = resolved_file_id

        # One result line per recipient; a bad recipient is recorded and
        # skipped rather than aborting the whole batch.
        results: List[str] = []
        success_count = 0
        failure_count = 0

        for recipient in recipients:
            r_share_type = recipient.get("share_type", "user")

            # NOTE(review): every share type other than 'domain' (including
            # 'anyone') goes through the email branch below and is skipped
            # when no 'email' is given -- confirm this is intended.
            if r_share_type == "domain":
                domain = recipient.get("domain")
                if not domain:
                    results.append(" - Skipped: missing domain for domain share")
                    failure_count += 1
                    continue
                identifier = domain
            else:
                r_email = recipient.get("email")
                if not r_email:
                    results.append(" - Skipped: missing email address")
                    failure_count += 1
                    continue
                identifier = r_email

            r_role = recipient.get("role", "reader")
            try:
                validate_share_role(r_role)
            except ValueError as e:
                results.append(f" - {identifier}: Failed - {e}")
                failure_count += 1
                continue

            try:
                validate_share_type(r_share_type)
            except ValueError as e:
                results.append(f" - {identifier}: Failed - {e}")
                failure_count += 1
                continue

            r_perm_body: Dict[str, Any] = {
                "type": r_share_type,
                "role": r_role,
            }
            if r_share_type == "domain":
                r_perm_body["domain"] = identifier
            else:
                r_perm_body["emailAddress"] = identifier

            if recipient.get("expiration_time"):
                try:
                    validate_expiration_time(recipient["expiration_time"])
                    r_perm_body["expirationTime"] = recipient["expiration_time"]
                except ValueError as e:
                    results.append(f" - {identifier}: Failed - {e}")
                    failure_count += 1
                    continue

            r_create_params: Dict[str, Any] = {
                "fileId": file_id,
                "body": r_perm_body,
                "supportsAllDrives": True,
                "fields": "id, type, role, emailAddress, domain, expirationTime",
            }
            if r_share_type in ("user", "group"):
                r_create_params["sendNotificationEmail"] = send_notification
                if email_message:
                    r_create_params["emailMessage"] = email_message

            # Only HttpError is absorbed per recipient; any other exception
            # propagates out of the loop.
            try:
                created_perm = await asyncio.to_thread(
                    service.permissions().create(**r_create_params).execute
                )
                results.append(f" - {format_permission_info(created_perm)}")
                success_count += 1
            except HttpError as e:
                results.append(f" - {identifier}: Failed - {str(e)}")
                failure_count += 1

        output_parts = [
            f"Batch share results for '{file_metadata.get('name', 'Unknown')}'",
            "",
            f"Summary: {success_count} succeeded, {failure_count} failed",
            "",
            "Results:",
        ]
        output_parts.extend(results)
        output_parts.extend(
            [
                "",
                f"View link: {file_metadata.get('webViewLink', 'N/A')}",
            ]
        )
        return "\n".join(output_parts)

    # --- update: modify an existing permission ---
    if action == "update":
        if not permission_id:
            raise ValueError("permission_id is required for 'update' action")
        if not role and not expiration_time:
            raise ValueError(
                "Must provide at least one of: role, expiration_time for 'update' action"
            )

        if role:
            validate_share_role(role)
        if expiration_time:
            validate_expiration_time(expiration_time)

        resolved_file_id, file_metadata = await resolve_drive_item(
            service, file_id, extra_fields="name"
        )
        file_id = resolved_file_id

        # The update body always carries a role; when the caller only changes
        # the expiration, the permission's current role is read and re-sent.
        effective_role = role
        if not effective_role:
            current_permission = await asyncio.to_thread(
                service.permissions()
                .get(
                    fileId=file_id,
                    permissionId=permission_id,
                    supportsAllDrives=True,
                    fields="role",
                )
                .execute
            )
            effective_role = current_permission.get("role")

        update_body: Dict[str, Any] = {"role": effective_role}
        if expiration_time:
            update_body["expirationTime"] = expiration_time

        updated_permission = await asyncio.to_thread(
            service.permissions()
            .update(
                fileId=file_id,
                permissionId=permission_id,
                body=update_body,
                supportsAllDrives=True,
                fields="id, type, role, emailAddress, domain, expirationTime",
            )
            .execute
        )

        return "\n".join(
            [
                f"Successfully updated permission on '{file_metadata.get('name', 'Unknown')}'",
                "",
                "Updated permission:",
                f" - {format_permission_info(updated_permission)}",
            ]
        )

    # --- revoke: remove an existing permission ---
    if action == "revoke":
        if not permission_id:
            raise ValueError("permission_id is required for 'revoke' action")

        resolved_file_id, file_metadata = await resolve_drive_item(
            service, file_id, extra_fields="name"
        )
        file_id = resolved_file_id

        await asyncio.to_thread(
            service.permissions()
            .delete(
                fileId=file_id,
                permissionId=permission_id,
                supportsAllDrives=True,
            )
            .execute
        )

        return "\n".join(
            [
                f"Successfully removed permission from '{file_metadata.get('name', 'Unknown')}'",
                "",
                f"Permission ID '{permission_id}' has been revoked.",
            ]
        )

    # --- transfer_owner: transfer file ownership ---
    # action == "transfer_owner" (the only remaining value after the checks above)
    if not new_owner_email:
        raise ValueError("new_owner_email is required for 'transfer_owner' action")

    resolved_file_id, file_metadata = await resolve_drive_item(
        service, file_id, extra_fields="name, owners"
    )
    file_id = resolved_file_id

    # Captured before the transfer so the reply can name the previous owner(s).
    current_owners = file_metadata.get("owners", [])
    current_owner_emails = [o.get("emailAddress", "") for o in current_owners]

    transfer_body: Dict[str, Any] = {
        "type": "user",
        "role": "owner",
        "emailAddress": new_owner_email,
    }

    await asyncio.to_thread(
        service.permissions()
        .create(
            fileId=file_id,
            body=transfer_body,
            transferOwnership=True,
            moveToNewOwnersRoot=move_to_new_owners_root,
            supportsAllDrives=True,
            fields="id, type, role, emailAddress",
        )
        .execute
    )

    output_parts = [
        f"Successfully transferred ownership of '{file_metadata.get('name', 'Unknown')}'",
        "",
        f"New owner: {new_owner_email}",
        f"Previous owner(s): {', '.join(current_owner_emails) or 'Unknown'}",
    ]
    if move_to_new_owners_root:
        output_parts.append(f"File moved to {new_owner_email}'s My Drive root.")
    output_parts.extend(["", "Note: Previous owner now has editor access."])

    return "\n".join(output_parts)
@server.tool()
@handle_http_errors("copy_drive_file", is_read_only=False, service_type="drive")
@require_google_service("drive", "drive_file")
async def copy_drive_file(
    service,
    user_google_email: str,
    file_id: str,
    new_name: Optional[str] = None,
    parent_folder_id: str = "root",
) -> str:
    """
    Duplicates a Google Drive file, optionally renaming it and placing it in a folder.

    Args:
        user_google_email (str): The user's Google email address. Required.
        file_id (str): The ID of the file to copy. Required.
        new_name (Optional[str]): New name for the copied file. If not provided, uses "Copy of [original name]".
        parent_folder_id (str): The ID of the folder where the copy should be created. Defaults to 'root' (My Drive).

    Returns:
        str: Confirmation message with details of the copied file and its link.
    """
    logger.info(
        f"[copy_drive_file] Invoked. Email: '{user_google_email}', File ID: '{file_id}', New name: '{new_name}', Parent folder: '{parent_folder_id}'"
    )

    file_id, source_meta = await resolve_drive_item(
        service, file_id, extra_fields="name, webViewLink, mimeType"
    )
    original_name = source_meta.get("name", "Unknown File")

    target_folder = await resolve_folder_id(service, parent_folder_id)

    request_body = {"name": new_name if new_name else f"Copy of {original_name}"}
    # "parents" is only sent for a non-root target.
    if target_folder != "root":
        request_body["parents"] = [target_folder]

    copy_request = service.files().copy(
        fileId=file_id,
        body=request_body,
        supportsAllDrives=True,
        fields="id, name, webViewLink, mimeType, parents",
    )
    duplicate = await asyncio.to_thread(copy_request.execute)

    return "\n".join(
        [
            f"Successfully copied '{original_name}'",
            "",
            f"Original file ID: {file_id}",
            f"New file ID: {duplicate.get('id', 'N/A')}",
            f"New file name: {duplicate.get('name', 'Unknown')}",
            f"File type: {duplicate.get('mimeType', 'Unknown')}",
            f"Location: {parent_folder_id}",
            "",
            f"View copied file: {duplicate.get('webViewLink', 'N/A')}",
        ]
    )
@server.tool()
@handle_http_errors(
    "set_drive_file_permissions", is_read_only=False, service_type="drive"
)
@require_google_service("drive", "drive_file")
async def set_drive_file_permissions(
    service,
    user_google_email: str,
    file_id: str,
    link_sharing: Optional[str] = None,
    writers_can_share: Optional[bool] = None,
    copy_requires_writer_permission: Optional[bool] = None,
) -> str:
    """
    Sets file-level sharing settings and controls link sharing for a Google Drive file or folder.

    This is a high-level tool for the most common permission changes. Use this to toggle
    "anyone with the link" access or configure file-level sharing behavior. For managing
    individual user/group permissions, use manage_drive_access instead.

    Args:
        user_google_email (str): The user's Google email address. Required.
        file_id (str): The ID of the file or folder. Required.
        link_sharing (Optional[str]): Control "anyone with the link" access for the file.
            - "off": Disable "anyone with the link" access for this file.
            - "reader": Anyone with the link can view.
            - "commenter": Anyone with the link can comment.
            - "writer": Anyone with the link can edit.
        writers_can_share (Optional[bool]): Whether editors can change permissions and share.
            If False, only the owner can share. Defaults to None (no change).
        copy_requires_writer_permission (Optional[bool]): Whether viewers and commenters
            are prevented from copying, printing, or downloading. Defaults to None (no change).

    Returns:
        str: Summary of all permission changes applied to the file.

    Raises:
        ValueError: If none of the three settings is provided, or if
            link_sharing is not one of "off", "reader", "commenter", "writer".
    """
    logger.info(
        f"[set_drive_file_permissions] Invoked. Email: '{user_google_email}', "
        f"File ID: '{file_id}', Link sharing: '{link_sharing}', "
        f"Writers can share: {writers_can_share}, Copy restriction: {copy_requires_writer_permission}"
    )

    if (
        link_sharing is None
        and writers_can_share is None
        and copy_requires_writer_permission is None
    ):
        raise ValueError(
            "Must provide at least one of: link_sharing, writers_can_share, copy_requires_writer_permission"
        )

    valid_link_sharing = {"off", "reader", "commenter", "writer"}
    if link_sharing is not None and link_sharing not in valid_link_sharing:
        raise ValueError(
            f"Invalid link_sharing '{link_sharing}'. Must be one of: {', '.join(sorted(valid_link_sharing))}"
        )

    resolved_file_id, file_metadata = await resolve_drive_item(
        service, file_id, extra_fields="name, webViewLink"
    )
    file_id = resolved_file_id
    file_name = file_metadata.get("name", "Unknown")

    output_parts = [f"Permission settings updated for '{file_name}'", ""]
    changes_made = []

    # Handle file-level settings via files().update()
    file_update_body = {}
    if writers_can_share is not None:
        file_update_body["writersCanShare"] = writers_can_share
    if copy_requires_writer_permission is not None:
        file_update_body["copyRequiresWriterPermission"] = (
            copy_requires_writer_permission
        )

    if file_update_body:
        await asyncio.to_thread(
            service.files()
            .update(
                fileId=file_id,
                body=file_update_body,
                supportsAllDrives=True,
                fields="id",
            )
            .execute
        )
        if writers_can_share is not None:
            state = "allowed" if writers_can_share else "restricted to owner"
            changes_made.append(f" - Editors sharing: {state}")
        if copy_requires_writer_permission is not None:
            state = "restricted" if copy_requires_writer_permission else "allowed"
            changes_made.append(f" - Viewers copy/print/download: {state}")

    # Handle link sharing via permissions API
    if link_sharing is not None:
        current_permissions = await asyncio.to_thread(
            service.permissions()
            .list(
                fileId=file_id,
                supportsAllDrives=True,
                fields="permissions(id, type, role)",
            )
            .execute
        )
        # Link sharing is represented by permissions of type "anyone".
        anyone_perms = [
            p
            for p in current_permissions.get("permissions", [])
            if p.get("type") == "anyone"
        ]

        if link_sharing == "off":
            if anyone_perms:
                # Remove every "anyone" permission, not just the first one.
                for perm in anyone_perms:
                    await asyncio.to_thread(
                        service.permissions()
                        .delete(
                            fileId=file_id,
                            permissionId=perm["id"],
                            supportsAllDrives=True,
                        )
                        .execute
                    )
                changes_made.append(
                    " - Link sharing: disabled (restricted to specific people)"
                )
            else:
                changes_made.append(" - Link sharing: already off (no change)")
        elif anyone_perms:
            # Re-use the existing "anyone" permission and change its role.
            await asyncio.to_thread(
                service.permissions()
                .update(
                    fileId=file_id,
                    permissionId=anyone_perms[0]["id"],
                    body={
                        "role": link_sharing,
                        "allowFileDiscovery": False,
                    },
                    supportsAllDrives=True,
                    fields="id, type, role",
                )
                .execute
            )
            changes_made.append(f" - Link sharing: updated to '{link_sharing}'")
        else:
            await asyncio.to_thread(
                service.permissions()
                .create(
                    fileId=file_id,
                    body={
                        "type": "anyone",
                        "role": link_sharing,
                        "allowFileDiscovery": False,
                    },
                    supportsAllDrives=True,
                    fields="id, type, role",
                )
                .execute
            )
            changes_made.append(f" - Link sharing: enabled as '{link_sharing}'")

    # The argument check above guarantees at least one setting was supplied,
    # and every handled setting appends a line, so changes_made is never
    # empty here (the former "No changes" fallback was unreachable).
    output_parts.append("Changes:")
    output_parts.extend(changes_made)
    output_parts.extend(["", f"View link: {file_metadata.get('webViewLink', 'N/A')}"])

    return "\n".join(output_parts)
await asyncio.to_thread( + service.permissions() + .update( + fileId=file_id, + permissionId=anyone_perms[0]["id"], + body={ + "role": link_sharing, + "allowFileDiscovery": False, + }, + supportsAllDrives=True, + fields="id, type, role", + ) + .execute + ) + changes_made.append(f" - Link sharing: updated to '{link_sharing}'") + else: + await asyncio.to_thread( + service.permissions() + .create( + fileId=file_id, + body={ + "type": "anyone", + "role": link_sharing, + "allowFileDiscovery": False, + }, + supportsAllDrives=True, + fields="id, type, role", + ) + .execute + ) + changes_made.append(f" - Link sharing: enabled as '{link_sharing}'") + + output_parts.append("Changes:") + if changes_made: + output_parts.extend(changes_made) + else: + output_parts.append(" - No changes (already configured)") + output_parts.extend(["", f"View link: {file_metadata.get('webViewLink', 'N/A')}"]) + + return "\n".join(output_parts) diff --git a/gforms/__init__.py b/gforms/__init__.py new file mode 100644 index 0000000..2b54323 --- /dev/null +++ b/gforms/__init__.py @@ -0,0 +1,3 @@ +""" +Google Forms MCP Tools module +""" diff --git a/gforms/forms_tools.py b/gforms/forms_tools.py new file mode 100644 index 0000000..059a585 --- /dev/null +++ b/gforms/forms_tools.py @@ -0,0 +1,487 @@ +""" +Google Forms MCP Tools + +This module provides MCP tools for interacting with Google Forms API. +""" + +import logging +import asyncio +import json +from typing import List, Optional, Dict, Any + + +from auth.service_decorator import require_google_service +from core.server import server +from core.utils import handle_http_errors + +logger = logging.getLogger(__name__) + + +def _extract_option_values(options: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + """Extract valid option objects from Forms choice option objects. + + Returns the full option dicts (preserving fields like ``isOther``, + ``image``, ``goToAction``, and ``goToSectionId``) while filtering + out entries that lack a truthy ``value``. 
+ """ + return [option for option in options if option.get("value")] + + +def _get_question_type(question: Dict[str, Any]) -> str: + """Infer a stable question/item type label from a Forms question payload.""" + choice_question = question.get("choiceQuestion") + if choice_question: + return choice_question.get("type", "CHOICE") + + text_question = question.get("textQuestion") + if text_question: + return "PARAGRAPH" if text_question.get("paragraph") else "TEXT" + + if "rowQuestion" in question: + return "GRID_ROW" + if "scaleQuestion" in question: + return "SCALE" + if "dateQuestion" in question: + return "DATE" + if "timeQuestion" in question: + return "TIME" + if "fileUploadQuestion" in question: + return "FILE_UPLOAD" + if "ratingQuestion" in question: + return "RATING" + + return "QUESTION" + + +def _serialize_form_item(item: Dict[str, Any], index: int) -> Dict[str, Any]: + """Serialize a Forms item with the key metadata agents need for edits.""" + serialized_item: Dict[str, Any] = { + "index": index, + "itemId": item.get("itemId"), + "title": item.get("title", f"Question {index}"), + } + + if item.get("description"): + serialized_item["description"] = item["description"] + + if "questionItem" in item: + question = item.get("questionItem", {}).get("question", {}) + serialized_item["type"] = _get_question_type(question) + serialized_item["required"] = question.get("required", False) + + question_id = question.get("questionId") + if question_id: + serialized_item["questionId"] = question_id + + choice_question = question.get("choiceQuestion") + if choice_question: + serialized_item["options"] = _extract_option_values( + choice_question.get("options", []) + ) + + return serialized_item + + if "questionGroupItem" in item: + question_group = item.get("questionGroupItem", {}) + columns = _extract_option_values( + question_group.get("grid", {}).get("columns", {}).get("options", []) + ) + + rows = [] + for question in question_group.get("questions", []): + row: 
@server.tool()
@handle_http_errors("create_form", service_type="forms")
@require_google_service("forms", "forms")
async def create_form(
    service,
    user_google_email: str,
    title: str,
    description: Optional[str] = None,
    document_title: Optional[str] = None,
) -> str:
    """
    Create a new Google Form and, optionally, set its description.

    Per the Forms API reference, ``forms.create`` only accepts ``info.title``
    and ``info.documentTitle``; other fields, including the description, are
    disallowed. When a description is supplied it is therefore applied with a
    follow-up ``forms.batchUpdate`` (``updateFormInfo``) once the form exists.

    Args:
        user_google_email (str): The user's Google email address. Required.
        title (str): The title of the form.
        description (Optional[str]): The description of the form.
        document_title (Optional[str]): The document title (shown in browser tab).

    Returns:
        str: Confirmation message with form ID, edit URL and responder URL.
    """
    logger.info(f"[create_form] Invoked. Email: '{user_google_email}', Title: {title}")

    # Only the fields that forms.create accepts go into the create body.
    form_info: Dict[str, Any] = {"title": title}
    if document_title:
        # Canonical JSON field name, matching what get_form reads back.
        form_info["documentTitle"] = document_title

    created_form = await asyncio.to_thread(
        service.forms().create(body={"info": form_info}).execute
    )
    form_id = created_form.get("formId")

    if description:
        # The form already exists at this point; if this call fails the error
        # propagates to the decorator, so record the ID first for recovery.
        logger.info(f"[create_form] Form {form_id} created; applying description")
        description_update = {
            "requests": [
                {
                    "updateFormInfo": {
                        "info": {"description": description},
                        "updateMask": "description",
                    }
                }
            ]
        }
        await asyncio.to_thread(
            service.forms()
            .batchUpdate(formId=form_id, body=description_update)
            .execute
        )

    edit_url = f"https://docs.google.com/forms/d/{form_id}/edit"
    responder_url = created_form.get(
        "responderUri", f"https://docs.google.com/forms/d/{form_id}/viewform"
    )

    confirmation_message = f"Successfully created form '{created_form.get('info', {}).get('title', title)}' for {user_google_email}. Form ID: {form_id}. Edit URL: {edit_url}. Responder URL: {responder_url}"
    logger.info(f"Form created successfully for {user_google_email}. ID: {form_id}")
    return confirmation_message
@server.tool()
@handle_http_errors("set_publish_settings", service_type="forms")
@require_google_service("forms", "forms")
async def set_publish_settings(
    service,
    user_google_email: str,
    form_id: str,
    publish_as_template: bool = False,
    require_authentication: bool = False,
) -> str:
    """
    Update a form's publish settings.

    Args:
        user_google_email (str): The user's Google email address. Required.
        form_id (str): The ID of the form whose publish settings are updated.
        publish_as_template (bool): Whether to publish as a template. Defaults to False.
        require_authentication (bool): Whether to require authentication to view/submit. Defaults to False.

    Returns:
        str: Confirmation message echoing the settings that were sent.
    """
    logger.info(
        f"[set_publish_settings] Invoked. Email: '{user_google_email}', Form ID: {form_id}"
    )

    # NOTE(review): the documented forms.setPublishSettings body looks like
    # {"publishSettings": {...}, "updateMask": ...} -- confirm that the API
    # actually accepts the two keys sent below.
    publish_request = service.forms().setPublishSettings(
        formId=form_id,
        body=dict(
            publishAsTemplate=publish_as_template,
            requireAuthentication=require_authentication,
        ),
    )
    # The client call is blocking, so run it off the event loop.
    await asyncio.to_thread(publish_request.execute)

    logger.info(
        f"Publish settings updated successfully for {user_google_email}. Form ID: {form_id}"
    )
    return (
        f"Successfully updated publish settings for form {form_id} for {user_google_email}. "
        f"Publish as template: {publish_as_template}, "
        f"Require authentication: {require_authentication}"
    )
@server.tool()
@handle_http_errors("list_form_responses", is_read_only=True, service_type="forms")
@require_google_service("forms", "forms")
async def list_form_responses(
    service,
    user_google_email: str,
    form_id: str,
    page_size: int = 10,
    page_token: Optional[str] = None,
) -> str:
    """
    List one page of responses submitted to a form.

    Args:
        user_google_email (str): The user's Google email address. Required.
        form_id (str): The ID of the form.
        page_size (int): Maximum number of responses to return. Defaults to 10.
        page_token (Optional[str]): Token for retrieving next page of results.

    Returns:
        str: One summary line per response, followed by pagination info, or a
            "no responses" message when the page is empty.
    """
    logger.info(
        f"[list_form_responses] Invoked. Email: '{user_google_email}', Form ID: {form_id}"
    )

    # pageToken is only sent when the caller is continuing a previous listing.
    list_kwargs = {"formId": form_id, "pageSize": page_size}
    if page_token:
        list_kwargs["pageToken"] = page_token

    page = await asyncio.to_thread(
        service.forms().responses().list(**list_kwargs).execute
    )
    responses = page.get("responses", [])
    if not responses:
        return f"No responses found for form {form_id} for {user_google_email}."

    summary_lines = [
        f" {position}. Response ID: {entry.get('responseId', 'Unknown')}"
        f" | Created: {entry.get('createTime', 'Unknown')}"
        f" | Last Submitted: {entry.get('lastSubmittedTime', 'Unknown')}"
        f" | Answers: {len(entry.get('answers', {}))}"
        for position, entry in enumerate(responses, 1)
    ]

    next_page_token = page.get("nextPageToken")
    if next_page_token:
        pagination_info = f"\nNext page token: {next_page_token}"
    else:
        pagination_info = "\nNo more pages."

    report_lines = [
        f"Form Responses for {user_google_email}:",
        f"- Form ID: {form_id}",
        f"- Total responses returned: {len(responses)}",
        "- Responses:",
        *summary_lines,
    ]
    result = "\n".join(report_lines) + pagination_info

    logger.info(
        f"Successfully retrieved {len(responses)} responses for {user_google_email}. Form ID: {form_id}"
    )
    return result
@server.tool()
@handle_http_errors("batch_update_form", service_type="forms")
@require_google_service("forms", "forms")
async def batch_update_form(
    service,
    user_google_email: str,
    form_id: str,
    requests: List[Dict[str, Any]],
) -> str:
    """
    Apply a batch of update requests to a Google Form.

    This is the main way to change a form after it has been created: items
    can be added, edited, removed or reordered, and the form's info and
    settings can be updated.

    Args:
        user_google_email (str): The user's Google email address. Required.
        form_id (str): The ID of the form to update.
        requests (List[Dict[str, Any]]): List of update requests to apply.
            Supported request types:
            - createItem: Add a new question or content item
            - updateItem: Modify an existing item
            - deleteItem: Remove an item
            - moveItem: Reorder an item
            - updateFormInfo: Update form title/description
            - updateSettings: Modify form settings (e.g., quiz mode)

    Returns:
        str: Summary of the batch update, as produced by the shared
            implementation helper.
    """
    request_count = len(requests)
    logger.info(
        f"[batch_update_form] Invoked. Email: '{user_google_email}', "
        f"Form ID: '{form_id}', Requests: {request_count}"
    )

    # The API call and reply formatting live in the undecorated helper so
    # they can be exercised without the tool decorators.
    summary = await _batch_update_form_impl(service, form_id, requests)

    logger.info(f"Batch update completed successfully for {user_google_email}")
    return summary
class _HTMLTextExtractor(HTMLParser):
    """Extract readable text from HTML using only the standard library.

    Text inside ``<script>`` and ``<style>`` elements is dropped. A space is
    emitted at block-level element boundaries and at ``<br>`` so that text
    from adjacent blocks is not fused together (``<p>a</p><p>b</p>`` yields
    ``"a b"`` rather than ``"ab"``). Inline tags such as ``<b>`` add no
    whitespace. ``get_text`` collapses all runs of whitespace.
    """

    # Elements whose text content is never shown to a reader.
    _SKIPPED_TAGS = frozenset({"script", "style"})
    # Elements that visually separate their content from what surrounds them.
    _BLOCK_TAGS = frozenset(
        {
            "address", "article", "aside", "blockquote", "br", "dd", "div",
            "dl", "dt", "figcaption", "figure", "footer", "form", "h1", "h2",
            "h3", "h4", "h5", "h6", "header", "hr", "li", "main", "nav", "ol",
            "p", "pre", "section", "table", "td", "th", "title", "tr", "ul",
        }
    )

    def __init__(self):
        super().__init__()
        self._text = []  # collected text fragments, in document order
        self._skip = False  # True while inside a script/style element

    def handle_starttag(self, tag, attrs):
        """Start skipping on script/style; mark a boundary on block tags."""
        self._skip = tag in self._SKIPPED_TAGS
        if tag in self._BLOCK_TAGS:
            self._text.append(" ")

    def handle_endtag(self, tag):
        """Stop skipping after script/style; mark a boundary on block tags."""
        if tag in self._SKIPPED_TAGS:
            self._skip = False
        elif tag in self._BLOCK_TAGS:
            self._text.append(" ")

    def handle_data(self, data):
        """Collect text unless it belongs to a skipped element."""
        if not self._skip:
            self._text.append(data)

    def get_text(self) -> str:
        """Return the collected text with whitespace runs collapsed to one space."""
        return " ".join("".join(self._text).split())
def _extract_message_bodies(payload):
    """
    Collect the plain-text and HTML bodies from a Gmail message payload.

    Parts are visited breadth-first; nested parts are followed only through
    ``multipart/*`` containers. The first non-empty ``text/plain`` and the
    first non-empty ``text/html`` body win. The payload's own body is checked
    last, so it only fills a slot that the parts left empty.

    Args:
        payload (dict): The message payload from Gmail API

    Returns:
        dict: Dictionary with 'text' and 'html' keys containing body content
            (empty strings when nothing was found)
    """
    found = {"text": "", "html": ""}
    slot_by_mime = {"text/plain": "text", "text/html": "html"}

    def _store(mime, encoded):
        # Decode a base64url body and keep it if its slot is still empty.
        decoded = base64.urlsafe_b64decode(encoded).decode("utf-8", errors="ignore")
        slot = slot_by_mime.get(mime)
        if slot is not None and not found[slot]:
            found[slot] = decoded

    if "parts" in payload:
        pending = list(payload.get("parts", []))
    else:
        pending = [payload]

    # Index cursor over a growing list: FIFO order without mutating the head.
    cursor = 0
    while cursor < len(pending):
        node = pending[cursor]
        cursor += 1

        node_mime = node.get("mimeType", "")
        encoded_body = node.get("body", {}).get("data")
        if encoded_body:
            try:
                _store(node_mime, encoded_body)
            except Exception as e:
                logger.warning(f"Failed to decode body part: {e}")

        # Only multipart containers contribute their children.
        if node_mime.startswith("multipart/") and "parts" in node:
            pending.extend(node.get("parts", []))

    # Finally consider body data attached directly to the top-level payload.
    if payload.get("body", {}).get("data"):
        try:
            _store(payload.get("mimeType", ""), payload["body"]["data"])
        except Exception as e:
            logger.warning(f"Failed to decode main payload body: {e}")

    return found