Merge pull request #580 from Bortlesboat/feat/517-cell-notes

feat(sheets): expose cell notes in read_sheet_values
2026-03-15 17:53:43 -04:00
parent 4f6b35b218 ba3db90a62
commit 7fe01c1b86
2 changed files with 184 additions and 40 deletions
--- a/gsheets/sheets_helpers.py
+++ b/gsheets/sheets_helpers.py
@@ -7,11 +7,15 @@ conditional formatting helpers.
 import asyncio
 import json
 import logging
 import re
 from typing import List, Optional, Union
 from core.utils import UserInputError
 logger = logging.getLogger(__name__)
 MAX_GRID_METADATA_CELLS = 5000
 A1_PART_REGEX = re.compile(r"^([A-Za-z]*)(\d*)$")
 SHEET_TITLE_SAFE_RE = re.compile(r"^[A-Za-z0-9_]+$")
@@ -877,3 +881,170 @@ def _build_gradient_rule(
        rule_body["gradientRule"]["midpoint"] = gradient_points[1]
        rule_body["gradientRule"]["maxpoint"] = gradient_points[2]
    return rule_body
 def _extract_cell_notes_from_grid(spreadsheet: dict) -> list[dict[str, str]]:
    """
    Collect every non-empty cell note found in a spreadsheet's grid data.
    Walks ``sheets`` -> ``data`` -> ``rowData`` -> ``values`` and, for each
    cell carrying a truthy ``note``, records the cell reference produced by
    ``_format_a1_cell`` from the sheet title and the cell's row/column index
    (the grid's ``startRow``/``startColumn`` plus the cell's offset inside
    that grid). Missing or empty containers, rows and cells are skipped.
    Returns a list of dictionaries with:
        - "cell": cell A1 reference
        - "note": the note text
    """
    collected: list[dict[str, str]] = []
    for sheet_entry in spreadsheet.get("sheets") or []:
        # Sheets without a usable title are reported under a placeholder name.
        title = sheet_entry.get("properties", {}).get("title") or "Unknown"
        for grid_block in sheet_entry.get("data") or []:
            first_row = _coerce_int(grid_block.get("startRow"), default=0)
            first_col = _coerce_int(grid_block.get("startColumn"), default=0)
            grid_rows = grid_block.get("rowData") or []
            for row_index, row_entry in enumerate(grid_rows):
                if row_entry:
                    row_cells = row_entry.get("values") or []
                    for col_index, cell_entry in enumerate(row_cells):
                        note_text = cell_entry.get("note") if cell_entry else None
                        if note_text:
                            cell_ref = _format_a1_cell(
                                title,
                                first_row + row_index,
                                first_col + col_index,
                            )
                            collected.append({"cell": cell_ref, "note": note_text})
    return collected
 async def _fetch_sheet_notes(
    service, spreadsheet_id: str, a1_range: str
 ) -> list[dict[str, str]]:
    """Retrieve the cell notes for ``a1_range`` of the given spreadsheet.
    Issues one ``spreadsheets.get`` request with ``includeGridData`` enabled
    and a ``fields`` mask limited to sheet titles, grid start offsets and
    per-cell notes. The blocking ``execute`` call runs in a worker thread via
    ``asyncio.to_thread``; the response is then parsed by
    ``_extract_cell_notes_from_grid``. Errors from the request are not caught
    here.
    """
    notes_only_fields = (
        "sheets(properties(title),data(startRow,startColumn,rowData(values(note))))"
    )
    request = service.spreadsheets().get(
        spreadsheetId=spreadsheet_id,
        ranges=[a1_range],
        includeGridData=True,
        fields=notes_only_fields,
    )
    grid_response = await asyncio.to_thread(request.execute)
    return _extract_cell_notes_from_grid(grid_response)
 def _format_sheet_notes_section(
    *, notes: list[dict[str, str]], range_label: str, max_details: int = 25
 ) -> str:
    """
    Format a list of cell notes into a human-readable section.
    """
    if not notes:
        return ""
    lines = []
    for item in notes[:max_details]:
        cell = item.get("cell") or "(unknown cell)"
        note = item.get("note") or "(empty note)"
        lines.append(f"- {cell}: {note}")
    suffix = (
        f"\n... and {len(notes) - max_details} more notes"
        if len(notes) > max_details
        else ""
    )
    return f"\n\nCell notes in range '{range_label}':\n" + "\n".join(lines) + suffix
 async def _fetch_grid_metadata(
    service,
    spreadsheet_id: str,
    resolved_range: str,
    values: List[List[object]],
    include_hyperlinks: bool = False,
    include_notes: bool = False,
 ) -> tuple[str, str]:
    """Fetch hyperlinks and/or notes for a range via a single spreadsheets.get call.
    Computes tight range bounds, enforces the cell-count cap, builds a combined
    ``fields`` selector so only one API round-trip is needed when both flags are
    ``True``, then parses the response into formatted output sections.
    Returns:
        (hyperlink_section, notes_section) — each is an empty string when the
        corresponding flag is ``False`` or no data was found.
    """
    if not include_hyperlinks and not include_notes:
        return "", ""
    tight_range = _a1_range_for_values(resolved_range, values)
    if not tight_range:
        logger.info(
            "[read_sheet_values] Skipping grid metadata fetch for range '%s': "
            "unable to determine tight bounds",
            resolved_range,
        )
        return "", ""
    cell_count = _a1_range_cell_count(tight_range) or sum(len(row) for row in values)
    if cell_count > MAX_GRID_METADATA_CELLS:
        logger.info(
            "[read_sheet_values] Skipping grid metadata fetch for large range "
            "'%s' (%d cells > %d limit)",
            tight_range,
            cell_count,
            MAX_GRID_METADATA_CELLS,
        )
        return "", ""
    # Build a combined fields selector so we hit the API at most once.
    value_fields: list[str] = []
    if include_hyperlinks:
        value_fields.extend(["hyperlink", "textFormatRuns(format(link(uri)))"])
    if include_notes:
        value_fields.append("note")
    fields = (
        "sheets(properties(title),data(startRow,startColumn,"
        f"rowData(values({','.join(value_fields)}))))"
    )
    try:
        response = await asyncio.to_thread(
            service.spreadsheets()
            .get(
                spreadsheetId=spreadsheet_id,
                ranges=[tight_range],
                includeGridData=True,
                fields=fields,
            )
            .execute
        )
    except Exception as exc:
        logger.warning(
            "[read_sheet_values] Failed fetching grid metadata for range '%s': %s",
            tight_range,
            exc,
        )
        return "", ""
    hyperlink_section = ""
    if include_hyperlinks:
        hyperlinks = _extract_cell_hyperlinks_from_grid(response)
        hyperlink_section = _format_sheet_hyperlink_section(
            hyperlinks=hyperlinks, range_label=tight_range
        )
    notes_section = ""
    if include_notes:
        notes = _extract_cell_notes_from_grid(response)
        notes_section = _format_sheet_notes_section(
            notes=notes, range_label=tight_range
        )
    return hyperlink_section, notes_section
--- a/gsheets/sheets_tools.py
+++ b/gsheets/sheets_tools.py
@@ -15,16 +15,14 @@ from core.server import server
 from core.utils import handle_http_errors, UserInputError
 from core.comments import create_comment_tools
 from gsheets.sheets_helpers import (
    _a1_range_cell_count,
    CONDITION_TYPES,
    _a1_range_for_values,
    _build_boolean_rule,
    _build_gradient_rule,
    _fetch_detailed_sheet_errors,
-    _fetch_sheet_hyperlinks,
+    _fetch_grid_metadata,
    _fetch_sheets_with_rules,
    _format_conditional_rules_section,
    _format_sheet_hyperlink_section,
    _format_sheet_error_section,
    _parse_a1_range,
    _parse_condition_values,
@@ -36,7 +34,6 @@ from gsheets.sheets_helpers import (
 # Configure module logger
 logger = logging.getLogger(__name__)
 MAX_HYPERLINK_FETCH_CELLS = 5000
@server.tool()
@@ -179,6 +176,7 @@ async def read_sheet_values(
    spreadsheet_id: str,
    range_name: str = "A1:Z1000",
    include_hyperlinks: bool = False,
    include_notes: bool = False,
 ) -> str:
    """
    Reads values from a specific range in a Google Sheet.
@@ -189,6 +187,8 @@ async def read_sheet_values(
        range_name (str): The range to read (e.g., "Sheet1!A1:D10", "A1:D10"). Defaults to "A1:Z1000".
        include_hyperlinks (bool): If True, also fetch hyperlink metadata for the range.
            Defaults to False to avoid expensive includeGridData requests.
        include_notes (bool): If True, also fetch cell notes for the range.
            Defaults to False to avoid expensive includeGridData requests.
    Returns:
        str: The formatted values from the specified range.
@@ -211,41 +211,14 @@ async def read_sheet_values(
    resolved_range = result.get("range", range_name)
    detailed_range = _a1_range_for_values(resolved_range, values) or resolved_range
-    hyperlink_section = ""
+    hyperlink_section, notes_section = await _fetch_grid_metadata(
-    if include_hyperlinks:
+        service,
-        # Use a tight A1 range for includeGridData fetches to avoid expensive
+        spreadsheet_id,
-        # open-ended requests (e.g., A:Z).
+        resolved_range,
-        hyperlink_range = _a1_range_for_values(resolved_range, values)
+        values,
-        if not hyperlink_range:
+        include_hyperlinks=include_hyperlinks,
-            logger.info(
+        include_notes=include_notes,
-                "[read_sheet_values] Skipping hyperlink fetch for range '%s': unable to determine tight bounds",
+    )
                resolved_range,
            )
        else:
            cell_count = _a1_range_cell_count(hyperlink_range) or sum(
                len(row) for row in values
            )
            if cell_count <= MAX_HYPERLINK_FETCH_CELLS:
                try:
                    hyperlinks = await _fetch_sheet_hyperlinks(
                        service, spreadsheet_id, hyperlink_range
                    )
                    hyperlink_section = _format_sheet_hyperlink_section(
                        hyperlinks=hyperlinks, range_label=hyperlink_range
                    )
                except Exception as exc:
                    logger.warning(
                        "[read_sheet_values] Failed fetching hyperlinks for range '%s': %s",
                        hyperlink_range,
                        exc,
                    )
            else:
                logger.info(
                    "[read_sheet_values] Skipping hyperlink fetch for large range '%s' (%d cells > %d limit)",
                    hyperlink_range,
                    cell_count,
                    MAX_HYPERLINK_FETCH_CELLS,
                )
    detailed_errors_section = ""
    if _values_contain_sheets_errors(values):
@@ -277,7 +250,7 @@ async def read_sheet_values(
    )
    logger.info(f"Successfully read {len(values)} rows for {user_google_email}.")
-    return text_output + hyperlink_section + detailed_errors_section
+    return text_output + hyperlink_section + notes_section + detailed_errors_section
@server.tool()