Merge pull request #580 from Bortlesboat/feat/517-cell-notes

feat(sheets): expose cell notes in read_sheet_values
2026-03-15 17:53:43 -04:00
parent 4f6b35b218 ba3db90a62
commit 7fe01c1b86
2 changed files with 184 additions and 40 deletions
--- a/gsheets/sheets_helpers.py
+++ b/gsheets/sheets_helpers.py
@@ -7,11 +7,15 @@ conditional formatting helpers.

 import asyncio
 import json
+import logging
 import re
 from typing import List, Optional, Union

 from core.utils import UserInputError

+logger = logging.getLogger(__name__)
+
+MAX_GRID_METADATA_CELLS = 5000

 A1_PART_REGEX = re.compile(r"^([A-Za-z]*)(\d*)$")
 SHEET_TITLE_SAFE_RE = re.compile(r"^[A-Za-z0-9_]+$")
@@ -877,3 +881,170 @@ def _build_gradient_rule(
        rule_body["gradientRule"]["midpoint"] = gradient_points[1]
        rule_body["gradientRule"]["maxpoint"] = gradient_points[2]
    return rule_body
+
+
+def _extract_cell_notes_from_grid(spreadsheet: dict) -> list[dict[str, str]]:
+    """
+    Extract cell notes from spreadsheet grid data.
+
+    Returns a list of dictionaries with:
+        - "cell": cell A1 reference
+        - "note": the note text
+    """
+    notes: list[dict[str, str]] = []
+    for sheet in spreadsheet.get("sheets", []) or []:
+        sheet_title = sheet.get("properties", {}).get("title") or "Unknown"
+        for grid in sheet.get("data", []) or []:
+            start_row = _coerce_int(grid.get("startRow"), default=0)
+            start_col = _coerce_int(grid.get("startColumn"), default=0)
+            for row_offset, row_data in enumerate(grid.get("rowData", []) or []):
+                if not row_data:
+                    continue
+                for col_offset, cell_data in enumerate(
+                    row_data.get("values", []) or []
+                ):
+                    if not cell_data:
+                        continue
+                    note = cell_data.get("note")
+                    if not note:
+                        continue
+                    notes.append(
+                        {
+                            "cell": _format_a1_cell(
+                                sheet_title,
+                                start_row + row_offset,
+                                start_col + col_offset,
+                            ),
+                            "note": note,
+                        }
+                    )
+    return notes
+
+
+async def _fetch_sheet_notes(
+    service, spreadsheet_id: str, a1_range: str
+) -> list[dict[str, str]]:
+    """Fetch cell notes for the given range via spreadsheets.get with includeGridData."""
+    response = await asyncio.to_thread(
+        service.spreadsheets()
+        .get(
+            spreadsheetId=spreadsheet_id,
+            ranges=[a1_range],
+            includeGridData=True,
+            fields="sheets(properties(title),data(startRow,startColumn,rowData(values(note))))",
+        )
+        .execute
+    )
+    return _extract_cell_notes_from_grid(response)
+
+
+def _format_sheet_notes_section(
+    *, notes: list[dict[str, str]], range_label: str, max_details: int = 25
+) -> str:
+    """
+    Format cell notes into a readable section ("" if none; lists at most ``max_details``).
+    """
+    if not notes:
+        return ""
+
+    lines = []
+    for item in notes[:max_details]:
+        cell = item.get("cell") or "(unknown cell)"
+        note = item.get("note") or "(empty note)"
+        lines.append(f"- {cell}: {note}")
+
+    suffix = (
+        f"\n... and {len(notes) - max_details} more notes"
+        if len(notes) > max_details
+        else ""
+    )
+    return f"\n\nCell notes in range '{range_label}':\n" + "\n".join(lines) + suffix
+
+
+async def _fetch_grid_metadata(
+    service,
+    spreadsheet_id: str,
+    resolved_range: str,
+    values: List[List[object]],
+    include_hyperlinks: bool = False,
+    include_notes: bool = False,
+) -> tuple[str, str]:
+    """Fetch hyperlinks and/or notes for a range via a single spreadsheets.get call.
+
+    Computes tight range bounds, enforces the cell-count cap, builds a combined
+    ``fields`` selector so only one API round-trip is needed when both flags are
+    ``True``, then parses the response into formatted output sections.
+
+    Returns:
+        (hyperlink_section, notes_section) — each is an empty string when its flag
+        is ``False``, no data was found, or the fetch was skipped or failed.
+    """
+    if not include_hyperlinks and not include_notes:
+        return "", ""
+
+    tight_range = _a1_range_for_values(resolved_range, values)
+    if not tight_range:
+        logger.info(
+            "[read_sheet_values] Skipping grid metadata fetch for range '%s': "
+            "unable to determine tight bounds",
+            resolved_range,
+        )
+        return "", ""
+
+    cell_count = _a1_range_cell_count(tight_range) or sum(len(row) for row in values)
+    if cell_count > MAX_GRID_METADATA_CELLS:
+        logger.info(
+            "[read_sheet_values] Skipping grid metadata fetch for large range "
+            "'%s' (%d cells > %d limit)",
+            tight_range,
+            cell_count,
+            MAX_GRID_METADATA_CELLS,
+        )
+        return "", ""
+
+    # Build a combined fields selector so we hit the API at most once.
+    value_fields: list[str] = []
+    if include_hyperlinks:
+        value_fields.extend(["hyperlink", "textFormatRuns(format(link(uri)))"])
+    if include_notes:
+        value_fields.append("note")
+
+    fields = (
+        "sheets(properties(title),data(startRow,startColumn,"
+        f"rowData(values({','.join(value_fields)}))))"
+    )
+
+    try:
+        response = await asyncio.to_thread(
+            service.spreadsheets()
+            .get(
+                spreadsheetId=spreadsheet_id,
+                ranges=[tight_range],
+                includeGridData=True,
+                fields=fields,
+            )
+            .execute
+        )
+    except Exception as exc:
+        logger.warning(
+            "[read_sheet_values] Failed fetching grid metadata for range '%s': %s",
+            tight_range,
+            exc,
+        )
+        return "", ""
+
+    hyperlink_section = ""
+    if include_hyperlinks:
+        hyperlinks = _extract_cell_hyperlinks_from_grid(response)
+        hyperlink_section = _format_sheet_hyperlink_section(
+            hyperlinks=hyperlinks, range_label=tight_range
+        )
+
+    notes_section = ""
+    if include_notes:
+        notes = _extract_cell_notes_from_grid(response)
+        notes_section = _format_sheet_notes_section(
+            notes=notes, range_label=tight_range
+        )
+
+    return hyperlink_section, notes_section
--- a/gsheets/sheets_tools.py
+++ b/gsheets/sheets_tools.py
@@ -15,16 +15,14 @@ from core.server import server
 from core.utils import handle_http_errors, UserInputError
 from core.comments import create_comment_tools
 from gsheets.sheets_helpers import (
-    _a1_range_cell_count,
    CONDITION_TYPES,
    _a1_range_for_values,
    _build_boolean_rule,
    _build_gradient_rule,
    _fetch_detailed_sheet_errors,
-    _fetch_sheet_hyperlinks,
+    _fetch_grid_metadata,
    _fetch_sheets_with_rules,
    _format_conditional_rules_section,
-    _format_sheet_hyperlink_section,
    _format_sheet_error_section,
    _parse_a1_range,
    _parse_condition_values,
@@ -36,7 +34,6 @@ from gsheets.sheets_helpers import (

 # Configure module logger
 logger = logging.getLogger(__name__)
-MAX_HYPERLINK_FETCH_CELLS = 5000


@server.tool()
@@ -179,6 +176,7 @@ async def read_sheet_values(
    spreadsheet_id: str,
    range_name: str = "A1:Z1000",
    include_hyperlinks: bool = False,
+    include_notes: bool = False,
 ) -> str:
    """
    Reads values from a specific range in a Google Sheet.
@@ -189,6 +187,8 @@ async def read_sheet_values(
        range_name (str): The range to read (e.g., "Sheet1!A1:D10", "A1:D10"). Defaults to "A1:Z1000".
        include_hyperlinks (bool): If True, also fetch hyperlink metadata for the range.
            Defaults to False to avoid expensive includeGridData requests.
+        include_notes (bool): If True, also fetch cell notes for the range.
+            Defaults to False to avoid expensive includeGridData requests.

    Returns:
        str: The formatted values from the specified range.
@@ -211,40 +211,13 @@ async def read_sheet_values(
    resolved_range = result.get("range", range_name)
    detailed_range = _a1_range_for_values(resolved_range, values) or resolved_range

-    hyperlink_section = ""
-    if include_hyperlinks:
-        # Use a tight A1 range for includeGridData fetches to avoid expensive
-        # open-ended requests (e.g., A:Z).
-        hyperlink_range = _a1_range_for_values(resolved_range, values)
-        if not hyperlink_range:
-            logger.info(
-                "[read_sheet_values] Skipping hyperlink fetch for range '%s': unable to determine tight bounds",
+    hyperlink_section, notes_section = await _fetch_grid_metadata(
+        service,
+        spreadsheet_id,
        resolved_range,
-            )
-        else:
-            cell_count = _a1_range_cell_count(hyperlink_range) or sum(
-                len(row) for row in values
-            )
-            if cell_count <= MAX_HYPERLINK_FETCH_CELLS:
-                try:
-                    hyperlinks = await _fetch_sheet_hyperlinks(
-                        service, spreadsheet_id, hyperlink_range
-                    )
-                    hyperlink_section = _format_sheet_hyperlink_section(
-                        hyperlinks=hyperlinks, range_label=hyperlink_range
-                    )
-                except Exception as exc:
-                    logger.warning(
-                        "[read_sheet_values] Failed fetching hyperlinks for range '%s': %s",
-                        hyperlink_range,
-                        exc,
-                    )
-            else:
-                logger.info(
-                    "[read_sheet_values] Skipping hyperlink fetch for large range '%s' (%d cells > %d limit)",
-                    hyperlink_range,
-                    cell_count,
-                    MAX_HYPERLINK_FETCH_CELLS,
+        values,
+        include_hyperlinks=include_hyperlinks,
+        include_notes=include_notes,
    )

    detailed_errors_section = ""
@@ -277,7 +250,7 @@ async def read_sheet_values(
    )

    logger.info(f"Successfully read {len(values)} rows for {user_google_email}.")
-    return text_output + hyperlink_section + detailed_errors_section
+    return text_output + hyperlink_section + notes_section + detailed_errors_section


@server.tool()