Merge pull request #580 from Bortlesboat/feat/517-cell-notes

feat(sheets): expose cell notes in read_sheet_values
This commit is contained in:
Taylor Wilsdon
2026-03-15 17:53:43 -04:00
committed by GitHub
2 changed files with 184 additions and 40 deletions

View File

@@ -7,11 +7,15 @@ conditional formatting helpers.
import asyncio
import json
import logging
import re
from typing import List, Optional, Union
from core.utils import UserInputError
logger = logging.getLogger(__name__)
MAX_GRID_METADATA_CELLS = 5000
A1_PART_REGEX = re.compile(r"^([A-Za-z]*)(\d*)$")
SHEET_TITLE_SAFE_RE = re.compile(r"^[A-Za-z0-9_]+$")
@@ -877,3 +881,170 @@ def _build_gradient_rule(
rule_body["gradientRule"]["midpoint"] = gradient_points[1]
rule_body["gradientRule"]["maxpoint"] = gradient_points[2]
return rule_body
def _extract_cell_notes_from_grid(spreadsheet: dict) -> list[dict[str, str]]:
"""
Extract cell notes from spreadsheet grid data.
Returns a list of dictionaries with:
- "cell": cell A1 reference
- "note": the note text
"""
notes: list[dict[str, str]] = []
for sheet in spreadsheet.get("sheets", []) or []:
sheet_title = sheet.get("properties", {}).get("title") or "Unknown"
for grid in sheet.get("data", []) or []:
start_row = _coerce_int(grid.get("startRow"), default=0)
start_col = _coerce_int(grid.get("startColumn"), default=0)
for row_offset, row_data in enumerate(grid.get("rowData", []) or []):
if not row_data:
continue
for col_offset, cell_data in enumerate(
row_data.get("values", []) or []
):
if not cell_data:
continue
note = cell_data.get("note")
if not note:
continue
notes.append(
{
"cell": _format_a1_cell(
sheet_title,
start_row + row_offset,
start_col + col_offset,
),
"note": note,
}
)
return notes
async def _fetch_sheet_notes(
    service, spreadsheet_id: str, a1_range: str
) -> list[dict[str, str]]:
    """Retrieve the notes attached to cells in ``a1_range``.

    Issues one ``spreadsheets.get`` request with ``includeGridData`` enabled
    and a ``fields`` mask limited to sheet titles, grid origins and cell
    notes, then hands the response to ``_extract_cell_notes_from_grid``.

    Args:
        service: Sheets API client exposing ``spreadsheets().get(...)``.
        spreadsheet_id: ID of the spreadsheet to query.
        a1_range: A1-notation range whose notes should be returned.

    Returns:
        A list of ``{"cell": ..., "note": ...}`` dictionaries.
    """
    notes_only_fields = (
        "sheets(properties(title),data(startRow,startColumn,rowData(values(note))))"
    )
    request = service.spreadsheets().get(
        spreadsheetId=spreadsheet_id,
        ranges=[a1_range],
        includeGridData=True,
        fields=notes_only_fields,
    )
    # ``execute`` blocks, so run it in a worker thread instead of the event loop.
    grid_response = await asyncio.to_thread(request.execute)
    return _extract_cell_notes_from_grid(grid_response)
def _format_sheet_notes_section(
*, notes: list[dict[str, str]], range_label: str, max_details: int = 25
) -> str:
"""
Format a list of cell notes into a human-readable section.
"""
if not notes:
return ""
lines = []
for item in notes[:max_details]:
cell = item.get("cell") or "(unknown cell)"
note = item.get("note") or "(empty note)"
lines.append(f"- {cell}: {note}")
suffix = (
f"\n... and {len(notes) - max_details} more notes"
if len(notes) > max_details
else ""
)
return f"\n\nCell notes in range '{range_label}':\n" + "\n".join(lines) + suffix
async def _fetch_grid_metadata(
service,
spreadsheet_id: str,
resolved_range: str,
values: List[List[object]],
include_hyperlinks: bool = False,
include_notes: bool = False,
) -> tuple[str, str]:
"""Fetch hyperlinks and/or notes for a range via a single spreadsheets.get call.
Computes tight range bounds, enforces the cell-count cap, builds a combined
``fields`` selector so only one API round-trip is needed when both flags are
``True``, then parses the response into formatted output sections.
Returns:
(hyperlink_section, notes_section) — each is an empty string when the
corresponding flag is ``False`` or no data was found.
"""
if not include_hyperlinks and not include_notes:
return "", ""
tight_range = _a1_range_for_values(resolved_range, values)
if not tight_range:
logger.info(
"[read_sheet_values] Skipping grid metadata fetch for range '%s': "
"unable to determine tight bounds",
resolved_range,
)
return "", ""
cell_count = _a1_range_cell_count(tight_range) or sum(len(row) for row in values)
if cell_count > MAX_GRID_METADATA_CELLS:
logger.info(
"[read_sheet_values] Skipping grid metadata fetch for large range "
"'%s' (%d cells > %d limit)",
tight_range,
cell_count,
MAX_GRID_METADATA_CELLS,
)
return "", ""
# Build a combined fields selector so we hit the API at most once.
value_fields: list[str] = []
if include_hyperlinks:
value_fields.extend(["hyperlink", "textFormatRuns(format(link(uri)))"])
if include_notes:
value_fields.append("note")
fields = (
"sheets(properties(title),data(startRow,startColumn,"
f"rowData(values({','.join(value_fields)}))))"
)
try:
response = await asyncio.to_thread(
service.spreadsheets()
.get(
spreadsheetId=spreadsheet_id,
ranges=[tight_range],
includeGridData=True,
fields=fields,
)
.execute
)
except Exception as exc:
logger.warning(
"[read_sheet_values] Failed fetching grid metadata for range '%s': %s",
tight_range,
exc,
)
return "", ""
hyperlink_section = ""
if include_hyperlinks:
hyperlinks = _extract_cell_hyperlinks_from_grid(response)
hyperlink_section = _format_sheet_hyperlink_section(
hyperlinks=hyperlinks, range_label=tight_range
)
notes_section = ""
if include_notes:
notes = _extract_cell_notes_from_grid(response)
notes_section = _format_sheet_notes_section(
notes=notes, range_label=tight_range
)
return hyperlink_section, notes_section

View File

@@ -15,16 +15,14 @@ from core.server import server
from core.utils import handle_http_errors, UserInputError
from core.comments import create_comment_tools
from gsheets.sheets_helpers import (
_a1_range_cell_count,
CONDITION_TYPES,
_a1_range_for_values,
_build_boolean_rule,
_build_gradient_rule,
_fetch_detailed_sheet_errors,
_fetch_sheet_hyperlinks,
_fetch_grid_metadata,
_fetch_sheets_with_rules,
_format_conditional_rules_section,
_format_sheet_hyperlink_section,
_format_sheet_error_section,
_parse_a1_range,
_parse_condition_values,
@@ -36,7 +34,6 @@ from gsheets.sheets_helpers import (
# Configure module logger
logger = logging.getLogger(__name__)
MAX_HYPERLINK_FETCH_CELLS = 5000
@server.tool()
@@ -179,6 +176,7 @@ async def read_sheet_values(
spreadsheet_id: str,
range_name: str = "A1:Z1000",
include_hyperlinks: bool = False,
include_notes: bool = False,
) -> str:
"""
Reads values from a specific range in a Google Sheet.
@@ -189,6 +187,8 @@ async def read_sheet_values(
range_name (str): The range to read (e.g., "Sheet1!A1:D10", "A1:D10"). Defaults to "A1:Z1000".
include_hyperlinks (bool): If True, also fetch hyperlink metadata for the range.
Defaults to False to avoid expensive includeGridData requests.
include_notes (bool): If True, also fetch cell notes for the range.
Defaults to False to avoid expensive includeGridData requests.
Returns:
str: The formatted values from the specified range.
@@ -211,40 +211,13 @@ async def read_sheet_values(
resolved_range = result.get("range", range_name)
detailed_range = _a1_range_for_values(resolved_range, values) or resolved_range
hyperlink_section = ""
if include_hyperlinks:
# Use a tight A1 range for includeGridData fetches to avoid expensive
# open-ended requests (e.g., A:Z).
hyperlink_range = _a1_range_for_values(resolved_range, values)
if not hyperlink_range:
logger.info(
"[read_sheet_values] Skipping hyperlink fetch for range '%s': unable to determine tight bounds",
hyperlink_section, notes_section = await _fetch_grid_metadata(
service,
spreadsheet_id,
resolved_range,
)
else:
cell_count = _a1_range_cell_count(hyperlink_range) or sum(
len(row) for row in values
)
if cell_count <= MAX_HYPERLINK_FETCH_CELLS:
try:
hyperlinks = await _fetch_sheet_hyperlinks(
service, spreadsheet_id, hyperlink_range
)
hyperlink_section = _format_sheet_hyperlink_section(
hyperlinks=hyperlinks, range_label=hyperlink_range
)
except Exception as exc:
logger.warning(
"[read_sheet_values] Failed fetching hyperlinks for range '%s': %s",
hyperlink_range,
exc,
)
else:
logger.info(
"[read_sheet_values] Skipping hyperlink fetch for large range '%s' (%d cells > %d limit)",
hyperlink_range,
cell_count,
MAX_HYPERLINK_FETCH_CELLS,
values,
include_hyperlinks=include_hyperlinks,
include_notes=include_notes,
)
detailed_errors_section = ""
@@ -277,7 +250,7 @@ async def read_sheet_values(
)
logger.info(f"Successfully read {len(values)} rows for {user_google_email}.")
return text_output + hyperlink_section + detailed_errors_section
return text_output + hyperlink_section + notes_section + detailed_errors_section
@server.tool()