Merge pull request #580 from Bortlesboat/feat/517-cell-notes

feat(sheets): expose cell notes in read_sheet_values
This commit is contained in:
Taylor Wilsdon
2026-03-15 17:53:43 -04:00
committed by GitHub
2 changed files with 184 additions and 40 deletions

View File

@@ -7,11 +7,15 @@ conditional formatting helpers.
import asyncio import asyncio
import json import json
import logging
import re import re
from typing import List, Optional, Union from typing import List, Optional, Union
from core.utils import UserInputError from core.utils import UserInputError
logger = logging.getLogger(__name__)
MAX_GRID_METADATA_CELLS = 5000
A1_PART_REGEX = re.compile(r"^([A-Za-z]*)(\d*)$") A1_PART_REGEX = re.compile(r"^([A-Za-z]*)(\d*)$")
SHEET_TITLE_SAFE_RE = re.compile(r"^[A-Za-z0-9_]+$") SHEET_TITLE_SAFE_RE = re.compile(r"^[A-Za-z0-9_]+$")
@@ -877,3 +881,170 @@ def _build_gradient_rule(
rule_body["gradientRule"]["midpoint"] = gradient_points[1] rule_body["gradientRule"]["midpoint"] = gradient_points[1]
rule_body["gradientRule"]["maxpoint"] = gradient_points[2] rule_body["gradientRule"]["maxpoint"] = gradient_points[2]
return rule_body return rule_body
def _extract_cell_notes_from_grid(spreadsheet: dict) -> list[dict[str, str]]:
"""
Extract cell notes from spreadsheet grid data.
Returns a list of dictionaries with:
- "cell": cell A1 reference
- "note": the note text
"""
notes: list[dict[str, str]] = []
for sheet in spreadsheet.get("sheets", []) or []:
sheet_title = sheet.get("properties", {}).get("title") or "Unknown"
for grid in sheet.get("data", []) or []:
start_row = _coerce_int(grid.get("startRow"), default=0)
start_col = _coerce_int(grid.get("startColumn"), default=0)
for row_offset, row_data in enumerate(grid.get("rowData", []) or []):
if not row_data:
continue
for col_offset, cell_data in enumerate(
row_data.get("values", []) or []
):
if not cell_data:
continue
note = cell_data.get("note")
if not note:
continue
notes.append(
{
"cell": _format_a1_cell(
sheet_title,
start_row + row_offset,
start_col + col_offset,
),
"note": note,
}
)
return notes
async def _fetch_sheet_notes(
service, spreadsheet_id: str, a1_range: str
) -> list[dict[str, str]]:
"""Fetch cell notes for the given range via spreadsheets.get with includeGridData."""
response = await asyncio.to_thread(
service.spreadsheets()
.get(
spreadsheetId=spreadsheet_id,
ranges=[a1_range],
includeGridData=True,
fields="sheets(properties(title),data(startRow,startColumn,rowData(values(note))))",
)
.execute
)
return _extract_cell_notes_from_grid(response)
def _format_sheet_notes_section(
*, notes: list[dict[str, str]], range_label: str, max_details: int = 25
) -> str:
"""
Format a list of cell notes into a human-readable section.
"""
if not notes:
return ""
lines = []
for item in notes[:max_details]:
cell = item.get("cell") or "(unknown cell)"
note = item.get("note") or "(empty note)"
lines.append(f"- {cell}: {note}")
suffix = (
f"\n... and {len(notes) - max_details} more notes"
if len(notes) > max_details
else ""
)
return f"\n\nCell notes in range '{range_label}':\n" + "\n".join(lines) + suffix
async def _fetch_grid_metadata(
service,
spreadsheet_id: str,
resolved_range: str,
values: List[List[object]],
include_hyperlinks: bool = False,
include_notes: bool = False,
) -> tuple[str, str]:
"""Fetch hyperlinks and/or notes for a range via a single spreadsheets.get call.
Computes tight range bounds, enforces the cell-count cap, builds a combined
``fields`` selector so only one API round-trip is needed when both flags are
``True``, then parses the response into formatted output sections.
Returns:
(hyperlink_section, notes_section) — each is an empty string when the
corresponding flag is ``False`` or no data was found.
"""
if not include_hyperlinks and not include_notes:
return "", ""
tight_range = _a1_range_for_values(resolved_range, values)
if not tight_range:
logger.info(
"[read_sheet_values] Skipping grid metadata fetch for range '%s': "
"unable to determine tight bounds",
resolved_range,
)
return "", ""
cell_count = _a1_range_cell_count(tight_range) or sum(len(row) for row in values)
if cell_count > MAX_GRID_METADATA_CELLS:
logger.info(
"[read_sheet_values] Skipping grid metadata fetch for large range "
"'%s' (%d cells > %d limit)",
tight_range,
cell_count,
MAX_GRID_METADATA_CELLS,
)
return "", ""
# Build a combined fields selector so we hit the API at most once.
value_fields: list[str] = []
if include_hyperlinks:
value_fields.extend(["hyperlink", "textFormatRuns(format(link(uri)))"])
if include_notes:
value_fields.append("note")
fields = (
"sheets(properties(title),data(startRow,startColumn,"
f"rowData(values({','.join(value_fields)}))))"
)
try:
response = await asyncio.to_thread(
service.spreadsheets()
.get(
spreadsheetId=spreadsheet_id,
ranges=[tight_range],
includeGridData=True,
fields=fields,
)
.execute
)
except Exception as exc:
logger.warning(
"[read_sheet_values] Failed fetching grid metadata for range '%s': %s",
tight_range,
exc,
)
return "", ""
hyperlink_section = ""
if include_hyperlinks:
hyperlinks = _extract_cell_hyperlinks_from_grid(response)
hyperlink_section = _format_sheet_hyperlink_section(
hyperlinks=hyperlinks, range_label=tight_range
)
notes_section = ""
if include_notes:
notes = _extract_cell_notes_from_grid(response)
notes_section = _format_sheet_notes_section(
notes=notes, range_label=tight_range
)
return hyperlink_section, notes_section

View File

@@ -15,16 +15,14 @@ from core.server import server
from core.utils import handle_http_errors, UserInputError from core.utils import handle_http_errors, UserInputError
from core.comments import create_comment_tools from core.comments import create_comment_tools
from gsheets.sheets_helpers import ( from gsheets.sheets_helpers import (
_a1_range_cell_count,
CONDITION_TYPES, CONDITION_TYPES,
_a1_range_for_values, _a1_range_for_values,
_build_boolean_rule, _build_boolean_rule,
_build_gradient_rule, _build_gradient_rule,
_fetch_detailed_sheet_errors, _fetch_detailed_sheet_errors,
_fetch_sheet_hyperlinks, _fetch_grid_metadata,
_fetch_sheets_with_rules, _fetch_sheets_with_rules,
_format_conditional_rules_section, _format_conditional_rules_section,
_format_sheet_hyperlink_section,
_format_sheet_error_section, _format_sheet_error_section,
_parse_a1_range, _parse_a1_range,
_parse_condition_values, _parse_condition_values,
@@ -36,7 +34,6 @@ from gsheets.sheets_helpers import (
# Configure module logger # Configure module logger
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
MAX_HYPERLINK_FETCH_CELLS = 5000
@server.tool() @server.tool()
@@ -179,6 +176,7 @@ async def read_sheet_values(
spreadsheet_id: str, spreadsheet_id: str,
range_name: str = "A1:Z1000", range_name: str = "A1:Z1000",
include_hyperlinks: bool = False, include_hyperlinks: bool = False,
include_notes: bool = False,
) -> str: ) -> str:
""" """
Reads values from a specific range in a Google Sheet. Reads values from a specific range in a Google Sheet.
@@ -189,6 +187,8 @@ async def read_sheet_values(
range_name (str): The range to read (e.g., "Sheet1!A1:D10", "A1:D10"). Defaults to "A1:Z1000". range_name (str): The range to read (e.g., "Sheet1!A1:D10", "A1:D10"). Defaults to "A1:Z1000".
include_hyperlinks (bool): If True, also fetch hyperlink metadata for the range. include_hyperlinks (bool): If True, also fetch hyperlink metadata for the range.
Defaults to False to avoid expensive includeGridData requests. Defaults to False to avoid expensive includeGridData requests.
include_notes (bool): If True, also fetch cell notes for the range.
Defaults to False to avoid expensive includeGridData requests.
Returns: Returns:
str: The formatted values from the specified range. str: The formatted values from the specified range.
@@ -211,41 +211,14 @@ async def read_sheet_values(
resolved_range = result.get("range", range_name) resolved_range = result.get("range", range_name)
detailed_range = _a1_range_for_values(resolved_range, values) or resolved_range detailed_range = _a1_range_for_values(resolved_range, values) or resolved_range
hyperlink_section = "" hyperlink_section, notes_section = await _fetch_grid_metadata(
if include_hyperlinks: service,
# Use a tight A1 range for includeGridData fetches to avoid expensive spreadsheet_id,
# open-ended requests (e.g., A:Z). resolved_range,
hyperlink_range = _a1_range_for_values(resolved_range, values) values,
if not hyperlink_range: include_hyperlinks=include_hyperlinks,
logger.info( include_notes=include_notes,
"[read_sheet_values] Skipping hyperlink fetch for range '%s': unable to determine tight bounds", )
resolved_range,
)
else:
cell_count = _a1_range_cell_count(hyperlink_range) or sum(
len(row) for row in values
)
if cell_count <= MAX_HYPERLINK_FETCH_CELLS:
try:
hyperlinks = await _fetch_sheet_hyperlinks(
service, spreadsheet_id, hyperlink_range
)
hyperlink_section = _format_sheet_hyperlink_section(
hyperlinks=hyperlinks, range_label=hyperlink_range
)
except Exception as exc:
logger.warning(
"[read_sheet_values] Failed fetching hyperlinks for range '%s': %s",
hyperlink_range,
exc,
)
else:
logger.info(
"[read_sheet_values] Skipping hyperlink fetch for large range '%s' (%d cells > %d limit)",
hyperlink_range,
cell_count,
MAX_HYPERLINK_FETCH_CELLS,
)
detailed_errors_section = "" detailed_errors_section = ""
if _values_contain_sheets_errors(values): if _values_contain_sheets_errors(values):
@@ -277,7 +250,7 @@ async def read_sheet_values(
) )
logger.info(f"Successfully read {len(values)} rows for {user_google_email}.") logger.info(f"Successfully read {len(values)} rows for {user_google_email}.")
return text_output + hyperlink_section + detailed_errors_section return text_output + hyperlink_section + notes_section + detailed_errors_section
@server.tool() @server.tool()