diff --git a/gdocs/docs_markdown.py b/gdocs/docs_markdown.py index 41a69d1..d9c183d 100644 --- a/gdocs/docs_markdown.py +++ b/gdocs/docs_markdown.py @@ -5,6 +5,7 @@ Converts Google Docs API JSON responses to clean Markdown, preserving: - Headings (H1-H6, Title, Subtitle) - Bold, italic, strikethrough, code, links - Ordered and unordered lists with nesting +- Checklists with checked/unchecked state - Tables with header row separators """ @@ -60,9 +61,20 @@ def convert_doc_to_markdown(doc: dict[str, Any]) -> str: if bullet: list_id = bullet["listId"] nesting = bullet.get("nestingLevel", 0) - is_ordered = _is_ordered_list(lists_meta, list_id, nesting) - if is_ordered: + if _is_checklist(lists_meta, list_id, nesting): + checked = _is_checked(para) + checkbox = "[x]" if checked else "[ ]" + indent = " " * nesting + # Re-render text without strikethrough for checked items + # to avoid redundant ~~text~~ alongside [x] + cb_text = ( + _convert_paragraph_text(para, skip_strikethrough=True) + if checked + else text + ) + lines.append(f"{indent}- {checkbox} {cb_text}") + elif _is_ordered_list(lists_meta, list_id, nesting): key = (list_id, nesting) ordered_counters[key] = ordered_counters.get(key, 0) + 1 counter = ordered_counters[key] @@ -102,16 +114,20 @@ def convert_doc_to_markdown(doc: dict[str, Any]) -> str: return result -def _convert_paragraph_text(para: dict[str, Any]) -> str: +def _convert_paragraph_text( + para: dict[str, Any], skip_strikethrough: bool = False +) -> str: """Convert paragraph elements to inline markdown text.""" parts: list[str] = [] for elem in para.get("elements", []): if "textRun" in elem: - parts.append(_convert_text_run(elem["textRun"])) + parts.append(_convert_text_run(elem["textRun"], skip_strikethrough)) return "".join(parts).strip() -def _convert_text_run(text_run: dict[str, Any]) -> str: +def _convert_text_run( + text_run: dict[str, Any], skip_strikethrough: bool = False +) -> str: """Convert a single text run to markdown.""" content = 
text_run.get("content", "") style = text_run.get("textStyle", {}) @@ -120,10 +136,12 @@ def _convert_text_run(text_run: dict[str, Any]) -> str: if not text: return "" - return _apply_text_style(text, style) + return _apply_text_style(text, style, skip_strikethrough) -def _apply_text_style(text: str, style: dict[str, Any]) -> str: +def _apply_text_style( + text: str, style: dict[str, Any], skip_strikethrough: bool = False +) -> str: """Apply markdown formatting based on text style.""" link = style.get("link", {}) url = link.get("url") @@ -143,7 +161,7 @@ def _apply_text_style(text: str, style: dict[str, Any]) -> str: elif italic: text = f"*{text}*" - if strikethrough: + if strikethrough and not skip_strikethrough: text = f"~~{text}~~" if url: @@ -163,6 +181,37 @@ def _is_ordered_list(lists_meta: dict[str, Any], list_id: str, nesting: int) -> return False +def _is_checklist(lists_meta: dict[str, Any], list_id: str, nesting: int) -> bool: + """Check if a list at a given nesting level is a checklist. + + Google Docs checklists are distinguished from regular bullet lists by having + GLYPH_TYPE_UNSPECIFIED with no glyphSymbol — the Docs UI renders interactive + checkboxes rather than a static glyph character. + """ + list_info = lists_meta.get(list_id, {}) + nesting_levels = list_info.get("listProperties", {}).get("nestingLevels", []) + if nesting < len(nesting_levels): + level = nesting_levels[nesting] + glyph_type = level.get("glyphType", "") + has_glyph_symbol = "glyphSymbol" in level + return glyph_type in ("", "GLYPH_TYPE_UNSPECIFIED") and not has_glyph_symbol + return False + + +def _is_checked(para: dict[str, Any]) -> bool: + """Check if a checklist item is checked. + + Google Docs marks checked checklist items by applying strikethrough + formatting to the paragraph text. 
+ """ + for elem in para.get("elements", []): + if "textRun" in elem: + content = elem["textRun"].get("content", "").strip() + if content: + return elem["textRun"].get("textStyle", {}).get("strikethrough", False) + return False + + def _convert_table(table: dict[str, Any]) -> str: """Convert a table element to markdown.""" rows = table.get("tableRows", []) diff --git a/gdrive/drive_helpers.py b/gdrive/drive_helpers.py index db6ec40..55e342a 100644 --- a/gdrive/drive_helpers.py +++ b/gdrive/drive_helpers.py @@ -305,6 +305,7 @@ def resolve_file_type_mime(file_type: str) -> str: ) return FILE_TYPE_MIME_MAP[lower] + BASE_SHORTCUT_FIELDS = ( "id, mimeType, parents, shortcutDetails(targetId, targetMimeType)" ) diff --git a/tests/gdocs/test_docs_markdown.py b/tests/gdocs/test_docs_markdown.py index fceabfe..804c390 100644 --- a/tests/gdocs/test_docs_markdown.py +++ b/tests/gdocs/test_docs_markdown.py @@ -270,6 +270,69 @@ class TestLists: assert "- Item two" in md +CHECKLIST_DOC = { + "title": "Checklist Test", + "lists": { + "kix.checklist001": { + "listProperties": { + "nestingLevels": [ + {"glyphType": "GLYPH_TYPE_UNSPECIFIED"}, + ] + } + } + }, + "body": { + "content": [ + {"sectionBreak": {"sectionStyle": {}}}, + { + "paragraph": { + "elements": [ + {"textRun": {"content": "Buy groceries\n", "textStyle": {}}} + ], + "paragraphStyle": {"namedStyleType": "NORMAL_TEXT"}, + "bullet": {"listId": "kix.checklist001", "nestingLevel": 0}, + } + }, + { + "paragraph": { + "elements": [ + { + "textRun": { + "content": "Walk the dog\n", + "textStyle": {"strikethrough": True}, + } + } + ], + "paragraphStyle": {"namedStyleType": "NORMAL_TEXT"}, + "bullet": {"listId": "kix.checklist001", "nestingLevel": 0}, + } + }, + ] + }, +} + + +class TestChecklists: + def test_unchecked(self): + md = convert_doc_to_markdown(CHECKLIST_DOC) + assert "- [ ] Buy groceries" in md + + def test_checked(self): + md = convert_doc_to_markdown(CHECKLIST_DOC) + assert "- [x] Walk the dog" in md + + def 
def test_regular_bullet_not_checklist(self):
    """A regular bullet list (LIST_DOC) must render without checkbox markers."""
    rendered = convert_doc_to_markdown(LIST_DOC)
    for checkbox in ("[ ]", "[x]"):
        assert checkbox not in rendered