Merge pull request #533 from taylorwilsdon/issues/526

enh: parse Office XML with defusedxml.ElementTree instead of the stdlib xml.etree.ElementTree
This commit is contained in:
Taylor Wilsdon
2026-03-01 17:26:06 -05:00
committed by GitHub
3 changed files with 15 additions and 2 deletions

View File

@@ -2,7 +2,6 @@ import io
import logging import logging
import os import os
import zipfile import zipfile
import xml.etree.ElementTree as ET
import ssl import ssl
import asyncio import asyncio
import functools import functools
@@ -10,6 +9,8 @@ import functools
from pathlib import Path from pathlib import Path
from typing import List, Optional from typing import List, Optional
from defusedxml import ElementTree as ET
from googleapiclient.errors import HttpError from googleapiclient.errors import HttpError
from .api_enablement import get_api_enablement_message from .api_enablement import get_api_enablement_message
from auth.google_auth import GoogleAuthenticationError from auth.google_auth import GoogleAuthenticationError
@@ -226,7 +227,7 @@ def extract_office_xml_text(file_bytes: bytes, mime_type: str) -> Optional[str]:
""" """
Very light-weight XML scraper for Word, Excel, PowerPoint files. Very light-weight XML scraper for Word, Excel, PowerPoint files.
Returns plain-text if something readable is found, else None. Returns plain-text if something readable is found, else None.
No external deps just std-lib zipfile + ElementTree. Uses zipfile + defusedxml.ElementTree.
""" """
shared_strings: List[str] = [] shared_strings: List[str] = []
ns_excel_main = "http://schemas.openxmlformats.org/spreadsheetml/2006/main" ns_excel_main = "http://schemas.openxmlformats.org/spreadsheetml/2006/main"

View File

@@ -22,6 +22,7 @@ dependencies = [
"python-dotenv>=1.1.0", "python-dotenv>=1.1.0",
"pyyaml>=6.0.2", "pyyaml>=6.0.2",
"cryptography>=45.0.0", "cryptography>=45.0.0",
"defusedxml>=0.7.1",
] ]
classifiers = [ classifiers = [
"Development Status :: 4 - Beta", "Development Status :: 4 - Beta",

11
uv.lock generated
View File

@@ -404,6 +404,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/1c/7c/996760c30f1302704af57c66ff2d723f7d656d0d0b93563b5528a51484bb/cyclopts-4.5.1-py3-none-any.whl", hash = "sha256:0642c93601e554ca6b7b9abd81093847ea4448b2616280f2a0952416574e8c7a", size = 199807 }, { url = "https://files.pythonhosted.org/packages/1c/7c/996760c30f1302704af57c66ff2d723f7d656d0d0b93563b5528a51484bb/cyclopts-4.5.1-py3-none-any.whl", hash = "sha256:0642c93601e554ca6b7b9abd81093847ea4448b2616280f2a0952416574e8c7a", size = 199807 },
] ]
[[package]]
name = "defusedxml"
version = "0.7.1"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/0f/d5/c66da9b79e5bdb124974bfe172b4daf3c984ebd9c2a06e2b8a4dc7331c72/defusedxml-0.7.1.tar.gz", hash = "sha256:1bb3032db185915b62d7c6209c5a8792be6a32ab2fedacc84e01b52c51aa3e69", size = 75520 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/07/6c/aa3f2f849e01cb6a001cd8554a88d4c77c5c1a31c95bdf1cf9301e6d9ef4/defusedxml-0.7.1-py2.py3-none-any.whl", hash = "sha256:a352e7e428770286cc899e2542b6cdaedb2b4953ff269a210103ec58f6198a61", size = 25604 },
]
[[package]] [[package]]
name = "dnspython" name = "dnspython"
version = "2.8.0" version = "2.8.0"
@@ -2039,6 +2048,7 @@ version = "1.13.1"
source = { editable = "." } source = { editable = "." }
dependencies = [ dependencies = [
{ name = "cryptography" }, { name = "cryptography" },
{ name = "defusedxml" },
{ name = "fastapi" }, { name = "fastapi" },
{ name = "fastmcp" }, { name = "fastmcp" },
{ name = "google-api-python-client" }, { name = "google-api-python-client" },
@@ -2098,6 +2108,7 @@ valkey = [
[package.metadata] [package.metadata]
requires-dist = [ requires-dist = [
{ name = "cryptography", specifier = ">=45.0.0" }, { name = "cryptography", specifier = ">=45.0.0" },
{ name = "defusedxml", specifier = ">=0.7.1" },
{ name = "fastapi", specifier = ">=0.115.12" }, { name = "fastapi", specifier = ">=0.115.12" },
{ name = "fastmcp", specifier = ">=3.0.2" }, { name = "fastmcp", specifier = ">=3.0.2" },
{ name = "google-api-python-client", specifier = ">=2.168.0" }, { name = "google-api-python-client", specifier = ">=2.168.0" },