Switch Office XML parsing from xml.etree.ElementTree to defusedxml

This commit is contained in:
Taylor Wilsdon
2026-03-01 17:22:34 -05:00
parent 4e48fd2bb5
commit d8567f2beb
3 changed files with 15 additions and 2 deletions

View File

@@ -2,7 +2,6 @@ import io
import logging
import os
import zipfile
import xml.etree.ElementTree as ET
import ssl
import asyncio
import functools
@@ -10,6 +9,8 @@ import functools
from pathlib import Path
from typing import List, Optional
from defusedxml import ElementTree as ET
from googleapiclient.errors import HttpError
from .api_enablement import get_api_enablement_message
from auth.google_auth import GoogleAuthenticationError
@@ -226,7 +227,7 @@ def extract_office_xml_text(file_bytes: bytes, mime_type: str) -> Optional[str]:
"""
Very light-weight XML scraper for Word, Excel, PowerPoint files.
Returns plain-text if something readable is found, else None.
No external deps — just std-lib zipfile + ElementTree.
Uses zipfile + defusedxml.ElementTree.
"""
shared_strings: List[str] = []
ns_excel_main = "http://schemas.openxmlformats.org/spreadsheetml/2006/main"

View File

@@ -22,6 +22,7 @@ dependencies = [
"python-dotenv>=1.1.0",
"pyyaml>=6.0.2",
"cryptography>=45.0.0",
"defusedxml>=0.7.1",
]
classifiers = [
"Development Status :: 4 - Beta",

11
uv.lock generated
View File

@@ -404,6 +404,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/1c/7c/996760c30f1302704af57c66ff2d723f7d656d0d0b93563b5528a51484bb/cyclopts-4.5.1-py3-none-any.whl", hash = "sha256:0642c93601e554ca6b7b9abd81093847ea4448b2616280f2a0952416574e8c7a", size = 199807 },
]
[[package]]
name = "defusedxml"
version = "0.7.1"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/0f/d5/c66da9b79e5bdb124974bfe172b4daf3c984ebd9c2a06e2b8a4dc7331c72/defusedxml-0.7.1.tar.gz", hash = "sha256:1bb3032db185915b62d7c6209c5a8792be6a32ab2fedacc84e01b52c51aa3e69", size = 75520 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/07/6c/aa3f2f849e01cb6a001cd8554a88d4c77c5c1a31c95bdf1cf9301e6d9ef4/defusedxml-0.7.1-py2.py3-none-any.whl", hash = "sha256:a352e7e428770286cc899e2542b6cdaedb2b4953ff269a210103ec58f6198a61", size = 25604 },
]
[[package]]
name = "dnspython"
version = "2.8.0"
@@ -2039,6 +2048,7 @@ version = "1.13.1"
source = { editable = "." }
dependencies = [
{ name = "cryptography" },
{ name = "defusedxml" },
{ name = "fastapi" },
{ name = "fastmcp" },
{ name = "google-api-python-client" },
@@ -2098,6 +2108,7 @@ valkey = [
[package.metadata]
requires-dist = [
{ name = "cryptography", specifier = ">=45.0.0" },
{ name = "defusedxml", specifier = ">=0.7.1" },
{ name = "fastapi", specifier = ">=0.115.12" },
{ name = "fastmcp", specifier = ">=3.0.2" },
{ name = "google-api-python-client", specifier = ">=2.168.0" },