diff --git a/core/utils.py b/core/utils.py index c5c61ee..ee91fb3 100644 --- a/core/utils.py +++ b/core/utils.py @@ -2,7 +2,6 @@ import io import logging import os import zipfile -import xml.etree.ElementTree as ET import ssl import asyncio import functools @@ -10,6 +9,8 @@ import functools from pathlib import Path from typing import List, Optional +from defusedxml import ElementTree as ET + from googleapiclient.errors import HttpError from .api_enablement import get_api_enablement_message from auth.google_auth import GoogleAuthenticationError @@ -226,7 +227,7 @@ def extract_office_xml_text(file_bytes: bytes, mime_type: str) -> Optional[str]: """ Very light-weight XML scraper for Word, Excel, PowerPoint files. Returns plain-text if something readable is found, else None. - No external deps – just std-lib zipfile + ElementTree. + Uses zipfile + defusedxml.ElementTree. """ shared_strings: List[str] = [] ns_excel_main = "http://schemas.openxmlformats.org/spreadsheetml/2006/main" diff --git a/pyproject.toml b/pyproject.toml index 708851e..8539ef8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,6 +22,7 @@ dependencies = [ "python-dotenv>=1.1.0", "pyyaml>=6.0.2", "cryptography>=45.0.0", + "defusedxml>=0.7.1", ] classifiers = [ "Development Status :: 4 - Beta", diff --git a/uv.lock b/uv.lock index 6802502..859431b 100644 --- a/uv.lock +++ b/uv.lock @@ -404,6 +404,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/1c/7c/996760c30f1302704af57c66ff2d723f7d656d0d0b93563b5528a51484bb/cyclopts-4.5.1-py3-none-any.whl", hash = "sha256:0642c93601e554ca6b7b9abd81093847ea4448b2616280f2a0952416574e8c7a", size = 199807 }, ] +[[package]] +name = "defusedxml" +version = "0.7.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/0f/d5/c66da9b79e5bdb124974bfe172b4daf3c984ebd9c2a06e2b8a4dc7331c72/defusedxml-0.7.1.tar.gz", hash = "sha256:1bb3032db185915b62d7c6209c5a8792be6a32ab2fedacc84e01b52c51aa3e69", size = 75520 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/07/6c/aa3f2f849e01cb6a001cd8554a88d4c77c5c1a31c95bdf1cf9301e6d9ef4/defusedxml-0.7.1-py2.py3-none-any.whl", hash = "sha256:a352e7e428770286cc899e2542b6cdaedb2b4953ff269a210103ec58f6198a61", size = 25604 }, +] + [[package]] name = "dnspython" version = "2.8.0" @@ -2039,6 +2048,7 @@ version = "1.13.1" source = { editable = "." } dependencies = [ { name = "cryptography" }, + { name = "defusedxml" }, { name = "fastapi" }, { name = "fastmcp" }, { name = "google-api-python-client" }, @@ -2098,6 +2108,7 @@ valkey = [ [package.metadata] requires-dist = [ { name = "cryptography", specifier = ">=45.0.0" }, + { name = "defusedxml", specifier = ">=0.7.1" }, { name = "fastapi", specifier = ">=0.115.12" }, { name = "fastmcp", specifier = ">=3.0.2" }, { name = "google-api-python-client", specifier = ">=2.168.0" },