From 9b152ded168ccf1a6c5b1b70fa4e0752fb30b3ee Mon Sep 17 00:00:00 2001 From: GeoJulien Date: Tue, 11 Jun 2024 10:31:53 +0200 Subject: [PATCH 1/2] chore(deps): add requests to perform http/s requests --- requirements/base.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements/base.txt b/requirements/base.txt index fc74df5..40d29b2 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -4,4 +4,5 @@ GitPython>=3.1,<3.2 mkdocs>=1.5,<2 +requests>=2.31,<3 tzdata==2024.* ; python_version >= "3.9" and sys_platform == "win32" From b4cfa0f149e89f049d8c39bb0a2a0c7492f0467e Mon Sep 17 00:00:00 2001 From: GeoJulien Date: Tue, 11 Jun 2024 11:34:56 +0200 Subject: [PATCH 2/2] refacto(http): use requests instead of urllib to better handle HTTP requests --- mkdocs_rss_plugin/util.py | 68 ++++++++++++++++++--------------------- 1 file changed, 32 insertions(+), 36 deletions(-) diff --git a/mkdocs_rss_plugin/util.py b/mkdocs_rss_plugin/util.py index e966fe0..0d6cfe6 100644 --- a/mkdocs_rss_plugin/util.py +++ b/mkdocs_rss_plugin/util.py @@ -6,15 +6,12 @@ # standard library import logging -import ssl from collections.abc import Iterable from datetime import date, datetime from email.utils import format_datetime from mimetypes import guess_type from pathlib import Path from typing import Any -from urllib import request -from urllib.error import HTTPError, URLError from urllib.parse import urlencode, urlparse, urlunparse # 3rd party @@ -24,6 +21,8 @@ from mkdocs.plugins import get_plugin_logger from mkdocs.structure.pages import Page from mkdocs.utils import get_build_datetime +from requests import Session +from requests.exceptions import HTTPError # package from mkdocs_rss_plugin.constants import MKDOCS_LOGGER_NAME, REMOTE_REQUEST_HEADERS @@ -106,7 +105,11 @@ def __init__( # save integrations self.social_cards = integration_material_social_cards - def build_url(self, base_url: str, path: str, args_dict: dict = None) -> str: + # http/s session + self.req_session = 
Session() + self.req_session.headers.update(REMOTE_REQUEST_HEADERS) + + def build_url(self, base_url: str, path: str, args_dict: dict | None = None) -> str: """Build URL using base URL, cumulating existing and passed path, \ then adding URL arguments. @@ -604,51 +607,44 @@ def get_remote_image_length( image_url: str, http_method: str = "HEAD", attempt: int = 0, - ssl_context: ssl.SSLContext = None, + ssl_verify: bool = True, ) -> int | None: - """Retrieve length for remote images (starting with 'http' \ - in meta.image or meta.illustration). \ - It tries to perform a HEAD request and get the length from the headers. \ - If it fails, it tries again with a GET and disabling SSL verification. - - :param image_url: remote image URL - :type image_url: str - :param http_method: HTTP method used to perform request, defaults to "HEAD" - :type http_method: str, optional - :param attempt: request tries counter, defaults to 0 - :type attempt: int, optional - :param ssl_context: SSL context, defaults to None - :type ssl_context: ssl.SSLContext, optional - - :return: image length as str or None - :rtype: Optional[int] + """Retrieve length for remote images (starting with 'http'). + + Firstly, it tries to perform a HEAD request and get the length from the headers. \ + If it fails, it tries again with a GET and disabling SSL verification. + + Args: + image_url (str): image URL + http_method (str, optional): HTTP method to use for the request. + Defaults to "HEAD". + attempt (int, optional): request tries counter. Defaults to 0. + ssl_verify (bool, optional): option to perform SSL verification or not. + Defaults to True. 
+ + Returns: + int | None: image length as int or None """ - # prepare request - req = request.Request( - image_url, - method=http_method, - headers=REMOTE_REQUEST_HEADERS, - ) # first, try HEAD request to avoid downloading the image try: attempt += 1 - remote_img = request.urlopen(url=req, context=ssl_context) - img_length = remote_img.getheader("content-length") - except (HTTPError, URLError) as err: - logging.warning( + req_response = self.req_session.request( + method=http_method, url=image_url, verify=ssl_verify + ) + req_response.raise_for_status() + img_length = req_response.headers.get("content-length") + except HTTPError as err: + logger.debug( f"Remote image could not been reached: {image_url}. " f"Trying again with GET and disabling SSL verification. Attempt: {attempt}. " f"Trace: {err}" ) if attempt < 2: return self.get_remote_image_length( - image_url, - http_method="GET", - attempt=attempt, - ssl_context=ssl._create_unverified_context(), + image_url, http_method="GET", attempt=attempt, ssl_verify=False ) else: - logging.error( + logger.info( f"Remote image is not reachable: {image_url} after " f"{attempt} attempts. Trace: {err}" )