Merge pull request #191 from Gallaecio/request-headers-from-bytes

Add from_bytes_dict to HttpRequestHeaders
scrapinghub · Nov 20, 2023 · d177243 · d177243
2 parents d45aa1e + 1a43e0c
commit d177243
Show file tree

Hide file tree

Showing 3 changed files with 64 additions and 51 deletions.
diff --git a/tests/test_page_inputs.py b/tests/test_page_inputs.py
@@ -210,7 +210,14 @@ def test_http_request_init_with_response_url() -> None:
     assert str(req.url) == str(resp.url)
 
 
-def test_http_response_headers_from_bytes_dict() -> None:
+@pytest.mark.parametrize(
+    "cls",
+    (
+        HttpRequestHeaders,
+        HttpResponseHeaders,
+    ),
+)
+def test_http_headers_from_bytes_dict(cls) -> None:
     raw_headers = {
         b"Content-Length": [b"316"],
         b"Content-Encoding": [b"gzip", b"br"],
@@ -219,7 +226,7 @@ def test_http_response_headers_from_bytes_dict() -> None:
         "X-missing": None,
         "X-tuple": (b"x", "y"),
     }
-    headers = HttpResponseHeaders.from_bytes_dict(raw_headers)
+    headers = cls.from_bytes_dict(raw_headers)
 
     assert headers.get("content-length") == "316"
     assert headers.get("content-encoding") == "gzip"
@@ -231,12 +238,19 @@ def test_http_response_headers_from_bytes_dict() -> None:
     assert headers.getall("x-tuple") == ["x", "y"]
 
 
-def test_http_response_headers_from_bytes_dict_err() -> None:
+@pytest.mark.parametrize(
+    "cls",
+    (
+        HttpRequestHeaders,
+        HttpResponseHeaders,
+    ),
+)
+def test_http_response_headers_from_bytes_dict_err(cls) -> None:
     with pytest.raises(ValueError):
-        HttpResponseHeaders.from_bytes_dict({b"Content-Length": [316]})
+        cls.from_bytes_dict({b"Content-Length": [316]})
 
     with pytest.raises(ValueError):
-        HttpResponseHeaders.from_bytes_dict({b"Content-Length": 316})
+        cls.from_bytes_dict({b"Content-Length": 316})
 
 
 def test_http_response_headers_init_requests() -> None:

diff --git a/web_poet/_base.py b/web_poet/_base.py
@@ -3,10 +3,11 @@
 In general, users shouldn't import and use the contents of this module.
 """
 
-from typing import Dict, List, Type, TypeVar
+from typing import AnyStr, Dict, List, Tuple, Type, TypeVar, Union
 
 from multidict import CIMultiDict
 
+_AnyStrDict = Dict[AnyStr, Union[AnyStr, List[AnyStr], Tuple[AnyStr, ...]]]
 T_headers = TypeVar("T_headers", bound="_HttpHeaders")
 
 
@@ -31,3 +32,44 @@ def from_name_value_pairs(cls: Type[T_headers], arg: List[Dict]) -> T_headers:
         <_HttpHeaders('Content-Encoding': 'gzip', 'content-length': '648')>
         """
         return cls([(pair["name"], pair["value"]) for pair in arg])
+
+    @classmethod
+    def from_bytes_dict(
+        cls: Type[T_headers], arg: _AnyStrDict, encoding: str = "utf-8"
+    ) -> T_headers:
+        """An alternative constructor for instantiation where the header-value
+        pairs could be in raw bytes form.
+
+        This supports multiple header values in the form of ``List[bytes]`` and
+        ``Tuple[bytes, ...]`` alongside a plain ``bytes`` value. ``str`` and
+        ``None`` values are kept as is; other types raise ``ValueError``.
+
+        By default, it converts the ``bytes`` value using "utf-8". However, this
+        can easily be overridden using the ``encoding`` parameter.
+
+        >>> raw_values = {
+        ...     b"Content-Encoding": [b"gzip", b"br"],
+        ...     b"Content-Type": [b"text/html"],
+        ...     b"content-length": b"648",
+        ... }
+        >>> headers = _HttpHeaders.from_bytes_dict(raw_values)
+        >>> headers
+        <_HttpHeaders('Content-Encoding': 'gzip', 'Content-Encoding': 'br', 'Content-Type': 'text/html', 'content-length': '648')>
+        """
+
+        def _norm(data):
+            if isinstance(data, str) or data is None:
+                return data
+            elif isinstance(data, bytes):
+                return data.decode(encoding)
+            raise ValueError(f"Expecting str or bytes. Received {type(data)}")
+
+        converted = []  # (name, value) pairs, one per individual header value
+
+        for header, value in arg.items():
+            if isinstance(value, (list, tuple)):
+                converted.extend([(_norm(header), _norm(v)) for v in value])
+            else:
+                converted.append((_norm(header), _norm(value)))
+
+        return cls(converted)
diff --git a/web_poet/page_inputs/http.py b/web_poet/page_inputs/http.py
@@ -1,6 +1,6 @@
 import json
 from hashlib import sha1
-from typing import Any, AnyStr, Dict, List, Optional, Tuple, Type, TypeVar, Union
+from typing import Any, Optional, TypeVar, Union
 from urllib.parse import urljoin
 
 import attrs
@@ -20,9 +20,7 @@
 from .url import RequestUrl as _RequestUrl
 from .url import ResponseUrl as _ResponseUrl
 
-T_headers = TypeVar("T_headers", bound="HttpResponseHeaders")
-
-_AnyStrDict = Dict[AnyStr, Union[AnyStr, List[AnyStr], Tuple[AnyStr, ...]]]
+T_headers = TypeVar("T_headers", bound=_HttpHeaders)
 
 
 RequestUrl = _create_deprecated_class("RequestUrl", _RequestUrl)
@@ -113,47 +111,6 @@ class HttpResponseHeaders(_HttpHeaders):
     the API spec of :class:`multidict.CIMultiDict`.
     """
 
-    @classmethod
-    def from_bytes_dict(
-        cls: Type[T_headers], arg: _AnyStrDict, encoding: str = "utf-8"
-    ) -> T_headers:
-        """An alternative constructor for instantiation where the header-value
-        pairs could be in raw bytes form.
-
-        This supports multiple header values in the form of ``List[bytes]`` and
-        ``Tuple[bytes]]`` alongside a plain ``bytes`` value. A value in ``str``
-        also works and wouldn't break the decoding process at all.
-
-        By default, it converts the ``bytes`` value using "utf-8". However, this
-        can easily be overridden using the ``encoding`` parameter.
-
-        >>> raw_values = {
-        ...     b"Content-Encoding": [b"gzip", b"br"],
-        ...     b"Content-Type": [b"text/html"],
-        ...     b"content-length": b"648",
-        ... }
-        >>> headers = HttpResponseHeaders.from_bytes_dict(raw_values)
-        >>> headers
-        <HttpResponseHeaders('Content-Encoding': 'gzip', 'Content-Encoding': 'br', 'Content-Type': 'text/html', 'content-length': '648')>
-        """
-
-        def _norm(data):
-            if isinstance(data, str) or data is None:
-                return data
-            elif isinstance(data, bytes):
-                return data.decode(encoding)
-            raise ValueError(f"Expecting str or bytes. Received {type(data)}")
-
-        converted = []
-
-        for header, value in arg.items():
-            if isinstance(value, list) or isinstance(value, tuple):
-                converted.extend([(_norm(header), _norm(v)) for v in value])
-            else:
-                converted.append((_norm(header), _norm(value)))
-
-        return cls(converted)
-
     def declared_encoding(self) -> Optional[str]:
         """Return encoding detected from the Content-Type header, or None
         if encoding is not found"""