Skip to content

Commit

Permalink
Merge pull request #191 from Gallaecio/request-headers-from-bytes
Browse files Browse the repository at this point in the history
Add from_bytes_dict to HttpRequestHeaders
  • Loading branch information
kmike authored Nov 20, 2023
2 parents d45aa1e + 1a43e0c commit d177243
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 51 deletions.
24 changes: 19 additions & 5 deletions tests/test_page_inputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,14 @@ def test_http_request_init_with_response_url() -> None:
assert str(req.url) == str(resp.url)


def test_http_response_headers_from_bytes_dict() -> None:
@pytest.mark.parametrize(
"cls",
(
HttpRequestHeaders,
HttpResponseHeaders,
),
)
def test_http_headers_from_bytes_dict(cls) -> None:
raw_headers = {
b"Content-Length": [b"316"],
b"Content-Encoding": [b"gzip", b"br"],
Expand All @@ -219,7 +226,7 @@ def test_http_response_headers_from_bytes_dict() -> None:
"X-missing": None,
"X-tuple": (b"x", "y"),
}
headers = HttpResponseHeaders.from_bytes_dict(raw_headers)
headers = cls.from_bytes_dict(raw_headers)

assert headers.get("content-length") == "316"
assert headers.get("content-encoding") == "gzip"
Expand All @@ -231,12 +238,19 @@ def test_http_response_headers_from_bytes_dict() -> None:
assert headers.getall("x-tuple") == ["x", "y"]


def test_http_response_headers_from_bytes_dict_err() -> None:
@pytest.mark.parametrize(
"cls",
(
HttpRequestHeaders,
HttpResponseHeaders,
),
)
def test_http_response_headers_from_bytes_dict_err(cls) -> None:
with pytest.raises(ValueError):
HttpResponseHeaders.from_bytes_dict({b"Content-Length": [316]})
cls.from_bytes_dict({b"Content-Length": [316]})

with pytest.raises(ValueError):
HttpResponseHeaders.from_bytes_dict({b"Content-Length": 316})
cls.from_bytes_dict({b"Content-Length": 316})


def test_http_response_headers_init_requests() -> None:
Expand Down
44 changes: 43 additions & 1 deletion web_poet/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,11 @@
In general, users shouldn't import and use the contents of this module.
"""

from typing import Dict, List, Type, TypeVar
from typing import AnyStr, Dict, List, Tuple, Type, TypeVar, Union

from multidict import CIMultiDict

# Type of the ``arg`` mapping accepted by ``from_bytes_dict``: header names
# (``str`` or ``bytes``) mapped to a single value or a list/tuple of values.
_AnyStrDict = Dict[AnyStr, Union[AnyStr, List[AnyStr], Tuple[AnyStr, ...]]]
# Type variable bound to ``_HttpHeaders``; the alternative constructors take
# ``cls: Type[T_headers]`` and are annotated as returning ``T_headers``.
T_headers = TypeVar("T_headers", bound="_HttpHeaders")


Expand All @@ -31,3 +32,44 @@ def from_name_value_pairs(cls: Type[T_headers], arg: List[Dict]) -> T_headers:
<_HttpHeaders('Content-Encoding': 'gzip', 'content-length': '648')>
"""
return cls([(pair["name"], pair["value"]) for pair in arg])

@classmethod
def from_bytes_dict(
    cls: Type[T_headers], arg: _AnyStrDict, encoding: str = "utf-8"
) -> T_headers:
    """An alternative constructor for instantiation where the header-value
    pairs could be in raw bytes form.

    This supports multiple header values in the form of ``List[bytes]`` and
    ``Tuple[bytes]`` alongside a plain ``bytes`` value. A list or tuple
    produces one header entry per item, in order. A name or value that is
    already a ``str`` is kept as it is, and ``None`` is passed through
    unchanged.

    By default, it converts the ``bytes`` value using "utf-8". However, this
    can easily be overridden using the ``encoding`` parameter.

    A ``ValueError`` is raised when a header name or value is anything other
    than ``str``, ``bytes`` or ``None``.

    >>> raw_values = {
    ...     b"Content-Encoding": [b"gzip", b"br"],
    ...     b"Content-Type": [b"text/html"],
    ...     b"content-length": b"648",
    ... }
    >>> headers = _HttpHeaders.from_bytes_dict(raw_values)
    >>> headers
    <_HttpHeaders('Content-Encoding': 'gzip', 'Content-Encoding': 'br', 'Content-Type': 'text/html', 'content-length': '648')>
    """

    def _norm(data):
        # ``str`` and ``None`` need no decoding; only ``bytes`` are converted.
        if data is None or isinstance(data, str):
            return data
        if isinstance(data, bytes):
            return data.decode(encoding)
        raise ValueError(f"Expecting str or bytes. Received {type(data)}")

    converted = []
    for header, value in arg.items():
        # Treat a scalar as a one-item sequence so both shapes share one path.
        values = value if isinstance(value, (list, tuple)) else (value,)
        converted.extend((_norm(header), _norm(v)) for v in values)

    return cls(converted)
47 changes: 2 additions & 45 deletions web_poet/page_inputs/http.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import json
from hashlib import sha1
from typing import Any, AnyStr, Dict, List, Optional, Tuple, Type, TypeVar, Union
from typing import Any, Optional, TypeVar, Union
from urllib.parse import urljoin

import attrs
Expand All @@ -20,9 +20,7 @@
from .url import RequestUrl as _RequestUrl
from .url import ResponseUrl as _ResponseUrl

T_headers = TypeVar("T_headers", bound="HttpResponseHeaders")

_AnyStrDict = Dict[AnyStr, Union[AnyStr, List[AnyStr], Tuple[AnyStr, ...]]]
T_headers = TypeVar("T_headers", bound=_HttpHeaders)


RequestUrl = _create_deprecated_class("RequestUrl", _RequestUrl)
Expand Down Expand Up @@ -113,47 +111,6 @@ class HttpResponseHeaders(_HttpHeaders):
the API spec of :class:`multidict.CIMultiDict`.
"""

@classmethod
def from_bytes_dict(
    cls: Type[T_headers], arg: _AnyStrDict, encoding: str = "utf-8"
) -> T_headers:
    """An alternative constructor for instantiation where the header-value
    pairs could be in raw bytes form.

    This supports multiple header values in the form of ``List[bytes]`` and
    ``Tuple[bytes]`` alongside a plain ``bytes`` value. A list or tuple
    produces one header entry per item, in order. A name or value that is
    already a ``str`` is kept as it is, and ``None`` is passed through
    unchanged.

    By default, it converts the ``bytes`` value using "utf-8". However, this
    can easily be overridden using the ``encoding`` parameter.

    A ``ValueError`` is raised when a header name or value is anything other
    than ``str``, ``bytes`` or ``None``.

    >>> raw_values = {
    ...     b"Content-Encoding": [b"gzip", b"br"],
    ...     b"Content-Type": [b"text/html"],
    ...     b"content-length": b"648",
    ... }
    >>> headers = HttpResponseHeaders.from_bytes_dict(raw_values)
    >>> headers
    <HttpResponseHeaders('Content-Encoding': 'gzip', 'Content-Encoding': 'br', 'Content-Type': 'text/html', 'content-length': '648')>
    """

    def _norm(data):
        # ``str`` and ``None`` need no decoding; only ``bytes`` are converted.
        if data is None or isinstance(data, str):
            return data
        if isinstance(data, bytes):
            return data.decode(encoding)
        raise ValueError(f"Expecting str or bytes. Received {type(data)}")

    converted = []
    for header, value in arg.items():
        # Treat a scalar as a one-item sequence so both shapes share one path.
        values = value if isinstance(value, (list, tuple)) else (value,)
        converted.extend((_norm(header), _norm(v)) for v in values)

    return cls(converted)

def declared_encoding(self) -> Optional[str]:
"""Return encoding detected from the Content-Type header, or None
if encoding is not found"""
Expand Down

0 comments on commit d177243

Please sign in to comment.