-
-
Notifications
You must be signed in to change notification settings - Fork 856
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add URL parsing tests from WHATWG (#3188)
Co-authored-by: Kar Petrosyan <92274156+karpetrosyan@users.noreply.github.com>
- Loading branch information
1 parent
92e9dfb
commit db9072f
Showing
4 changed files
with
9,819 additions
and
23 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
# The WHATWG have various tests that can be used to validate the URL parsing. | ||
# | ||
# https://url.spec.whatwg.org/ | ||
|
||
import json | ||
|
||
import pytest | ||
|
||
from httpx._urlparse import urlparse | ||
|
||
# URL test cases from...
# https://github.com/web-platform-tests/wpt/blob/master/url/resources/urltestdata.json
#
# JSON text is UTF-8 by specification, so decode the fixture explicitly rather
# than relying on the platform's default locale encoding.
with open("tests/models/whatwg.json", "r", encoding="utf-8") as fixture:
    test_cases = json.load(fixture)

# Keep only the entries we can check against our parser:
#   * plain-string entries are dropped (presumably comment/section markers in
#     the upstream data -- confirm against the fixture),
#   * entries with a truthy "failure" value are dropped, since this module
#     only tests URLs that are expected to parse.
test_cases = [
    item
    for item in test_cases
    if not isinstance(item, str) and not item.get("failure")
]
|
||
|
||
@pytest.mark.parametrize("test_case", test_cases)
def test_urlparse(test_case):
    """Check `urlparse` against a single WHATWG URL test case.

    `test_case` is one mapping from the WHATWG test data. Its "href" value is
    parsed, and the parsed components are converted into the representation
    used by the test data ("protocol", "hostname", "port", "pathname",
    "search", "hash") before being compared.
    """
    if test_case["href"] in ("a: foo.com", "lolscheme:x x#x%20x"):
        # Skip these two test cases.
        # These are WHATWG cases which do not use percent-encoding for the
        # space character.
        # Anyone know what's going on here?
        #
        # Use an explicit skip rather than a bare `return`, so that these
        # cases are reported as skipped instead of silently passing.
        pytest.skip("WHATWG case does not percent-encode the space character.")

    url = urlparse(test_case["href"])

    # Test cases include the protocol with the trailing ":"
    protocol = url.scheme + ":"
    # Include the square brackets for IPv6 addresses.
    hostname = f"[{url.host}]" if ":" in url.host else url.host
    # The test cases use a string representation of the port.
    port = "" if url.port is None else str(url.port)
    # The path is compared as-is.
    path = url.path
    # The 'search' and 'hash' components in the whatwg tests are semantic,
    # not literal. Our parsing differentiates between no query/hash and
    # empty-string query/hash, so both are collapsed to "" here.
    search = "" if url.query in (None, "") else "?" + str(url.query)
    fragment = "" if url.fragment in (None, "") else "#" + str(url.fragment)

    assert protocol == test_case["protocol"]
    # URL hostnames are case-insensitive.
    # We normalize these, unlike the WHATWG test cases.
    assert hostname.lower() == test_case["hostname"].lower()
    assert port == test_case["port"]
    assert path == test_case["pathname"]
    assert search == test_case["search"]
    assert fragment == test_case["hash"]
Oops, something went wrong.