Skip to content

Commit

Permalink
Add URL encoding helpers
Browse files Browse the repository at this point in the history
  - Added is_encoded_url(), encode_url() and decode_url()
  • Loading branch information
brunato committed Apr 26, 2024
1 parent dc23369 commit b0aebfb
Show file tree
Hide file tree
Showing 2 changed files with 104 additions and 2 deletions.
60 changes: 59 additions & 1 deletion tests/test_locations.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
import xmlschema.locations
from xmlschema.locations import LocationPath, LocationPosixPath, LocationWindowsPath, \
is_url, is_local_url, is_remote_url, url_path_is_file, normalize_url, \
normalize_locations, match_location
normalize_locations, match_location, is_encoded_url, encode_url, decode_url

TEST_CASES_DIR = str(pathlib.Path(__file__).absolute().parent.joinpath('test_cases'))

Expand Down Expand Up @@ -85,6 +85,33 @@ def check_url(self, url, expected):
expected_path = PurePath(expected_parts.path)
self.assertEqual(path, expected_path, "%r: Paths differ." % url)

def test_urlsplit(self):
    # Each sample pairs a URL with the (scheme, netloc, path, query, fragment)
    # tuple that urlsplit() is expected to produce for it. The samples cover a
    # plain URL, a colon inside the path, a colon inside the fragment, an
    # '@'/':' pair inside the network location and a query string.
    samples = (
        ("https://xmlschema.test/schema/test.xsd",
         ("https", "xmlschema.test", "/schema/test.xsd", '', '')),
        ("https://xmlschema.test/xs:schema/test.xsd",
         ("https", "xmlschema.test", "/xs:schema/test.xsd", '', '')),
        ("https://xmlschema.test/schema/test.xsd#xs:element",
         ("https", "xmlschema.test", "/schema/test.xsd", '', 'xs:element')),
        ("https://xmlschema.test@username:password/schema/test.xsd",
         ("https", "xmlschema.test@username:password", "/schema/test.xsd", '', '')),
        ("https://xmlschema.test/schema/test.xsd?id=10",
         ("https", "xmlschema.test", "/schema/test.xsd", 'id=10', '')),
    )

    # A plain loop (no subTest) keeps the stop-at-first-failure behaviour of
    # a straight sequence of assertions.
    for url, expected_parts in samples:
        self.assertEqual(urlsplit(url), expected_parts)

def test_path_from_uri(self):
with self.assertRaises(ValueError) as ec:
LocationPath.from_uri('')
Expand Down Expand Up @@ -346,6 +373,37 @@ def test_url_path_is_file_function(self):
with patch('platform.system', MagicMock(return_value="Windows")):
self.assertFalse(url_path_is_file('file:///c:/Windows/unknown'))

def test_is_encoded_url(self):
    # A URL without percent-escapes is not reported as encoded ...
    plain_url = "https://xmlschema.test/schema/test.xsd"
    self.assertFalse(is_encoded_url(plain_url))

    # ... while one carrying '%20' and '%23' escapes is.
    escaped_url = "https://xmlschema.test/schema/issue%20%231999.xsd"
    self.assertTrue(is_encoded_url(escaped_url))

    # Spaces and plus signs, alone or mixed, are not evidence of encoding.
    for text in ("a b c", "a+b+c", "a b+c"):
        self.assertFalse(is_encoded_url(text))

def test_encode_and_decode_url(self):
    # (original URL, expected result of the default encoding) pairs: nothing
    # to encode, a space in the path, ':' and '@' in the path, and spaces in
    # both the path and the query.
    xml_cases = (
        ("https://xmlschema.test/schema/test.xsd",
         "https://xmlschema.test/schema/test.xsd"),
        ("https://xmlschema.test/schema 2/test.xsd",
         "https://xmlschema.test/schema%202/test.xsd"),
        ("https://xmlschema.test@u:p/xs:schema@2/test.xsd",
         "https://xmlschema.test@u:p/xs%3Aschema%402/test.xsd"),
        ("https://xmlschema.test/schema 2/test.xsd?name=2 id=3",
         "https://xmlschema.test/schema%202/test.xsd?name=2%20id=3"),
    )
    for original, expected in xml_cases:
        self.assertEqual(encode_url(original), expected)
        # Default decoding must give back the original URL.
        self.assertEqual(decode_url(encode_url(original)), original)

    # The last sample is reused for the 'html' method, where the space in the
    # query is expected to be written as '+'.
    original = xml_cases[-1][0]
    html_encoded = encode_url(original, method='html')
    self.assertEqual(html_encoded,
                     "https://xmlschema.test/schema%202/test.xsd?name=2+id=3")

    # 'html' decoding reverses both encodings, while the default decoding
    # does not restore an URL that was encoded with method='html'.
    self.assertEqual(decode_url(html_encoded, method='html'), original)
    self.assertEqual(decode_url(encode_url(original), method='html'), original)
    self.assertNotEqual(decode_url(html_encoded), original)

def test_normalize_locations_function(self):
locations = normalize_locations(
[('tns0', 'alpha'), ('tns1', 'http://example.com/beta')], base_url='/home/user'
Expand Down
46 changes: 45 additions & 1 deletion xmlschema/locations.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from collections.abc import MutableMapping
from pathlib import Path, PurePath, PurePosixPath, PureWindowsPath
from typing import Optional, Iterable
from urllib.parse import urlsplit, urlunsplit, unquote, quote_from_bytes
from urllib.parse import urlsplit, urlunsplit, quote, quote_plus, unquote, unquote_plus, quote_from_bytes

from .exceptions import XMLSchemaValueError
from .aliases import NormalizedLocationsType, LocationsType
Expand Down Expand Up @@ -239,6 +239,50 @@ def url_path_is_file(url: str) -> bool:
return os.path.isfile(path)


def is_encoded_url(url: str) -> bool:
    """
    Determines whether the given URL is encoded, i.e. whether it contains at
    least one percent-escape that `unquote()` would decode.

    Plus signs are never taken as evidence of encoding: a '+' can be either a
    literal plus or a form-encoded space, so that case is not univocal and
    the plus signs are ignored for the result.

    :param url: the URL, or part of an URL, to check.
    :return: `True` if unquoting the URL changes it, `False` otherwise.
    """
    # A second check that unquoted a copy of the URL with '+' replaced by '$'
    # used to follow this one. That replacement can neither create nor destroy
    # a '%HH' escape, so the extra check could only succeed when this one
    # already had: it was dead code and has been dropped.
    return unquote(url) != url


def encode_url(url: str, method: str = 'xml') -> str:
    """
    Encode the given url, if necessary.

    The URL is returned unchanged when `is_encoded_url()` reports it as
    already encoded. Otherwise it is split into its five components and each
    one is percent-encoded with the set of safe characters proper to it.

    :param url: the URL to encode.
    :param method: with 'html' the query and the fragment are encoded using \
    `quote_plus()`, so spaces become '+'; with any other value `quote()` is \
    used and spaces become '%20'.
    :return: the encoded URL.
    """
    if is_encoded_url(url):
        return url

    query_quote = quote_plus if method == 'html' else quote

    parts = urlsplit(url)
    return urlunsplit((
        parts.scheme,
        # '@' and ':' separate userinfo, host and port. '[' and ']' delimit an
        # IP literal host (e.g. 'http://[::1]:8080/') and must be kept as they
        # are, otherwise the host is turned into the invalid '%5B::1%5D'.
        quote(parts.netloc, safe='@:[]'),
        quote(parts.path, safe='/'),
        query_quote(parts.query, safe=';/?:@=&'),
        query_quote(parts.fragment, safe=';/?:@=&'),
    ))


def decode_url(url: str, method: str = 'xml') -> str:
    """
    Decode the given url, if necessary.

    The URL is returned unchanged unless `is_encoded_url()` reports it as
    encoded. Otherwise it is split into its five components, each one is
    unquoted and the URL is reassembled.

    :param url: the URL to decode.
    :param method: with 'html' the query and the fragment are decoded using \
    `unquote_plus()`, so '+' becomes a space; with any other value \
    `unquote()` is used and plus signs are kept.
    :return: the decoded URL.
    """
    if is_encoded_url(url):
        decode_part = unquote_plus if method == 'html' else unquote

        scheme, netloc, path, query, fragment = urlsplit(url)

        # Network location and path are always decoded with unquote(): only
        # the query and the fragment depend on the selected method.
        return urlunsplit((
            scheme,
            unquote(netloc),
            unquote(path),
            decode_part(query),
            decode_part(fragment),
        ))

    return url


def normalize_locations(locations: LocationsType,
base_url: Optional[str] = None,
keep_relative: bool = False) -> NormalizedLocationsType:
Expand Down

0 comments on commit b0aebfb

Please sign in to comment.