diff --git a/Doc/library/collections.rst b/Doc/library/collections.rst
index 9610419f078..2a9d9a13e3d 100644
--- a/Doc/library/collections.rst
+++ b/Doc/library/collections.rst
@@ -14,7 +14,8 @@
    import itertools
    __name__ = '<doctest>'
 
-**Source code:** :source:`Lib/collections.py` and :source:`Lib/_abcoll.py`
+**Source code:** :source:`Lib/collections/__init__.py` and
+:source:`Lib/_abcoll.py`
 
 --------------
 
diff --git a/Lib/_abcoll.py b/Lib/_abcoll.py
index eb0fccb90af..b5736e06bbf 100644
--- a/Lib/_abcoll.py
+++ b/Lib/_abcoll.py
@@ -619,8 +619,7 @@ def __contains__(self, key):
         return key in self._mapping
 
     def __iter__(self):
-        for key in self._mapping:
-            yield key
+        yield from self._mapping
 
 
 KeysView.register(type({}.viewkeys()))
diff --git a/Lib/collections.py b/Lib/collections/__init__.py
similarity index 99%
rename from Lib/collections.py
rename to Lib/collections/__init__.py
index f2ad9726d57..577aaf9a367 100644
--- a/Lib/collections.py
+++ b/Lib/collections/__init__.py
@@ -11,8 +11,9 @@
 '''
 
 __all__ = ['Counter', 'deque', 'defaultdict', 'namedtuple', 'OrderedDict']
-# For bootstrapping reasons, the collection ABCs are defined in _abcoll.py.
-# They should however be considered an integral part of collections.py.
+
+# For backwards compatibility, continue to make the collections ABCs
+# available through the collections module.
 from _abcoll import *
 import _abcoll
 __all__ += _abcoll.__all__
diff --git a/Lib/collections/abc.py b/Lib/collections/abc.py
new file mode 100644
index 00000000000..e50125744bc
--- /dev/null
+++ b/Lib/collections/abc.py
@@ -0,0 +1,2 @@
+from _abcoll import *
+from _abcoll import __all__
diff --git a/Lib/copyreg.py b/Lib/copyreg.py
new file mode 100644
index 00000000000..86246b7fae6
--- /dev/null
+++ b/Lib/copyreg.py
@@ -0,0 +1,2 @@
+from copy_reg import *
+from copy_reg import __all__, __doc__
diff --git a/Lib/http/__init__.py b/Lib/http/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/Lib/http/client.py b/Lib/http/client.py
new file mode 100644
index 00000000000..9fa679bbbd7
--- /dev/null
+++ b/Lib/http/client.py
@@ -0,0 +1,13 @@
+from httplib import (__doc__, HTTPResponse, HTTPConnection, HTTPException,
+                     NotConnected, UnknownProtocol, UnknownTransferEncoding,
+                     UnimplementedFileMode, IncompleteRead, InvalidURL,
+                     ImproperConnectionState, CannotSendRequest,
+                     CannotSendHeader, ResponseNotReady, BadStatusLine,
+                     LineTooLong, error, responses)
+
+__all__ = ["HTTPResponse", "HTTPConnection",
+           "HTTPException", "NotConnected", "UnknownProtocol",
+           "UnknownTransferEncoding", "UnimplementedFileMode",
+           "IncompleteRead", "InvalidURL", "ImproperConnectionState",
+           "CannotSendRequest", "CannotSendHeader", "ResponseNotReady",
+           "BadStatusLine", "LineTooLong", "error", "responses"]
diff --git a/Lib/http/cookiejar.py b/Lib/http/cookiejar.py
new file mode 100644
index 00000000000..13001acf6bc
--- /dev/null
+++ b/Lib/http/cookiejar.py
@@ -0,0 +1,5 @@
+from cookielib import *
+from cookielib import __doc__
+
+__all__ = ['Cookie', 'CookieJar', 'CookiePolicy', 'DefaultCookiePolicy',
+           'FileCookieJar', 'LWPCookieJar', 'LoadError', 'MozillaCookieJar']
diff --git a/Lib/http/cookies.py b/Lib/http/cookies.py
new file mode 100644
index 00000000000..b7084753631
--- /dev/null
+++ b/Lib/http/cookies.py
@@ -0,0 +1,93 @@
+r"""
+Here's a sample session to show how to use this module.
+At the moment, this is the only documentation.
+
+The Basics
+----------
+
+Importing is easy...
+
+   >>> from http import cookies
+
+Most of the time you start by creating a cookie.
+
+   >>> C = cookies.SimpleCookie()
+
+Once you've created your Cookie, you can add values just as if it were
+a dictionary.
+
+   >>> C = cookies.SimpleCookie()
+   >>> C["fig"] = "newton"
+   >>> C["sugar"] = "wafer"
+   >>> C.output()
+   'Set-Cookie: fig=newton\r\nSet-Cookie: sugar=wafer'
+
+Notice that the printable representation of a Cookie is the
+appropriate format for a Set-Cookie: header. This is the
+default behavior. You can change the header and printed
+attributes by using the .output() function
+
+   >>> C = cookies.SimpleCookie()
+   >>> C["rocky"] = "road"
+   >>> C["rocky"]["path"] = "/cookie"
+   >>> print(C.output(header="Cookie:"))
+   Cookie: rocky=road; Path=/cookie
+   >>> print(C.output(attrs=[], header="Cookie:"))
+   Cookie: rocky=road
+
+The load() method of a Cookie extracts cookies from a string. In a
+CGI script, you would use this method to extract the cookies from the
+HTTP_COOKIE environment variable.
+
+   >>> C = cookies.SimpleCookie()
+   >>> C.load("chips=ahoy; vienna=finger")
+   >>> C.output()
+   'Set-Cookie: chips=ahoy\r\nSet-Cookie: vienna=finger'
+
+The load() method is darn-tootin smart about identifying cookies
+within a string. Escaped quotation marks, nested semicolons, and other
+such trickeries do not confuse it.
+
+   >>> C = cookies.SimpleCookie()
+   >>> C.load('keebler="E=everybody; L=\\"Loves\\"; fudge=\\012;";')
+   >>> print(C)
+   Set-Cookie: keebler="E=everybody; L=\"Loves\"; fudge=\012;"
+
+Each element of the Cookie also supports all of the RFC 2109
+Cookie attributes. Here's an example which sets the Path
+attribute.
+
+   >>> C = cookies.SimpleCookie()
+   >>> C["oreo"] = "doublestuff"
+   >>> C["oreo"]["path"] = "/"
+   >>> print(C)
+   Set-Cookie: oreo=doublestuff; Path=/
+
+Each dictionary element has a 'value' attribute, which gives you
+back the value associated with the key.
+
+   >>> C = cookies.SimpleCookie()
+   >>> C["twix"] = "none for you"
+   >>> C["twix"].value
+   'none for you'
+
+The SimpleCookie expects that all values should be standard strings.
+Just to be sure, SimpleCookie invokes the str() builtin to convert
+the value to a string, when the values are set dictionary-style.
+
+   >>> C = cookies.SimpleCookie()
+   >>> C["number"] = 7
+   >>> C["string"] = "seven"
+   >>> C["number"].value
+   '7'
+   >>> C["string"].value
+   'seven'
+   >>> C.output()
+   'Set-Cookie: number=7\r\nSet-Cookie: string=seven'
+
+Finis.
+""" + +from Cookie import CookieError, BaseCookie, Morsel, SimpleCookie + +__all__ = ['CookieError', 'BaseCookie', 'SimpleCookie'] diff --git a/Lib/http/server.py b/Lib/http/server.py new file mode 100644 index 00000000000..0bd87d62148 --- /dev/null +++ b/Lib/http/server.py @@ -0,0 +1,9 @@ +__all__ = ['HTTPServer', 'BaseHTTPRequestHandler', 'SimpleHTTPRequestHandler', + 'CGIHTTPRequestHandler'] + +from BaseHTTPServer import HTTPServer, BaseHTTPRequestHandler +from CGIHTTPServer import CGIHTTPRequestHandler, executable, nobody_uid +from SimpleHTTPServer import SimpleHTTPRequestHandler, test + +if __name__ == '__main__': + test() diff --git a/Lib/importlib/__init__.py b/Lib/importlib/__init__.py index ad31a1ac477..ee4dd48d9ea 100644 --- a/Lib/importlib/__init__.py +++ b/Lib/importlib/__init__.py @@ -36,3 +36,5 @@ def import_module(name, package=None): name = _resolve_name(name[level:], package, level) __import__(name) return sys.modules[name] + +reload = reload diff --git a/Lib/test/test_itertools.py b/Lib/test/test_itertools.py index 2cdcbb21efc..63d8b85fbd0 100644 --- a/Lib/test/test_itertools.py +++ b/Lib/test/test_itertools.py @@ -585,6 +585,10 @@ def test_izip_tuple_reuse(self): ids = map(id, list(izip('abc', 'def'))) self.assertEqual(len(dict.fromkeys(ids)), len(ids)) + def test_aliases(self): + self.assertEqual(izip_longest, zip_longest) + self.assertEqual(ifilterfalse, filterfalse) + def test_iziplongest(self): for args in [ ['abc', range(6)], diff --git a/Lib/test/test_pyclbr.py b/Lib/test/test_pyclbr.py index 3bfee8d2434..2de6d20f3a4 100644 --- a/Lib/test/test_pyclbr.py +++ b/Lib/test/test_pyclbr.py @@ -93,7 +93,10 @@ def ismethod(oclass, obj, name): continue self.assertHasattr(module, name, ignore) py_item = getattr(module, name) - if isinstance(value, pyclbr.Function): + if name == '__path__': + self.assertIsInstance(py_item, list) + self.assertEqual(py_item, value) + elif isinstance(value, pyclbr.Function): self.assertIsInstance(py_item, (FunctionType, BuiltinFunctionType)) if py_item.__module__ != moduleName: continue # skip functions that came from somewhere else diff --git a/Lib/test/test_urllib_parse.py b/Lib/test/test_urllib_parse.py new file mode 100644 index 00000000000..02eb1ea605e --- /dev/null +++ b/Lib/test/test_urllib_parse.py @@ -0,0 +1,436 @@ +"""Tests for backported functions in urllib.parse""" + +from __future__ import unicode_literals +import urllib.parse +import http.client +import unittest + +def hexescape(char): + """Escape char as RFC 2396 specifies""" + hex_repr = hex(ord(char))[2:].upper() + if len(hex_repr) == 1: + hex_repr = "0%s" % hex_repr + return "%" + hex_repr + +class QuotingTests(unittest.TestCase): + r"""Tests for urllib.quote() and urllib.quote_plus() + + According to RFC 3986 (Uniform Resource Identifiers), to escape a + character you write it as '%' + <2 character US-ASCII hex value>. + The Python code of ``'%' + hex(ord())[2:]`` escapes a + character properly. Case does not matter on the hex letters. 
+
+    The various character sets specified are:
+
+    Reserved characters : ";/?:@&=+$,"
+        Have special meaning in URIs and must be escaped if not being used for
+        their special meaning
+    Data characters : letters, digits, and "-_.!~*'()"
+        Unreserved and do not need to be escaped; can be, though, if desired
+    Control characters : 0x00 - 0x1F, 0x7F
+        Have no use in URIs so must be escaped
+    space : 0x20
+        Must be escaped
+    Delimiters : '<>#%"'
+        Must be escaped
+    Unwise : "{}|\^[]`"
+        Must be escaped
+
+    """
+
+    def test_never_quote(self):
+        # Make sure quote() does not quote letters, digits, and "_,.-"
+        do_not_quote = ''.join(["ABCDEFGHIJKLMNOPQRSTUVWXYZ",
+                                "abcdefghijklmnopqrstuvwxyz",
+                                "0123456789",
+                                "_.-"])
+        result = urllib.parse.quote(do_not_quote)
+        self.assertEqual(do_not_quote, result,
+                         "using quote(): %r != %r" % (do_not_quote, result))
+        result = urllib.parse.quote_plus(do_not_quote)
+        self.assertEqual(do_not_quote, result,
+                         "using quote_plus(): %r != %r" % (do_not_quote, result))
+
+    def test_default_safe(self):
+        # Test '/' is default value for 'safe' parameter
+        self.assertEqual(urllib.parse.quote.__defaults__[0], '/')
+
+    def test_safe(self):
+        # Test setting 'safe' parameter does what it should do
+        quote_by_default = "<>"
+        result = urllib.parse.quote(quote_by_default, safe=quote_by_default)
+        self.assertEqual(quote_by_default, result,
+                         "using quote(): %r != %r" % (quote_by_default, result))
+        result = urllib.parse.quote_plus(quote_by_default,
+                                         safe=quote_by_default)
+        self.assertEqual(quote_by_default, result,
+                         "using quote_plus(): %r != %r" %
+                         (quote_by_default, result))
+        # Safe expressed as bytes rather than str
+        result = urllib.parse.quote(quote_by_default, safe=b"<>")
+        self.assertEqual(quote_by_default, result,
+                         "using quote(): %r != %r" % (quote_by_default, result))
+
+        # This feature is not implemented in Python 2.
+        # TODO: Add a workaround that allows these tests to pass.
+
+        # "Safe" non-ASCII characters should have no effect
+        # (Since URIs are not allowed to have non-ASCII characters)
+        # result = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="\xfc")
+        # expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
+        # self.assertEqual(expect, result,
+        #                  "using quote(): %r != %r" %
+        #                  (expect, result))
+        # Same as above, but using a bytes rather than str
+        # result = urllib.parse.quote("a\xfcb", encoding="latin-1", safe=b"\xfc")
+        # expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
+        # self.assertEqual(expect, result,
+        #                  "using quote(): %r != %r" %
+        #                  (expect, result))
+
+    def test_default_quoting(self):
+        # Make sure all characters that should be quoted are by default sans
+        # space (separate test for that).
+        should_quote = [chr(num) for num in range(32)]  # For 0x00 - 0x1F
+        should_quote.append(r'<>#%"{}|\^[]`')
+        should_quote.append(chr(127))  # For 0x7F
+        should_quote = ''.join(should_quote)
+        for char in should_quote:
+            result = urllib.parse.quote(char)
+            self.assertEqual(hexescape(char), result,
+                             "using quote(): "
+                             "%s should be escaped to %s, not %s" %
+                             (char, hexescape(char), result))
+            result = urllib.parse.quote_plus(char)
+            self.assertEqual(hexescape(char), result,
+                             "using quote_plus(): "
+                             "%s should be escaped to %s, not %s" %
+                             (char, hexescape(char), result))
+        del should_quote
+        partial_quote = "ab[]cd"
+        expected = "ab%5B%5Dcd"
+        result = urllib.parse.quote(partial_quote)
+        self.assertEqual(expected, result,
+                         "using quote(): %r != %r" % (expected, result))
+        result = urllib.parse.quote_plus(partial_quote)
+        self.assertEqual(expected, result,
+                         "using quote_plus(): %r != %r" % (expected, result))
+
+    def test_quoting_space(self):
+        # Make sure quote() and quote_plus() handle spaces as specified in
+        # their unique way
+        result = urllib.parse.quote(' ')
+        self.assertEqual(result, hexescape(' '),
+                         "using quote(): %r != %r" % (result, hexescape(' ')))
+        result = urllib.parse.quote_plus(' ')
+        self.assertEqual(result, '+',
+                         "using quote_plus(): %r != +" % result)
+        given = "a b cd e f"
+        expect = given.replace(' ', hexescape(' '))
+        result = urllib.parse.quote(given)
+        self.assertEqual(expect, result,
+                         "using quote(): %r != %r" % (expect, result))
+        expect = given.replace(' ', '+')
+        result = urllib.parse.quote_plus(given)
+        self.assertEqual(expect, result,
+                         "using quote_plus(): %r != %r" % (expect, result))
+
+    def test_quoting_plus(self):
+        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma'),
+                         'alpha%2Bbeta+gamma')
+        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', '+'),
+                         'alpha+beta+gamma')
+        # Test with bytes
+        self.assertEqual(urllib.parse.quote_plus(b'alpha+beta gamma'),
+                         'alpha%2Bbeta+gamma')
+        # Test with safe bytes
+        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', b'+'),
+                         'alpha+beta+gamma')
+
+    def test_quote_bytes(self):
+        # Bytes should quote directly to percent-encoded values
+        given = b"\xa2\xd8ab\xff"
+        expect = "%A2%D8ab%FF"
+        result = urllib.parse.quote(given)
+        self.assertEqual(expect, result,
+                         "using quote(): %r != %r" % (expect, result))
+        # Encoding argument should raise type error on bytes input
+        self.assertRaises(TypeError, urllib.parse.quote, given,
+                          encoding="latin-1")
+        # quote_from_bytes should work the same
+        result = urllib.parse.quote_from_bytes(given)
+        self.assertEqual(expect, result,
+                         "using quote_from_bytes(): %r != %r"
+                         % (expect, result))
+
+    def test_quote_with_unicode(self):
+        # Characters in Latin-1 range, encoded by default in UTF-8
+        given = "\xa2\xd8ab\xff"
+        expect = "%C2%A2%C3%98ab%C3%BF"
+        result = urllib.parse.quote(given)
+        self.assertEqual(expect, result,
+                         "using quote(): %r != %r" % (expect, result))
+        # Characters in Latin-1 range, encoded with None (the default)
+        result = urllib.parse.quote(given, encoding=None, errors=None)
+        self.assertEqual(expect, result,
+                         "using quote(): %r != %r" % (expect, result))
+        # Characters in Latin-1 range, encoded with Latin-1
+        given = "\xa2\xd8ab\xff"
+        expect = "%A2%D8ab%FF"
+        result = urllib.parse.quote(given, encoding="latin-1")
+        self.assertEqual(expect, result,
+                         "using quote(): %r != %r" % (expect, result))
+        # Characters in BMP, encoded by default in UTF-8
+        given = "\u6f22\u5b57"              # "Kanji"
+        expect = "%E6%BC%A2%E5%AD%97"
+        result = urllib.parse.quote(given)
+        self.assertEqual(expect, result,
+                         "using quote(): %r != %r" % (expect, result))
+        # Characters in BMP, encoded with Latin-1
+        given = "\u6f22\u5b57"
+        self.assertRaises(UnicodeEncodeError, urllib.parse.quote, given,
+                          encoding="latin-1")
+        # Characters in BMP, encoded with Latin-1, with replace error handling
+        given = "\u6f22\u5b57"
+        expect = "%3F%3F"                   # "??"
+        result = urllib.parse.quote(given, encoding="latin-1",
+                                    errors="replace")
+        self.assertEqual(expect, result,
+                         "using quote(): %r != %r" % (expect, result))
+        # Characters in BMP, Latin-1, with xmlcharref error handling
+        given = "\u6f22\u5b57"
+        expect = "%26%2328450%3B%26%2323383%3B"     # "&#28450;&#23383;"
+        result = urllib.parse.quote(given, encoding="latin-1",
+                                    errors="xmlcharrefreplace")
+        self.assertEqual(expect, result,
+                         "using quote(): %r != %r" % (expect, result))
+
+    def test_quote_plus_with_unicode(self):
+        # Encoding (latin-1) test for quote_plus
+        given = "\xa2\xd8 \xff"
+        expect = "%A2%D8+%FF"
+        result = urllib.parse.quote_plus(given, encoding="latin-1")
+        self.assertEqual(expect, result,
+                         "using quote_plus(): %r != %r" % (expect, result))
+        # Errors test for quote_plus
+        given = "ab\u6f22\u5b57 cd"
+        expect = "ab%3F%3F+cd"
+        result = urllib.parse.quote_plus(given, encoding="latin-1",
+                                         errors="replace")
+        self.assertEqual(expect, result,
+                         "using quote_plus(): %r != %r" % (expect, result))
+
+
+class UnquotingTests(unittest.TestCase):
+    """Tests for unquote() and unquote_plus()
+
+    See the doc string for QuotingTests for details on quoting and such.
+
+    """
+
+    def test_unquoting(self):
+        # Make sure unquoting of all ASCII values works
+        escape_list = []
+        for num in range(128):
+            given = hexescape(chr(num))
+            expect = chr(num)
+            result = urllib.parse.unquote(given)
+            self.assertEqual(expect, result,
+                             "using unquote(): %r != %r" % (expect, result))
+            result = urllib.parse.unquote_plus(given)
+            self.assertEqual(expect, result,
+                             "using unquote_plus(): %r != %r" %
+                             (expect, result))
+            escape_list.append(given)
+        escape_string = ''.join(escape_list)
+        del escape_list
+        result = urllib.parse.unquote(escape_string)
+        self.assertEqual(result.count('%'), 1,
+                         "using unquote(): not all characters escaped: "
+                         "%s" % result)
+        self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, None)
+        self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, ())
+
+    def test_unquoting_badpercent(self):
+        # Test unquoting on bad percent-escapes
+        given = '%xab'
+        expect = given
+        result = urllib.parse.unquote(given)
+        self.assertEqual(expect, result, "using unquote(): %r != %r"
+                         % (expect, result))
+        given = '%x'
+        expect = given
+        result = urllib.parse.unquote(given)
+        self.assertEqual(expect, result, "using unquote(): %r != %r"
+                         % (expect, result))
+        given = '%'
+        expect = given
+        result = urllib.parse.unquote(given)
+        self.assertEqual(expect, result, "using unquote(): %r != %r"
+                         % (expect, result))
+        # unquote_to_bytes
+        given = '%xab'
+        expect = given.encode('ascii')
+        result = urllib.parse.unquote_to_bytes(given)
+        self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
+                         % (expect, result))
+        given = '%x'
+        expect = given.encode('ascii')
+        result = urllib.parse.unquote_to_bytes(given)
+        self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
+                         % (expect, result))
+        given = '%'
+        expect = given.encode('ascii')
+        result = urllib.parse.unquote_to_bytes(given)
+        self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
+                         % (expect, result))
+        self.assertRaises((TypeError, AttributeError), urllib.parse.unquote_to_bytes, None)
+        self.assertRaises((TypeError, AttributeError), urllib.parse.unquote_to_bytes, ())
+
+    def test_unquoting_mixed_case(self):
+        # Test unquoting on mixed-case hex digits in the percent-escapes
+        given = '%Ab%eA'
+        expect = b'\xab\xea'
+        result = urllib.parse.unquote_to_bytes(given)
+        self.assertEqual(expect, result,
+                         "using unquote_to_bytes(): %r != %r"
+                         % (expect, result))
+
+    def test_unquoting_parts(self):
+        # Make sure unquoting works when non-quoted characters are
+        # interspersed
+        given = 'ab%sd' % hexescape('c')
+        expect = "abcd"
+        result = urllib.parse.unquote(given)
+        self.assertEqual(expect, result,
+                         "using unquote(): %r != %r" % (expect, result))
+        result = urllib.parse.unquote_plus(given)
+        self.assertEqual(expect, result,
+                         "using unquote_plus(): %r != %r" % (expect, result))
+
+    def test_unquoting_plus(self):
+        # Test difference between unquote() and unquote_plus()
+        given = "are+there+spaces..."
+        expect = given
+        result = urllib.parse.unquote(given)
+        self.assertEqual(expect, result,
+                         "using unquote(): %r != %r" % (expect, result))
+        expect = given.replace('+', ' ')
+        result = urllib.parse.unquote_plus(given)
+        self.assertEqual(expect, result,
+                         "using unquote_plus(): %r != %r" % (expect, result))
+
+    def test_unquote_to_bytes(self):
+        given = 'br%C3%BCckner_sapporo_20050930.doc'
+        expect = b'br\xc3\xbcckner_sapporo_20050930.doc'
+        result = urllib.parse.unquote_to_bytes(given)
+        self.assertEqual(expect, result,
+                         "using unquote_to_bytes(): %r != %r"
+                         % (expect, result))
+        # Test on a string with unescaped non-ASCII characters
+        # (Technically an invalid URI; expect those characters to be UTF-8
+        # encoded).
+        result = urllib.parse.unquote_to_bytes("\u6f22%C3%BC")
+        expect = b'\xe6\xbc\xa2\xc3\xbc'    # UTF-8 for "\u6f22\u00fc"
+        self.assertEqual(expect, result,
+                         "using unquote_to_bytes(): %r != %r"
+                         % (expect, result))
+        # Test with a bytes as input
+        given = b'%A2%D8ab%FF'
+        expect = b'\xa2\xd8ab\xff'
+        result = urllib.parse.unquote_to_bytes(given)
+        self.assertEqual(expect, result,
+                         "using unquote_to_bytes(): %r != %r"
+                         % (expect, result))
+        # Test with a bytes as input, with unescaped non-ASCII bytes
+        # (Technically an invalid URI; expect those bytes to be preserved)
+        given = b'%A2\xd8ab%FF'
+        expect = b'\xa2\xd8ab\xff'
+        result = urllib.parse.unquote_to_bytes(given)
+        self.assertEqual(expect, result,
+                         "using unquote_to_bytes(): %r != %r"
+                         % (expect, result))
+
+    def test_unquote_with_unicode(self):
+        # Characters in the Latin-1 range, encoded with UTF-8
+        given = 'br%C3%BCckner_sapporo_20050930.doc'
+        expect = 'br\u00fcckner_sapporo_20050930.doc'
+        result = urllib.parse.unquote(given)
+        self.assertEqual(expect, result,
+                         "using unquote(): %r != %r" % (expect, result))
+        # Characters in the Latin-1 range, encoded with None (the default)
+        result = urllib.parse.unquote(given, encoding=None, errors=None)
+        self.assertEqual(expect, result,
+                         "using unquote(): %r != %r" % (expect, result))
+
+        # Characters in the Latin-1 range, encoded with Latin-1
+        result = urllib.parse.unquote('br%FCckner_sapporo_20050930.doc',
+                                      encoding="latin-1")
+        expect = 'br\u00fcckner_sapporo_20050930.doc'
+        self.assertEqual(expect, result,
+                         "using unquote(): %r != %r" % (expect, result))
+
+        # Characters in BMP, encoded with UTF-8
+        given = "%E6%BC%A2%E5%AD%97"
+        expect = "\u6f22\u5b57"             # "Kanji"
+        result = urllib.parse.unquote(given)
+        self.assertEqual(expect, result,
+                         "using unquote(): %r != %r" % (expect, result))
+
+        # Decode with UTF-8, invalid sequence
+        given = "%F3%B1"
+        expect = "\ufffd"                   # Replacement character
+        result = urllib.parse.unquote(given)
+        self.assertEqual(expect, result,
+                         "using unquote(): %r != %r" % (expect, result))
+
+        # Decode with UTF-8, invalid sequence, replace errors
+        result = urllib.parse.unquote(given, errors="replace")
+        self.assertEqual(expect, result,
+                         "using unquote(): %r != %r" % (expect, result))
+
+        # Decode with UTF-8, invalid sequence, ignoring errors
+        given = "%F3%B1"
+        expect = ""
+        result = urllib.parse.unquote(given, errors="ignore")
+        self.assertEqual(expect, result,
+                         "using unquote(): %r != %r" % (expect, result))
+
+        # A mix of non-ASCII and percent-encoded characters, UTF-8
+        result = urllib.parse.unquote("\u6f22%C3%BC")
+        expect = '\u6f22\u00fc'
+        self.assertEqual(expect, result,
+                         "using unquote(): %r != %r" % (expect, result))
+
+        # A mix of non-ASCII and percent-encoded characters, Latin-1
+        # (Note, the string contains non-Latin-1-representable characters)
+        result = urllib.parse.unquote("\u6f22%FC", encoding="latin-1")
+        expect = '\u6f22\u00fc'
+        self.assertEqual(expect, result,
+                         "using unquote(): %r != %r" % (expect, result))
+
+    def test_unquoting_with_bytes_input(self):
+        # ASCII characters decoded to a string
+        given = b'blueberryjam'
+        expect = 'blueberryjam'
+        result = urllib.parse.unquote(given)
+        self.assertEqual(expect, result,
+                         "using unquote(): %r != %r" % (expect, result))
+
+        # A mix of non-ASCII hex-encoded characters and ASCII characters
+        given = b'bl\xc3\xa5b\xc3\xa6rsyltet\xc3\xb8y'
+        expect = 'bl\u00e5b\u00e6rsyltet\u00f8y'
+        result = urllib.parse.unquote(given)
+        self.assertEqual(expect, result,
+                         "using unquote(): %r != %r" % (expect, result))
+
+        # A mix of non-ASCII percent-encoded characters and ASCII characters
+        given = b'bl%c3%a5b%c3%a6rsyltet%c3%b8j'
+        expect = 'bl\u00e5b\u00e6rsyltet\u00f8j'
+        result = urllib.parse.unquote(given)
+        self.assertEqual(expect, result,
+                         "using unquote(): %r != %r" % (expect, result))
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/Lib/urllib.py b/Lib/urllib/__init__.py
similarity index 100%
rename from Lib/urllib.py
rename to Lib/urllib/__init__.py
diff --git a/Lib/urllib/error.py b/Lib/urllib/error.py
new file mode 100644
index 00000000000..9cf9980e9b9
--- /dev/null
+++ b/Lib/urllib/error.py
@@ -0,0 +1,3 @@
+from __future__ import absolute_import
+from urllib import ContentTooShortError
+from urllib2 import URLError, HTTPError
diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py
new file mode 100644
index 00000000000..d28bbcc5b94
--- /dev/null
+++ b/Lib/urllib/parse.py
@@ -0,0 +1,112 @@
+from __future__ import absolute_import
+__all__ = ['ParseResult', 'SplitResult', 'parse_qs', 'parse_qsl', 'urldefrag',
+           'urljoin', 'urlparse', 'urlsplit', 'urlunparse', 'urlunsplit',
+           'quote', 'quote_plus', 'unquote', 'unquote_plus',
+           'unquote_to_bytes', 'urlencode', 'splitquery', 'splittag',
+           'splituser', 'splitvalue', 'uses_fragment', 'uses_netloc',
+           'uses_params', 'uses_query', 'uses_relative', 'unwrap']
+
+from urllib import (quote as _quote_from_bytes, unquote as _unquote_to_bytes,
+                    urlencode, splitattr, splithost, splitpasswd, splitport,
+                    splitquery, splittag, splittype, splituser, splitvalue,
+                    unwrap)
+from urlparse import (__doc__, ParseResult, SplitResult, parse_qs, parse_qsl,
+                      urldefrag, urljoin, urlparse, urlsplit, urlunparse,
+                      urlunsplit, uses_fragment, uses_netloc, uses_params,
+                      uses_query, uses_relative)
+import re
+
+# Functions modified from Python 3's urllib.
+def to_bytes(url):
+    """to_bytes(u"URL") --> 'URL'."""
+    # Most URL schemes require ASCII. If that changes, the conversion
+    # can be relaxed.
+    if isinstance(url, str):
+        try:
+            url = url.encode("ASCII").decode()
+        except UnicodeError:
+            raise UnicodeError("URL " + repr(url) +
+                               " contains non-ASCII characters")
+    return url
+
+def quote_from_bytes(string, safe='/'):
+    return _quote_from_bytes(string, safe).decode('ascii')
+
+def quote(string, safe='/', encoding=None, errors=None):
+    if isinstance(string, unicode):
+        if not string:
+            return string
+        if encoding is None:
+            encoding = 'utf-8'
+        if errors is None:
+            errors = 'strict'
+        string = string.encode(encoding, errors)
+    else:
+        if encoding is not None:
+            raise TypeError("quote() doesn't support 'encoding' for bytes")
+        if errors is not None:
+            raise TypeError("quote() doesn't support 'errors' for bytes")
+    if isinstance(safe, unicode):
+        safe = safe.encode(encoding or 'utf-8', errors or 'strict')
+    return quote_from_bytes(string, safe)
+quote.__doc__ = quote_from_bytes.__doc__
+
+def quote_plus(string, safe='', encoding=None, errors=None):
+    """Like quote(), but also replace ' ' with '+', as required for quoting
+    HTML form values. Plus signs in the original string are escaped unless
+    they are included in safe. Unlike quote(), 'safe' does not default to '/'.
+    """
+    # Check if ' ' in string, where string may either be a str or bytes. If
+    # there are no spaces, the regular quote will produce the right answer.
+    if ((isinstance(string, unicode) and u' ' not in string) or
+        (isinstance(string, str) and ' ' not in string)):
+        return quote(string, safe, encoding, errors)
+    if isinstance(safe, str):
+        space = ' '
+    else:
+        space = u' '
+    string = quote(string, safe + space, encoding, errors)
+    return string.replace(' ', '+')
+
+def unquote_to_bytes(string):
+    if not isinstance(string, str):
+        string = string.encode('utf-8')
+    return _unquote_to_bytes(string)
+
+_asciire = re.compile('([\x00-\x7f]+)', re.UNICODE)
+
+def unquote(string, encoding='utf-8', errors='replace'):
+    """Replace %xx escapes by their single-character equivalent. The optional
+    encoding and errors parameters specify how to decode percent-encoded
+    sequences into Unicode characters, as accepted by the bytes.decode()
+    method.
+    By default, percent-encoded sequences are decoded with UTF-8, and invalid
+    sequences are replaced by a placeholder character.
+
+    unquote('abc%20def') -> 'abc def'.
+    """
+    if encoding is None:
+        encoding = 'utf-8'
+    if errors is None:
+        errors = 'replace'
+    if isinstance(string, str):
+        return _unquote_to_bytes(string).decode(encoding, errors)
+    if '%' not in string:
+        string.split  # raise AttributeError early for non-string input, as in Python 3
+        return string
+    bits = _asciire.split(string)
+    res = [bits[0]]
+    append = res.append
+    for i in range(1, len(bits), 2):
+        append(unquote_to_bytes(bits[i]).decode(encoding, errors))
+        append(bits[i + 1])
+    return u''.join(res)
+
+def unquote_plus(string, encoding='utf-8', errors='replace'):
+    """Like unquote(), but also replace plus signs by spaces, as required for
+    unquoting HTML form values.
+
+    unquote_plus('%7e/abc+def') -> '~/abc def'
+    """
+    string = string.replace('+', ' ')
+    return unquote(string, encoding, errors)
diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py
new file mode 100644
index 00000000000..5f9bfa1afc4
--- /dev/null
+++ b/Lib/urllib/request.py
@@ -0,0 +1,38 @@
+from __future__ import absolute_import
+__all__ = ['urlopen', 'install_opener', 'build_opener', 'pathname2url',
+           'url2pathname', 'getproxies', 'Request', 'OpenerDirector',
+           'HTTPDefaultErrorHandler', 'HTTPRedirectHandler',
+           'HTTPCookieProcessor', 'ProxyHandler', 'BaseHandler',
+           'HTTPPasswordMgr', 'HTTPPasswordMgrWithDefaultRealm',
+           'AbstractBasicAuthHandler', 'HTTPBasicAuthHandler',
+           'ProxyBasicAuthHandler', 'AbstractDigestAuthHandler',
+           'HTTPDigestAuthHandler', 'ProxyDigestAuthHandler', 'HTTPHandler',
+           'HTTPSHandler', 'FileHandler', 'FTPHandler', 'CacheFTPHandler',
+           'UnknownHandler', 'HTTPErrorProcessor', 'urlretrieve', 'urlcleanup',
+           'URLopener', 'FancyURLopener', 'proxy_bypass', 'parse_http_list',
+           'parse_keqv_list']
+
+from urllib import (pathname2url, url2pathname, getproxies, urlretrieve,
+                    urlcleanup, URLopener, FancyURLopener, proxy_bypass)
+from urllib2 import (__doc__, urlopen, install_opener, build_opener, Request,
+                     OpenerDirector, HTTPDefaultErrorHandler,
+                     HTTPRedirectHandler, HTTPCookieProcessor, ProxyHandler,
+                     BaseHandler, HTTPPasswordMgr,
+                     HTTPPasswordMgrWithDefaultRealm, AbstractBasicAuthHandler,
+                     HTTPBasicAuthHandler, ProxyBasicAuthHandler,
+                     AbstractDigestAuthHandler, HTTPDigestAuthHandler,
+                     ProxyDigestAuthHandler, HTTPHandler, HTTPSHandler,
+                     FileHandler, FTPHandler, CacheFTPHandler, UnknownHandler,
+                     HTTPErrorProcessor, parse_http_list, parse_keqv_list,
+                     AbstractHTTPHandler)
+
+# Not strictly part of urllib.request, but kept here for compatibility with
+# bad code that expects to find these names in this module.
+from urllib.error import URLError, HTTPError, ContentTooShortError
+from urllib.parse import (
+    urlparse, urlsplit, urljoin, unwrap, quote, unquote,
+    splittype, splithost, splitport, splituser, splitpasswd,
+    splitattr, splitquery, splitvalue, splittag, to_bytes,
+    unquote_to_bytes, urlunparse)
+
+__doc__ = __doc__.replace('urllib2', __name__)
diff --git a/Lib/urllib/response.py b/Lib/urllib/response.py
new file mode 100644
index 00000000000..3112fa742e7
--- /dev/null
+++ b/Lib/urllib/response.py
@@ -0,0 +1,11 @@
+"""Response classes used by urllib.
+
+The base class, addbase, defines a minimal file-like interface,
+including read() and readline(). The typical response object is an
+addinfourl instance, which defines an info() method that returns
+headers and a geturl() method that returns the url.
+""" + +from __future__ import absolute_import +__all__ = ['addbase', 'addclosehook', 'addinfo', 'addinfourl'] +from urllib import addbase, addclosehook, addinfo, addinfourl diff --git a/Lib/urllib/robotparser.py b/Lib/urllib/robotparser.py new file mode 100644 index 00000000000..a70e5073e38 --- /dev/null +++ b/Lib/urllib/robotparser.py @@ -0,0 +1,3 @@ +from __future__ import absolute_import +from robotparser import * +from robotparser import __all__, __doc__ diff --git a/Modules/itertoolsmodule.c b/Modules/itertoolsmodule.c index ac5bf31af98..a676635d04a 100644 --- a/Modules/itertoolsmodule.c +++ b/Modules/itertoolsmodule.c @@ -4131,6 +4131,11 @@ inititertools(void) PyModule_AddObject(m, name+1, (PyObject *)typelist[i]); } + Py_INCREF(&ifilterfalse_type); + PyModule_AddObject(m, "filterfalse", (PyObject *)&ifilterfalse_type); + Py_INCREF(&iziplongest_type); + PyModule_AddObject(m, "zip_longest", (PyObject *)&iziplongest_type); + if (PyType_Ready(&teedataobject_type) < 0) return; if (PyType_Ready(&tee_type) < 0)