From c0a8f7f47a590bf08ec44eac48cb6734a7156684 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pekka=20Kl=C3=A4rck?= Date: Fri, 16 Sep 2016 15:07:12 +0300 Subject: [PATCH 1/6] Fix pip when using Py2 on Windows with non-ASCII hostname or username. Two related problems are fixed: - Previously non-ASCII characters in hostname blew up `pip install` completely. This is rather severe. - Non-ASCII characters in username crashed printing help text. Not so bad but definitely annoying. Non-ASCII usernames are pretty common in non-English speaking countries. That also makes non-ASCII hostnames pretty common, because Windows creates hostname based on the username by default. The reason for these failures was that `user_cache_dir` returned Unicode on Windows and bytes elsewhere, and rest of the codebase was expecting paths to be bytes. It could be argued that pip should always use Unicode internally, but that would require a lot more changes and fixing also some of the vendored dependencies. We can also argue, like PEP-519 does, that paths should not be considered to be strings at all, and thus the "all Unicode internally" guideline wouldn't apply in this case. Fixes #3463. See that issue for more discussion and details. --- pip/utils/appdirs.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/pip/utils/appdirs.py b/pip/utils/appdirs.py index 15ca3cd03e8..93bd6d9871a 100644 --- a/pip/utils/appdirs.py +++ b/pip/utils/appdirs.py @@ -8,6 +8,7 @@ import sys from pip.compat import WINDOWS, expanduser +from pip._vendor.six import PY2 def user_cache_dir(appname): @@ -35,6 +36,11 @@ def user_cache_dir(appname): # Get the base path path = os.path.normpath(_get_win_folder("CSIDL_LOCAL_APPDATA")) + # When using Python 2, return paths as bytes on Windows similarly as on + # other operating systems. See helper function docs for more details. + if PY2 and isinstance(path, unicode): + path = _win_path_to_bytes(path) + # Add our app name and Cache directory to it path = os.path.join(path, appname, "Cache") elif sys.platform == "darwin": @@ -222,3 +228,22 @@ def _get_win_folder_with_ctypes(csidl_name): _get_win_folder = _get_win_folder_with_ctypes except ImportError: _get_win_folder = _get_win_folder_from_registry + + +def _win_path_to_bytes(path): + """Encode Windows paths to bytes when using Python 2. + + Motivation is to be consistent with other operating systems where paths + are also returned as bytes. This avoids problems mixing bytes and Unicode + elsewhere in the codebase. + + If encoding using ASCII and MBCS fails, return the original Unicode path. + + See https://github.com/pypa/pip/issues/3463 for more details and discussion. + """ + for encoding in ('ASCII', 'MBCS'): + try: + return path.encode(encoding) + except (UnicodeEncodeError, LookupError): + pass + return path From 9d26c1c8907d1a5b1e63ba6ed7d03eae2f79fe23 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pekka=20Kl=C3=A4rck?= Date: Thu, 29 Sep 2016 23:44:02 +0300 Subject: [PATCH 2/6] Finetune docs. Includes keeping lines < 80 chars. --- pip/utils/appdirs.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/pip/utils/appdirs.py b/pip/utils/appdirs.py index 93bd6d9871a..76fd4cdf9b1 100644 --- a/pip/utils/appdirs.py +++ b/pip/utils/appdirs.py @@ -36,7 +36,7 @@ def user_cache_dir(appname): # Get the base path path = os.path.normpath(_get_win_folder("CSIDL_LOCAL_APPDATA")) - # When using Python 2, return paths as bytes on Windows similarly as on + # When using Python 2, return paths as bytes on Windows like we do on # other operating systems. See helper function docs for more details. if PY2 and isinstance(path, unicode): path = _win_path_to_bytes(path) @@ -231,15 +231,14 @@ def _get_win_folder_with_ctypes(csidl_name): def _win_path_to_bytes(path): - """Encode Windows paths to bytes when using Python 2. + """Encode Windows paths to bytes. Only used on Python 2. Motivation is to be consistent with other operating systems where paths are also returned as bytes. This avoids problems mixing bytes and Unicode - elsewhere in the codebase. + elsewhere in the codebase. For more details and discussion see + . If encoding using ASCII and MBCS fails, return the original Unicode path. - - See https://github.com/pypa/pip/issues/3463 for more details and discussion. """ for encoding in ('ASCII', 'MBCS'): try: From 27192fd02abbaa993a56f24855bc8dd61d522f98 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pekka=20Kl=C3=A4rck?= Date: Thu, 29 Sep 2016 23:54:22 +0300 Subject: [PATCH 3/6] Add non-ASCII Windows fix to CHANGES.txt --- CHANGES.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGES.txt b/CHANGES.txt index f96bbc08e88..4f6c70ed164 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -36,6 +36,9 @@ * Skip scanning virtual environments even when venv/bin/python is a dangling symlink. +* Fix problems on Windows on Python 2 when username or hostname contains + non-ASCII characters (:issue:`3463`, :pull:`3970`). + **8.1.2 (2016-05-10)** From 14176e5eba78b3ea501acdc49fe42ed264c5f4c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pekka=20Kl=C3=A4rck?= Date: Fri, 30 Sep 2016 15:17:56 +0300 Subject: [PATCH 4/6] Make flake8 happy by using 'six.text_type' and not 'unicode' --- pip/utils/appdirs.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pip/utils/appdirs.py b/pip/utils/appdirs.py index 76fd4cdf9b1..a4948cd9239 100644 --- a/pip/utils/appdirs.py +++ b/pip/utils/appdirs.py @@ -8,7 +8,7 @@ import sys from pip.compat import WINDOWS, expanduser -from pip._vendor.six import PY2 +from pip._vendor.six import PY2, text_type def user_cache_dir(appname): @@ -38,7 +38,7 @@ def user_cache_dir(appname): # When using Python 2, return paths as bytes on Windows like we do on # other operating systems. See helper function docs for more details. - if PY2 and isinstance(path, unicode): + if PY2 and isinstance(path, text_type): path = _win_path_to_bytes(path) # Add our app name and Cache directory to it From d726afe8b61aa2f2286eae8c35905697c59f1a60 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pekka=20Kl=C3=A4rck?= Date: Fri, 30 Sep 2016 15:19:05 +0300 Subject: [PATCH 5/6] Unit test to verify user_cache_dirs returns str everywhere. Couldn't unfortunately run this locally due to unit test execution failing totally. This test is pretty simple so hopefully it works anyway. Would like to add a separate test to make sure return type is str also on Windows when _get_win_folder returns Unicode. Adding a test with mocking and skipping would be easier if I could run it locally too. --- tests/unit/test_utils.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index 32220b4d180..3732ff9edb7 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -22,6 +22,7 @@ from pip.utils.encoding import auto_decode from pip.utils.hashes import Hashes, MissingHashes from pip.utils.glibc import check_glibc_version +from pip.utils.appdirs import user_cache_dir from pip._vendor.six import BytesIO @@ -524,3 +525,9 @@ def test_manylinux1_check_glibc_version(self): else: # Didn't find the warning we were expecting assert False + +class Test_user_cache_dir(object): + + def test_return_path_as_str(self): + """Test that path is bytes on Python 2 and Unicode on Python 3.""" + assert isinstance(user_cache_dir('test'), str) From efeaf3057651de31bec6ad4614337f58acb80ae1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pekka=20Kl=C3=A4rck?= Date: Fri, 30 Sep 2016 16:55:28 +0300 Subject: [PATCH 6/6] pep8 --- tests/unit/test_utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index 3732ff9edb7..d81ac81a215 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -526,6 +526,7 @@ def test_manylinux1_check_glibc_version(self): # Didn't find the warning we were expecting assert False + class Test_user_cache_dir(object): def test_return_path_as_str(self):