From e058486c9195bcfc294c731e03b5b5784729f9a6 Mon Sep 17 00:00:00 2001 From: Erik Rose Date: Thu, 3 Sep 2015 17:38:34 -0400 Subject: [PATCH 01/39] Fix some docstring typos. --- pip/download.py | 2 +- pip/req/req_set.py | 4 ++-- pip/utils/appdirs.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pip/download.py b/pip/download.py index a083316464f..751572feb5b 100644 --- a/pip/download.py +++ b/pip/download.py @@ -561,7 +561,7 @@ def resp_read(chunk_size): # Special case for urllib3. for chunk in resp.raw.stream( chunk_size, - # We use decode_content=False here because we do + # We use decode_content=False here because we don't # want urllib3 to mess with the raw bytes we get # from the server. If we decompress inside of # urllib3 then we cannot verify the checksum diff --git a/pip/req/req_set.py b/pip/req/req_set.py index 58951adba96..c153337a475 100644 --- a/pip/req/req_set.py +++ b/pip/req/req_set.py @@ -396,9 +396,9 @@ def _check_skip_installed(self, req_to_install, finder): return None def _prepare_file(self, finder, req_to_install): - """Prepare a single requirements files. + """Prepare a single requirements file. - :return: A list of addition InstallRequirements to also install. + :return: A list of additional InstallRequirements to also install. """ # Tell user what we are doing for this requirement: # obtain (editable), skipping, processing (local url), collecting diff --git a/pip/utils/appdirs.py b/pip/utils/appdirs.py index 5dd66eb1ed9..b163a77e26b 100644 --- a/pip/utils/appdirs.py +++ b/pip/utils/appdirs.py @@ -1,6 +1,6 @@ """ This code was taken from https://github.com/ActiveState/appdirs and modified -to suite our purposes. +to suit our purposes. """ from __future__ import absolute_import From 62ac258e1e45af4b4b4103b8cc5d32d6d97284f4 Mon Sep 17 00:00:00 2001 From: Erik Rose Date: Fri, 11 Sep 2015 11:14:33 -0400 Subject: [PATCH 02/39] Delete dead _copy_dist_from_dir(). 
--- pip/download.py | 39 --------------------------------------- 1 file changed, 39 deletions(-) diff --git a/pip/download.py b/pip/download.py index 751572feb5b..1561b6b2cd9 100644 --- a/pip/download.py +++ b/pip/download.py @@ -727,45 +727,6 @@ def unpack_file_url(link, location, download_dir=None): _copy_file(from_path, download_dir, content_type, link) -def _copy_dist_from_dir(link_path, location): - """Copy distribution files in `link_path` to `location`. - - Invoked when user requests to install a local directory. E.g.: - - pip install . - pip install ~/dev/git-repos/python-prompt-toolkit - - """ - - # Note: This is currently VERY SLOW if you have a lot of data in the - # directory, because it copies everything with `shutil.copytree`. - # What it should really do is build an sdist and install that. - # See https://github.com/pypa/pip/issues/2195 - - if os.path.isdir(location): - rmtree(location) - - # build an sdist - setup_py = 'setup.py' - sdist_args = [sys.executable] - sdist_args.append('-c') - sdist_args.append( - "import setuptools, tokenize;__file__=%r;" - "exec(compile(getattr(tokenize, 'open', open)(__file__).read()" - ".replace('\\r\\n', '\\n'), __file__, 'exec'))" % setup_py) - sdist_args.append('sdist') - sdist_args += ['--dist-dir', location] - logger.info('Running setup.py sdist for %s', link_path) - - with indent_log(): - call_subprocess(sdist_args, cwd=link_path, show_stdout=False) - - # unpack sdist into `location` - sdist = os.path.join(location, os.listdir(location)[0]) - logger.info('Unpacking sdist %s into %s', sdist, location) - unpack_file(sdist, location, content_type=None, link=None) - - class PipXmlrpcTransport(xmlrpc_client.Transport): """Provide a `xmlrpclib.Transport` implementation via a `PipSession` object. 
From 9211d6e3135d1d0d2b63eb9c2b6900faafe349bb Mon Sep 17 00:00:00 2001 From: Erik Rose Date: Fri, 11 Sep 2015 14:41:58 -0400 Subject: [PATCH 03/39] Style tweaks --- pip/download.py | 12 ++++-------- pip/req/req_set.py | 5 ++++- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/pip/download.py b/pip/download.py index 1561b6b2cd9..6fd969c7d03 100644 --- a/pip/download.py +++ b/pip/download.py @@ -602,12 +602,8 @@ def resp_read(chunk_size): if show_progress: # We don't show progress on cached responses if total_length: - logger.info( - "Downloading %s (%s)", url, format_size(total_length), - ) - progress_indicator = DownloadProgressBar( - max=total_length, - ).iter + logger.info("Downloading %s (%s)", url, format_size(total_length)) + progress_indicator = DownloadProgressBar(max=total_length).iter else: logger.info("Downloading %s", url) progress_indicator = DownloadProgressSpinner().iter @@ -862,8 +858,8 @@ def _check_download_dir(link, download_dir): _check_hash(download_hash, link) except HashMismatch: logger.warning( - 'Previously-downloaded file %s has bad hash, ' - 're-downloading.', + 'Previously-downloaded file %s has bad hash. 
' + 'Re-downloading.', download_path ) os.unlink(download_path) diff --git a/pip/req/req_set.py b/pip/req/req_set.py index c153337a475..6557775c6b3 100644 --- a/pip/req/req_set.py +++ b/pip/req/req_set.py @@ -9,7 +9,7 @@ from pip._vendor import pkg_resources from pip._vendor import requests -from pip.download import (url_to_path, unpack_url) +from pip.download import url_to_path, unpack_url from pip.exceptions import (InstallationError, BestVersionAlreadyInstalled, DistributionNotFound, PreviousBuildDirError) from pip.req.req_install import InstallRequirement @@ -408,6 +408,9 @@ def _prepare_file(self, finder, req_to_install): req_to_install.prepared = True + # ###################### # + # # print log messages # # + # ###################### # if req_to_install.editable: logger.info('Obtaining %s', req_to_install) else: From 3303be0c4e8a7a15847e1ea59733029d303924a0 Mon Sep 17 00:00:00 2001 From: Erik Rose Date: Thu, 3 Sep 2015 17:37:29 -0400 Subject: [PATCH 04/39] Teach requirements parser how to parser hash options, like --sha256. We purposely keep it off the CLI for now. optparse isn't really geared to expose interspersed args and options, so a more heavy-handed approach will be necessary to support things like `pip install SomePackage --sha256=abcdef... OtherPackage --sha256=012345...`. --- pip/cmdoptions.py | 39 +++++++++++++++++++++++++++++++++++++ pip/req/req_file.py | 2 +- tests/unit/test_req_file.py | 18 +++++++++++++++++ 3 files changed, 58 insertions(+), 1 deletion(-) diff --git a/pip/cmdoptions.py b/pip/cmdoptions.py index 889862ac4b5..02ff771859e 100644 --- a/pip/cmdoptions.py +++ b/pip/cmdoptions.py @@ -10,6 +10,7 @@ from __future__ import absolute_import from functools import partial +import hashlib from optparse import OptionGroup, SUPPRESS_HELP, Option import warnings @@ -522,6 +523,44 @@ def only_binary(): help=SUPPRESS_HELP, ) +def _good_hashes(): + """Return names of hashlib algorithms at least as strong as sha256. 
+ + Preserve the order from hashlib.algorithms so --help comes out in + deterministic order. + + """ + # Remove getattr when 2.6 dies. + algos = getattr(hashlib, + 'algorithms', + ('md5', 'sha1', 'sha224', 'sha256', 'sha384', 'sha512')) + return [a for a in algos if a not in set(['md5', 'sha1', 'sha224'])] + +def _merge_mapping(option, opt_str, value, parser): + """Append the value to a list pointed to by the option name in a dict.""" + if not parser.values.hashes: + parser.values.hashes = {} + parser.values.hashes.setdefault(opt_str[2:], []).append(value) + +def hash_options(): + """Return an iterable of options named after hashlib's algorithms. + + Leave out ones weaker than sha256. + + """ + for algo_name in _good_hashes(): + yield partial(Option, + '--' + algo_name, + # Hash values eventually end up in + # InstallRequirement.hashes due to __dict__ copying in + # process_line(). + dest='hashes', + action='callback', + callback=_merge_mapping, + type='string', + help="Verify that the package's archive matches this " + 'hash before installing.') + ########## # groups # diff --git a/pip/req/req_file.py b/pip/req/req_file.py index 4b3f683c6db..daa7b285619 100644 --- a/pip/req/req_file.py +++ b/pip/req/req_file.py @@ -52,7 +52,7 @@ SUPPORTED_OPTIONS_REQ = [ cmdoptions.install_options, cmdoptions.global_options -] +] + list(cmdoptions.hash_options()) # the 'dest' string values SUPPORTED_OPTIONS_REQ_DEST = [o().dest for o in SUPPORTED_OPTIONS_REQ] diff --git a/tests/unit/test_req_file.py b/tests/unit/test_req_file.py index 1e3dbbbe5ce..1631d1dd610 100644 --- a/tests/unit/test_req_file.py +++ b/tests/unit/test_req_file.py @@ -161,6 +161,24 @@ def test_options_on_a_requirement_line(self): 'global_options': ['yo3', 'yo4'], 'install_options': ['yo1', 'yo2']} + def test_hash_options(self): + """Test the runtime-generated Options that correspond to hashlib + algorithms. + + Make sure they read and preserve multiple hashes. 
+ + """ + line = ('SomeProject ' + '--sha256=2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824 ' + '--sha384=59e1748777448c69de6b800d7a33bbfb9ff1b463e44354c3553bcdb9c666fa90125a3c79f90397bdf5f6a13de828684f ' + '--sha256=486ea46224d1bb4fb680f34f7c9ad96a8f24ec88be73ea8e5a6c65260e9cb8a7') + filename = 'filename' + req = list(process_line(line, filename, 1))[0] + assert req.options == {'hashes': { + 'sha256': ['2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824', + '486ea46224d1bb4fb680f34f7c9ad96a8f24ec88be73ea8e5a6c65260e9cb8a7'], + 'sha384': ['59e1748777448c69de6b800d7a33bbfb9ff1b463e44354c3553bcdb9c666fa90125a3c79f90397bdf5f6a13de828684f']}} + def test_set_isolated(self, options): line = 'SomeProject' filename = 'filename' From 1e41f018234e34bd1aa02c16526362c1dd302661 Mon Sep 17 00:00:00 2001 From: Erik Rose Date: Wed, 9 Sep 2015 13:01:53 -0400 Subject: [PATCH 05/39] Add checks against requirements-file-dwelling hashes for most kinds of packages. Close #1175. * Add --require-hashes option. This is handy in deployment scripts to force application authors to hash their requirements. It is also a convenient way to get pip to show computed hashes for a virgin, unhashed requirements file. Eventually, additions to `pip freeze` should fill a superset of this use case. * In --require-hashes mode, at least one hash is required to match for each requirement. * Option-based requirements (--sha256=...) turn on --require-hashes mode implicitly. * Internet-derived URL-based hashes are "necessary but not sufficient": they do not satisfy --require-hashes mode when they match, but they are still used to guard against transmission errors. * Other URL-based requirements (#md5=...) are treated just like flag-based ones, except they don't turn on --require-hashes. * Complain informatively, with the most devastating errors first so you don't chase your tail all day only to run up against a brick wall at the end. 
This also means we don't complain that a hash is missing, only for the user to find, after fixing it, that we have no idea how to even compute a hash for that type of requirement. * Complain about unpinned requirements when hash-checking mode is on, lest they cause the user surprise later. * Complain about missing hashes. * Complain about requirement types we don't know how to hash (like VCS ones and local dirs). * Have InstallRequirement keep its original Link around (original_link) so we can differentiate between URL hashes from requirements files and ones downloaded from the (untrustworthy) internet. * Remove test_download_hashes, which is obsolete. Similar coverage is provided in test_utils.TestHashes and the various hash cases in test_req.py. --- pip/commands/install.py | 10 ++ pip/download.py | 128 +++++++------- pip/exceptions.py | 207 ++++++++++++++++++++++- pip/req/req_install.py | 42 ++++- pip/req/req_set.py | 123 +++++++++++--- pip/utils/__init__.py | 6 + pip/utils/hashes.py | 88 ++++++++++ tests/functional/test_install.py | 42 ++++- tests/lib/__init__.py | 18 +- tests/unit/test_download.py | 9 +- tests/unit/test_download_hashes.py | 263 ----------------------------- tests/unit/test_req.py | 160 +++++++++++++++++- tests/unit/test_req_file.py | 12 +- tests/unit/test_utils.py | 47 ++++++ 14 files changed, 777 insertions(+), 378 deletions(-) create mode 100644 pip/utils/hashes.py delete mode 100644 tests/unit/test_download_hashes.py diff --git a/pip/commands/install.py b/pip/commands/install.py index 46cd9f22ef2..fab83d0e290 100644 --- a/pip/commands/install.py +++ b/pip/commands/install.py @@ -159,6 +159,15 @@ def __init__(self, *args, **kw): cmd_opts.add_option(cmdoptions.no_clean()) + cmd_opts.add_option( + '--require-hashes', + dest='require_hashes', + action='store_true', + help='Perform a provably repeatable installation by requiring a ' + 'hash to check each package against. 
Implied by the presence ' + 'of a hash flag, like --sha256, on any individual ' + 'requirement') + index_opts = cmdoptions.make_option_group( cmdoptions.index_group, self.parser, @@ -266,6 +275,7 @@ def run(self, options, args): pycompile=options.compile, isolated=options.isolated_mode, wheel_cache=wheel_cache, + require_hashes=options.require_hashes, ) self.populate_requirement_set( diff --git a/pip/download.py b/pip/download.py index 6fd969c7d03..abd214655c0 100644 --- a/pip/download.py +++ b/pip/download.py @@ -29,7 +29,7 @@ from pip.models import PyPI from pip.utils import (splitext, rmtree, format_size, display_path, backup_dir, ask_path_exists, unpack_file, - call_subprocess, ARCHIVE_EXTENSIONS) + call_subprocess, ARCHIVE_EXTENSIONS, consume) from pip.utils.filesystem import check_path_owner from pip.utils.logging import indent_log from pip.utils.ui import DownloadProgressBar, DownloadProgressSpinner @@ -485,57 +485,22 @@ def is_file_url(link): return link.url.lower().startswith('file:') -def _check_hash(download_hash, link): - if download_hash.digest_size != hashlib.new(link.hash_name).digest_size: - logger.critical( - "Hash digest size of the package %d (%s) doesn't match the " - "expected hash name %s!", - download_hash.digest_size, link, link.hash_name, - ) - raise HashMismatch('Hash name mismatch for package %s' % link) - if download_hash.hexdigest() != link.hash: - logger.critical( - "Hash of the package %s (%s) doesn't match the expected hash %s!", - link, download_hash.hexdigest(), link.hash, - ) - raise HashMismatch( - 'Bad %s hash for package %s' % (link.hash_name, link) - ) +def is_dir_url(link): + """Return whether a file:// Link points to a directory. + ``link`` must not have any other scheme but file://. Call is_file_url() + first. 
-def _get_hash_from_file(target_file, link): - try: - download_hash = hashlib.new(link.hash_name) - except (ValueError, TypeError): - logger.warning( - "Unsupported hash name %s for package %s", link.hash_name, link, - ) - return None - - with open(target_file, 'rb') as fp: - while True: - chunk = fp.read(4096) - if not chunk: - break - download_hash.update(chunk) - return download_hash + """ + link_path = url_to_path(link.url_without_fragment) + return os.path.isdir(link_path) def _progress_indicator(iterable, *args, **kwargs): return iterable -def _download_url(resp, link, content_file): - download_hash = None - if link.hash and link.hash_name: - try: - download_hash = hashlib.new(link.hash_name) - except ValueError: - logger.warning( - "Unsupported hash name %s for package %s", - link.hash_name, link, - ) - +def _download_url(resp, link, content_file, hashes): try: total_length = int(resp.headers['content-length']) except (ValueError, KeyError, TypeError): @@ -593,6 +558,11 @@ def resp_read(chunk_size): break yield chunk + def written_chunks(chunks): + for chunk in chunks: + content_file.write(chunk) + yield chunk + progress_indicator = _progress_indicator if link.netloc == PyPI.netloc: @@ -614,13 +584,12 @@ def resp_read(chunk_size): logger.debug('Downloading from URL %s', link) - for chunk in progress_indicator(resp_read(4096), 4096): - if download_hash is not None: - download_hash.update(chunk) - content_file.write(chunk) - if link.hash and link.hash_name: - _check_hash(download_hash, link) - return download_hash + downloaded_chunks = written_chunks(progress_indicator(resp_read(4096), + 4096)) + if hashes: + hashes.check_against_chunks(downloaded_chunks) + else: + consume(downloaded_chunks) def _copy_file(filename, location, content_type, link): @@ -648,7 +617,11 @@ def _copy_file(filename, location, content_type, link): logger.info('Saved %s', display_path(download_location)) -def unpack_http_url(link, location, download_dir=None, session=None): +def 
unpack_http_url(link, + location, + download_dir=None, + session=None, + hashes=None): if session is None: raise TypeError( "unpack_http_url() missing 1 required keyword argument: 'session'" @@ -659,14 +632,19 @@ def unpack_http_url(link, location, download_dir=None, session=None): # If a download dir is specified, is the file already downloaded there? already_downloaded_path = None if download_dir: - already_downloaded_path = _check_download_dir(link, download_dir) + already_downloaded_path = _check_download_dir(link, + download_dir, + hashes) if already_downloaded_path: from_path = already_downloaded_path content_type = mimetypes.guess_type(from_path)[0] else: # let's download to a tmp dir - from_path, content_type = _download_http_url(link, session, temp_dir) + from_path, content_type = _download_http_url(link, + session, + temp_dir, + hashes) # unpack the archive to the build dir location. even when only downloading # archives, they have to be unpacked to parse dependencies @@ -681,15 +659,16 @@ def unpack_http_url(link, location, download_dir=None, session=None): rmtree(temp_dir) -def unpack_file_url(link, location, download_dir=None): +def unpack_file_url(link, location, download_dir=None, hashes=None): """Unpack link into location. - If download_dir is provided and link points to a file, make a copy - of the link file inside download_dir.""" + If download_dir is provided and link points to a file, make a copy + of the link file inside download_dir. 
+ """ link_path = url_to_path(link.url_without_fragment) # If it's a url to a local directory - if os.path.isdir(link_path): + if is_dir_url(link): if os.path.isdir(location): rmtree(location) shutil.copytree(link_path, location, symlinks=True) @@ -697,15 +676,17 @@ def unpack_file_url(link, location, download_dir=None): logger.info('Link is a directory, ignoring download_dir') return - # if link has a hash, let's confirm it matches - if link.hash: - link_path_hash = _get_hash_from_file(link_path, link) - _check_hash(link_path_hash, link) + # If --require-hashes is off, `hashes` is either empty, the link hash, or + # MissingHashes, and it's required to match. If --require-hashes is on, we + # are satisfied by any hash in `hashes` matching: a URL-based or an + # option-based one; no internet-sourced hash will be in `hashes`. + if hashes: + hashes.check_against_path(link_path) # If a download dir is specified, is the file already there and valid? already_downloaded_path = None if download_dir: - already_downloaded_path = _check_download_dir(link, download_dir) + already_downloaded_path = _check_download_dir(link, download_dir, hashes) if already_downloaded_path: from_path = already_downloaded_path @@ -752,7 +733,7 @@ def request(self, host, handler, request_body, verbose=False): def unpack_url(link, location, download_dir=None, - only_download=False, session=None): + only_download=False, session=None, hashes=None): """Unpack link. If link is a VCS link: if only_download, export into download_dir and ignore location @@ -761,6 +742,11 @@ def unpack_url(link, location, download_dir=None, - unpack into location - if download_dir, copy the file into download_dir - if only_download, mark location for deletion + + :param hashes: A Hashes object, one of whose embedded hashes must match, + or I'll raise HashMismatch. 
If the Hashes is empty, no matches are + required, and unhashable types of requirements (like VCS ones, which + would ordinarily raise HashUnsupported) are allowed. """ # non-editable vcs urls if is_vcs_url(link): @@ -768,7 +754,7 @@ def unpack_url(link, location, download_dir=None, # file urls elif is_file_url(link): - unpack_file_url(link, location, download_dir) + unpack_file_url(link, location, download_dir, hashes=hashes) # http urls else: @@ -780,12 +766,13 @@ def unpack_url(link, location, download_dir=None, location, download_dir, session, + hashes=hashes ) if only_download: write_delete_marker_file(location) -def _download_http_url(link, session, temp_dir): +def _download_http_url(link, session, temp_dir, hashes): """Download link url into temp_dir using provided session""" target_url = link.url.split('#', 1)[0] try: @@ -840,11 +827,11 @@ def _download_http_url(link, session, temp_dir): filename += ext file_path = os.path.join(temp_dir, filename) with open(file_path, 'wb') as content_file: - _download_url(resp, link, content_file) + _download_url(resp, link, content_file, hashes) return file_path, content_type -def _check_download_dir(link, download_dir): +def _check_download_dir(link, download_dir, hashes): """ Check download_dir for previously downloaded file with correct hash If a correct file is found return its path else None """ @@ -852,10 +839,9 @@ def _check_download_dir(link, download_dir): if os.path.exists(download_path): # If already downloaded, does its hash match? logger.info('File was already downloaded %s', download_path) - if link.hash: - download_hash = _get_hash_from_file(download_path, link) + if hashes: try: - _check_hash(download_hash, link) + hashes.check_against_path(download_path) except HashMismatch: logger.warning( 'Previously-downloaded file %s has bad hash. 
' diff --git a/pip/exceptions.py b/pip/exceptions.py index a4cb15e1318..4fa16c0a85f 100644 --- a/pip/exceptions.py +++ b/pip/exceptions.py @@ -1,6 +1,10 @@ """Exceptions used throughout package""" from __future__ import absolute_import +from itertools import chain, groupby, repeat + +from pip._vendor.six import iteritems + class PipError(Exception): """Base pip exception""" @@ -39,13 +43,208 @@ class PreviousBuildDirError(PipError): """Raised when there's a previous conflicting build directory""" -class HashMismatch(InstallationError): - """Distribution file hash values don't match.""" - - class InvalidWheelFilename(InstallationError): """Invalid wheel filename.""" class UnsupportedWheel(InstallationError): """Unsupported wheel.""" + + +# The recommended hash algo of the moment. Feel free to change this any time. +FAVORITE_HASH = 'sha256' + + +class HashErrors(InstallationError): + """Multiple HashError instances rolled into one for reporting""" + + def __init__(self): + self.errors = [] + + def append(self, error): + self.errors.append(error) + + def __str__(self): + lines = [] + self.errors.sort(key=lambda e: e.order) + for cls, errors_of_cls in groupby(self.errors, lambda e: e.__class__): + lines.append(cls.head()) + lines.extend(e.body() for e in errors_of_cls) + if lines: + return '\n'.join(lines) + + def __nonzero__(self): + return bool(self.errors) + + def __bool__(self): + return self.__nonzero__() + + +class HashError(InstallationError): + """A failure to verify a package against known-good hashes + + :cvar order: An int sorting hash exception classes by difficulty of + recovery (lower being harder), so the user doesn't bother fretting + about unpinned packages when he has deeper issues, like VCS + dependencies, to deal with. Also keeps error reports in a + deterministic order. + :ivar req: The InstallRequirement that triggered this error. This is + pasted on after the exception is instantiated, because it's not + typically available earlier. 
+ + """ + req = None + + @classmethod + def head(cls): + """Return a section heading for display above potentially many + exceptions of this kind.""" + + def body(self): + """Return a summary of me for display under the heading. + + This default implementation simply prints a description of the + triggering requirement. + + :param req: The InstallRequirement that provoked this error, with + populate_link() having already been called + + """ + return ' %s' % self._requirement_name() + + def __str__(self): + return '%s\n%s' % (self.head(), self.body()) + + def _requirement_name(self): # TODO: Make sure this is the best it can be and is DRY with subclasses. + """Return a description of the requirement that triggered me. + + This default implementation returns long description of the req, with + line numbers + + """ + return str(self.req) if self.req else 'unknown package' + + +class VcsHashUnsupported(HashError): + """A hash was provided for a version-control-system-based requirement, but + we don't have a method for hashing those.""" + + order = 0 + + @classmethod + def head(cls): + return ("Can't verify hashes for these requirements because we don't " + "have a way to hash version control repositories:") + + +class DirectoryUrlHashUnsupported(HashError): + """A hash was provided for a version-control-system-based requirement, but + we don't have a method for hashing those.""" + + order = 1 + + @classmethod + def head(cls): + return ("Can't verify hashes for these file:// requirements because " + "they point to directories:") + + +class HashMissing(HashError): + """A hash was needed for a requirement but is absent.""" + + order = 2 + + def __init__(self, gotten_hash): + """ + :param gotten_hash: The hash of the (possibly malicious) archive we + just downloaded + """ + self.gotten_hash = gotten_hash + + @classmethod + def head(cls): + return ('These requirements were missing hashes, which leaves them ' + 'open to tampering. 
(Hashes are required in --require-hashes ' + 'mode, which is implicitly on when a hash is specified for ' + 'any package.) Here are the hashes the downloaded archives ' + 'actually had. You can add lines like these to your ' + 'requirements files to pin them down.') + + def body(self): + return ' %s --%s=%s' % (self.req.req if self.req and + # In case someone feeds something + # downright stupid to + # InstallRequirement's constructor: + getattr(self.req, 'req', None) + else 'unknown package', + FAVORITE_HASH, + self.gotten_hash) + + +class HashUnpinned(HashError): + """A requirement had a hash specified but was not pinned to a specific + version.""" + + order = 3 + + @classmethod + def head(cls): + return ('When a hash is specified, a requirement must also have its ' + 'version pinned with ==. These do not:') + + +class HashMismatch(HashError): + """Distribution file hash values don't match. + + :ivar package_name: The name of the package that triggered the hash + mismatch. Feel free to write to this after the exception is raise to + improve its error message. + + """ + order = 4 + + def __init__(self, goods, gots): + """ + :param goods: A dict of algorithm names pointing to lists of allowed + hex digests + :param gots: A dict of algorithm names pointing to hashes we + actually got from the files under suspicion + """ + self.goods = goods + self.gots = gots + + @classmethod + def head(cls): + return ('THESE PACKAGES DID NOT MATCH THE HASHES FROM THE ' + 'REQUIREMENTS FILE. If you have updated the package versions, ' + 'update the hashes. Otherwise, examine the package contents ' + 'carefully; someone may have tampered with them.') + + def body(self): + return ' %s:\n%s' % (self._requirement_name(), + self._hash_comparison()) + + def _hash_comparison(self): + """Return a comparison of actual and expected hash values. 
+ + Example:: + + Expected sha256 abcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcde + or 123451234512345123451234512345123451234512345 + Got bcdefbcdefbcdefbcdefbcdefbcdefbcdefbcdefbcdef + + """ + def hash_then_or(hash_name): + # For now, all the decent hashes have 6-char names, so we can get + # away with hard-coding space literals. + return chain([hash_name], repeat(' or')) + + lines = [] + for hash_name, expecteds in iteritems(self.goods): + prefix = hash_then_or(hash_name) + lines.extend((' Expected %s %s' % (next(prefix), e)) + for e in expecteds) + lines.append(' Got %s\n' % + self.gots[hash_name].hexdigest()) + prefix = ' or' + return '\n'.join(lines) diff --git a/pip/req/req_install.py b/pip/req/req_install.py index 4689ae14c6e..365d4c51e78 100644 --- a/pip/req/req_install.py +++ b/pip/req/req_install.py @@ -32,6 +32,7 @@ call_subprocess, read_text_file, FakeFile, _make_build_dir, ensure_dir, get_installed_version ) +from pip.utils.hashes import Hashes from pip.utils.logging import indent_log from pip.req.req_uninstall import UninstallPathSet from pip.vcs import vcs @@ -76,7 +77,7 @@ def __init__(self, req, comes_from, source_dir=None, editable=False, self.editable_options = editable_options self._wheel_cache = wheel_cache - self.link = link + self.link = self.original_link = link self.as_egg = as_egg self.markers = markers self._egg_info_path = None @@ -265,6 +266,15 @@ def link(self, link): def specifier(self): return self.req.specifier + @property + def is_pinned(self): + """Return whether I am pinned to an exact version. + + For example, some-package==1.2 is pinned; some-package>1.2 is not. + """ + specifiers = self.specifier + return len(specifiers) == 1 and next(iter(specifiers)).operator == '==' + def from_path(self): if self.req is None: return None @@ -1005,6 +1015,36 @@ def get_dist(self): project_name=dist_name, metadata=metadata) + @property + def has_hash_options(self): + """Return whether any known-good hashes are specified as options. 
+ + These activate --require-hashes mode; hashes specified as part of a + URL do not. + + """ + return bool(self.options.get('hashes', {})) + + def hashes(self, trust_internet=True): + """Return a hash-comparer that considers my option- and URL-based + hashes to be known-good. + + Hashes in URLs are almost peers with ones from flags. They satisfy + --require-hashes (whether it was implicitly or explicitly activated) + but do not activate it. md5 and sha224 are not allowed in flags, which + should nudge people toward good algos. We always OR all hashes + together, even ones from URLs. + + :param trust_internet: Whether to trust URL-based (#md5=...) hashes + downloaded from the internet, as by populate_link() + + """ + good_hashes = self.options.get('hashes', {}).copy() + link = self.link if trust_internet else self.original_link + if link and link.hash: + good_hashes.setdefault(link.hash_name, []).append(link.hash) + return Hashes(good_hashes) + def _strip_postfix(req): """ diff --git a/pip/req/req_set.py b/pip/req/req_set.py index 6557775c6b3..878bc8f74f7 100644 --- a/pip/req/req_set.py +++ b/pip/req/req_set.py @@ -2,19 +2,23 @@ from collections import defaultdict import functools -import itertools +from itertools import chain import logging import os from pip._vendor import pkg_resources from pip._vendor import requests -from pip.download import url_to_path, unpack_url +from pip.download import (is_file_url, is_dir_url, is_vcs_url, url_to_path, + unpack_url) from pip.exceptions import (InstallationError, BestVersionAlreadyInstalled, - DistributionNotFound, PreviousBuildDirError) + DistributionNotFound, PreviousBuildDirError, + HashError, HashErrors, HashUnpinned, + DirectoryUrlHashUnsupported, VcsHashUnsupported) from pip.req.req_install import InstallRequirement from pip.utils import ( display_path, dist_in_usersite, ensure_dir, normalize_path) +from pip.utils.hashes import MissingHashes from pip.utils.logging import indent_log from pip.vcs import vcs @@ -140,7 
+144,7 @@ def __init__(self, build_dir, src_dir, download_dir, upgrade=False, ignore_dependencies=False, force_reinstall=False, use_user_site=False, session=None, pycompile=True, isolated=False, wheel_download_dir=None, - wheel_cache=None): + wheel_cache=None, require_hashes=False): """Create a RequirementSet. :param wheel_download_dir: Where still-packed .whl files should be @@ -186,6 +190,7 @@ def __init__(self, build_dir, src_dir, download_dir, upgrade=False, wheel_download_dir = normalize_path(wheel_download_dir) self.wheel_download_dir = wheel_download_dir self._wheel_cache = wheel_cache + self._require_hashes = require_hashes # Maps from install_req -> dependencies_of_install_req self._dependencies = defaultdict(list) @@ -315,23 +320,6 @@ def uninstall(self, auto_confirm=False): req.uninstall(auto_confirm=auto_confirm) req.commit_uninstall() - def _walk_req_to_install(self, handler): - """Call handler for all pending reqs. - - :param handler: Handle a single requirement. Should take a requirement - to install. Can optionally return an iterable of additional - InstallRequirements to cover. - """ - # The list() here is to avoid potential mutate-while-iterating bugs. - discovered_reqs = [] - reqs = itertools.chain( - list(self.unnamed_requirements), list(self.requirements.values()), - discovered_reqs) - for req_to_install in reqs: - more_reqs = handler(req_to_install) - if more_reqs: - discovered_reqs.extend(more_reqs) - def prepare_files(self, finder): """ Prepare process. Create temp directories, download and/or unpack files. @@ -340,8 +328,37 @@ def prepare_files(self, finder): if self.wheel_download_dir: ensure_dir(self.wheel_download_dir) - self._walk_req_to_install( - functools.partial(self._prepare_file, finder)) + # If any top-level requirement has a hash specified, enter + # hash-checking mode, which requires hashes from all. 
+ root_reqs = self.unnamed_requirements + self.requirements.values() + require_hashes = (self._require_hashes or + any(req.has_hash_options for req in root_reqs)) + if require_hashes and self.as_egg: + raise InstallationError( + '--egg is not allowed with --require-hashes mode, since it ' + 'delegates dependency resolution to setuptools and could thus ' + 'result in installation of unhashed packages.') + + # Actually prepare the files, and collect any exceptions. The + # *HashUnsupported exceptions cannot be checked ahead of time, because + # req.populate_links() needs to be called before we can examine the + # link type. + discovered_reqs = [] + hash_errors = HashErrors() + for req in chain(root_reqs, discovered_reqs): + try: + discovered_reqs.extend(self._prepare_file( + finder, + req, + require_hashes=require_hashes, + ignore_dependencies=self.ignore_dependencies)) + except HashError as exc: + exc.req = req + hash_errors.append(exc) + + if hash_errors: + raise hash_errors + def _check_skip_installed(self, req_to_install, finder): """Check if req_to_install should be skipped. @@ -395,7 +412,11 @@ def _check_skip_installed(self, req_to_install, finder): else: return None - def _prepare_file(self, finder, req_to_install): + def _prepare_file(self, + finder, + req_to_install, + require_hashes=False, + ignore_dependencies=False): """Prepare a single requirements file. :return: A list of additional InstallRequirements to also install. @@ -442,6 +463,11 @@ def _prepare_file(self, finder, req_to_install): # # vcs update or unpack archive # # # ################################ # if req_to_install.editable: + if require_hashes: + raise InstallationError( + 'The editable requirement %s cannot be installed when ' + 'requiring hashes, because there is no single file to ' + 'hash.' 
% req_to_install) req_to_install.ensure_has_source_dir(self.src_dir) req_to_install.update_editable(not self.is_download) abstract_dist = make_abstract_dist(req_to_install) @@ -449,6 +475,12 @@ def _prepare_file(self, finder, req_to_install): if self.is_download: req_to_install.archive(self.download_dir) elif req_to_install.satisfied_by: + if require_hashes: + logger.info( + 'Since it is already installed, we are trusting this ' + 'package without checking its hash. To ensure a ' + 'completely repeatable environment, install into an ' + 'empty virtualenv.') abstract_dist = Installed(req_to_install) else: # @@ if filesystem packages are not marked @@ -480,6 +512,41 @@ def _prepare_file(self, finder, req_to_install): # If no new versions are found, DistributionNotFound is raised, # otherwise a result is guaranteed. assert req_to_install.link + link = req_to_install.link + + # Now that we have the real link, we can tell what kind of + # requirements we have and raise some more informative errors + # than otherwise. (For example, we can raise VcsHashUnsupported + # for a VCS URL rather than HashMissing.) + if require_hashes: + # We could check these first 2 conditions inside + # unpack_url and save repetition of conditions, but then + # we would report less-useful error messages for + # unhashable requirements, complaining that there's no + # hash provided. + if is_vcs_url(link): + raise VcsHashUnsupported() + elif is_file_url(link) and is_dir_url(link): + raise DirectoryUrlHashUnsupported() + if (not req_to_install.original_link and + not req_to_install.is_pinned): + # Unpinned packages are asking for trouble when a new + # version is uploaded. This isn't a security check, but + # it saves users a surprising hash mismatch in the + # future. + # + # file:/// URLs aren't pinnable, so don't complain + # about them not being pinned. 
+ raise HashUnpinned() + hashes = req_to_install.hashes( + trust_internet=not require_hashes) + if require_hashes and not hashes: + # Known-good hashes are missing for this requirement, so + # shim it with a facade object that will provoke hash + # computation and then raise a HashMissing exception + # showing the user what the hash should be. + hashes = MissingHashes() + try: download_dir = self.download_dir # We always delete unpacked sdists after pip ran. @@ -501,7 +568,7 @@ def _prepare_file(self, finder, req_to_install): unpack_url( req_to_install.link, req_to_install.source_dir, download_dir, autodelete_unpacked, - session=self.session) + session=self.session, hashes=hashes) except requests.HTTPError as exc: logger.critical( 'Could not install requirement %s because ' @@ -564,7 +631,11 @@ def add_req(subreq): # 'unnamed' requirements will get added here self.add_requirement(req_to_install, None) - if not self.ignore_dependencies: + if not ignore_dependencies and not require_hashes: + # --require-hashes implies --no-deps because, otherwise, + # unhashed dependencies could creep in. In the future, we + # should report unhashed dependencies rather than just not + # installing them. 
if (req_to_install.extras): logger.debug( "Installing extra requirements: %r", diff --git a/pip/utils/__init__.py b/pip/utils/__init__.py index f39393efa46..99de89df014 100644 --- a/pip/utils/__init__.py +++ b/pip/utils/__init__.py @@ -1,5 +1,6 @@ from __future__ import absolute_import +from collections import deque import contextlib import errno import locale @@ -832,3 +833,8 @@ def get_installed_version(dist_name): def canonicalize_name(name): """Convert an arbitrary string to a canonical name used for comparison""" return pkg_resources.safe_name(name).lower() + + +def consume(iterator): + """Consume an iterable at C speed.""" + deque(iterator, maxlen=0) diff --git a/pip/utils/hashes.py b/pip/utils/hashes.py new file mode 100644 index 00000000000..ab06c097a54 --- /dev/null +++ b/pip/utils/hashes.py @@ -0,0 +1,88 @@ +from __future__ import absolute_import + +import hashlib + +from pip.exceptions import (HashMismatch, HashMissing, InstallationError, + FAVORITE_HASH) +from pip._vendor.six import iteritems, iterkeys, itervalues + + +class Hashes(object): + """A wrapper that builds multiple hashes at once and checks them against + known-good values + + """ + def __init__(self, hashes=None): + """ + :param hashes: A dict of algorithm names pointing to lists of allowed + hex digests + """ + self._goods = {} if hashes is None else hashes + + def check_against_chunks(self, chunks): + """Check good hashes against ones built from iterable of chunks of + data. + + Raise HashMismatch if none match. 
+ + """ + gots = {} + for hash_name in iterkeys(self._goods): + try: + gots[hash_name] = hashlib.new(hash_name) + except (ValueError, TypeError): + raise InstallationError('Unknown hash name: %s' % hash_name) + + for chunk in chunks: + for hash in itervalues(gots): + hash.update(chunk) + + for hash_name, got in iteritems(gots): + if got.hexdigest() in self._goods[hash_name]: + return + self._raise(gots) + + def _raise(self, gots): + raise HashMismatch(self._goods, gots) + + def check_against_file(self, file): + """Check good hashes against a file-like object + + Raise HashMismatch if none match. + + """ + def chunks(): + while True: + chunk = file.read(4096) + if not chunk: + break + yield chunk + return self.check_against_chunks(chunks()) + + def check_against_path(self, path): + with open(path, 'rb') as file: + return self.check_against_file(file) + + def __nonzero__(self): + """Return whether I know any known-good hashes.""" + return bool(self._goods) + + def __bool__(self): + return self.__nonzero__() + + +class MissingHashes(Hashes): + """A workalike for Hashes used when we're missing a hash for a requirement + + It computes the "gotten" hash of the requirement and raises a HashMissing + exception showing it to the user. + + """ + def __init__(self): + """Don't offer the ``hashes`` kwarg.""" + # Pass our favorite hash in to generate a "gotten hash". With the + # empty list, it will never match, so an error will always raise. 
+ super(MissingHashes, self).__init__(hashes={FAVORITE_HASH: []}) + + def _raise(self, gots): + raise HashMissing(gots[FAVORITE_HASH].hexdigest()) diff --git a/tests/functional/test_install.py b/tests/functional/test_install.py index f724bb9bbf5..8f54fcd66c2 100644 --- a/tests/functional/test_install.py +++ b/tests/functional/test_install.py @@ -1,4 +1,3 @@ - import os import textwrap import glob @@ -9,7 +8,8 @@ from pip.utils import appdirs, rmtree from tests.lib import (pyversion, pyversion_tuple, - _create_test_package, _create_svn_repo, path_to_url) + _create_test_package, _create_svn_repo, path_to_url, + requirements_file) from tests.lib.local_repos import local_checkout from tests.lib.path import Path @@ -217,6 +217,44 @@ def test_install_from_local_directory(script, data): assert egg_info_folder in result.files_created, str(result) +def test_hashed_install_success(script, data, tmpdir): + """ + Test that installing various sorts of requirements with correct hashes + works. + + Test file URLs and index packages (which become HTTP URLs behind the + scenes). + + """ + file_url = path_to_url( + (data.packages / 'simple-1.0.tar.gz').abspath) + with requirements_file('simple2==1.0 --sha256=9336af72ca661e6336eb87bc7de3e8844d853e3848c2b9bbd2e8bf01db88c2c7\n' + '{simple} --sha256=393043e672415891885c9a2a0929b1af95fb866d6ca016b42d2e6ce53619b653'.format(simple=file_url), + tmpdir) as reqs_file: + result = script.pip_install_local('-r', + reqs_file.abspath, + expect_error=False) + + +def test_hashed_install_failure(script, data, tmpdir): + """Test that wrong hashes stop installation. + + This makes sure prepare_files() is called in the course of installation + and so has the opportunity to halt if hashes are wrong. Checks on various + kinds of hashes are in test_req.py. 
+ + """ + file_url = path_to_url( + (data.packages / 'simple-1.0.tar.gz').abspath) + with requirements_file('simple2==1.0 --sha256=9336af72ca661e6336eb87bc7de3e8844d853e3848c2b9bbd2e8bf01db88c2c\n', + tmpdir) as reqs_file: + result = script.pip_install_local('-r', + reqs_file.abspath, + expect_error=True) + assert len(result.files_created) == 0 + + + def test_install_from_local_directory_with_symlinks_to_directories( script, data): """ diff --git a/tests/lib/__init__.py b/tests/lib/__init__.py index c96746a769a..31d684b4205 100644 --- a/tests/lib/__init__.py +++ b/tests/lib/__init__.py @@ -1,5 +1,6 @@ from __future__ import absolute_import +from contextlib import contextmanager import os import sys import re @@ -569,7 +570,22 @@ def assert_raises_regexp(exception, reg, run, *args, **kwargs): try: run(*args, **kwargs) assert False, "%s should have been thrown" % exception - except Exception: + except exception: e = sys.exc_info()[1] p = re.compile(reg) assert p.search(str(e)), str(e) + + +@contextmanager +def requirements_file(contents, tmpdir): + """Return a Path to a requirements file of given contents. + + As long as the context manager is open, the requirements file will exist. 
+ + :param tmpdir: A Path to the folder in which to create the file + + """ + path = tmpdir / 'reqs.txt' + path.write(contents) + yield path + path.remove() diff --git a/tests/unit/test_download.py b/tests/unit/test_download.py index 00ba356b331..dd1a6d11ad8 100644 --- a/tests/unit/test_download.py +++ b/tests/unit/test_download.py @@ -16,6 +16,7 @@ unpack_file_url, ) from pip.index import Link +from pip.utils.hashes import Hashes def test_unpack_http_url_with_urllib_response_without_content_type(data): @@ -105,6 +106,7 @@ def test_unpack_http_url_bad_downloaded_checksum(mock_unpack_file): 'location', download_dir=download_dir, session=session, + hashes=Hashes({'sha1': [download_hash.hexdigest()]}) ) # despite existence of downloaded file with bad hash, downloaded again @@ -209,7 +211,9 @@ def test_unpack_file_url_bad_hash(self, tmpdir, data, self.prep(tmpdir, data) self.dist_url.url = "%s#md5=bogus" % self.dist_url.url with pytest.raises(HashMismatch): - unpack_file_url(self.dist_url, self.build_dir) + unpack_file_url(self.dist_url, + self.build_dir, + hashes=Hashes({'md5': ['bogus']})) def test_unpack_file_url_download_bad_hash(self, tmpdir, data, monkeypatch): @@ -235,7 +239,8 @@ def test_unpack_file_url_download_bad_hash(self, tmpdir, data, dist_path_md5 ) unpack_file_url(self.dist_url, self.build_dir, - download_dir=self.download_dir) + download_dir=self.download_dir, + hashes=Hashes({'md5': [dist_path_md5]})) # confirm hash is for simple1-1.0 # the previous bad download has been removed diff --git a/tests/unit/test_download_hashes.py b/tests/unit/test_download_hashes.py deleted file mode 100644 index 061e768ebbd..00000000000 --- a/tests/unit/test_download_hashes.py +++ /dev/null @@ -1,263 +0,0 @@ -import pytest - -from pip.download import _get_hash_from_file, _check_hash -from pip.exceptions import InstallationError -from pip.index import Link - - -def test_get_hash_from_file_md5(data): - file_path = data.packages.join("gmpy-1.15.tar.gz") - file_link = Link( 
- "http://testserver/gmpy-1.15.tar.gz" - "#md5=d41d8cd98f00b204e9800998ecf8427e" - ) - - download_hash = _get_hash_from_file(file_path, file_link) - - assert download_hash.digest_size == 16 - assert download_hash.hexdigest() == "d41d8cd98f00b204e9800998ecf8427e" - - -def test_get_hash_from_file_sha1(data): - file_path = data.packages.join("gmpy-1.15.tar.gz") - file_link = Link( - "http://testserver/gmpy-1.15.tar.gz" - "#sha1=da39a3ee5e6b4b0d3255bfef95601890afd80709" - ) - - download_hash = _get_hash_from_file(file_path, file_link) - - assert download_hash.digest_size == 20 - assert download_hash.hexdigest() == ( - "da39a3ee5e6b4b0d3255bfef95601890afd80709" - ) - - -def test_get_hash_from_file_sha224(data): - file_path = data.packages.join("gmpy-1.15.tar.gz") - file_link = Link( - "http://testserver/gmpy-1.15.tar.gz" - "#sha224=d14a028c2a3a2bc9476102bb288234c415a2b01f828ea62ac5b3e42f" - ) - - download_hash = _get_hash_from_file(file_path, file_link) - - assert download_hash.digest_size == 28 - assert download_hash.hexdigest() == ( - "d14a028c2a3a2bc9476102bb288234c415a2b01f828ea62ac5b3e42f" - ) - - -def test_get_hash_from_file_sha384(data): - file_path = data.packages.join("gmpy-1.15.tar.gz") - file_link = Link( - "http://testserver/gmpy-1.15.tar.gz" - "#sha384=38b060a751ac96384cd9327eb1b1e36a21fdb71114be07434c0cc7bf63f6e" - "1da274edebfe76f65fbd51ad2f14898b95b" - ) - - download_hash = _get_hash_from_file(file_path, file_link) - - assert download_hash.digest_size == 48 - assert download_hash.hexdigest() == ( - "38b060a751ac96384cd9327eb1b1e36a21fdb71114be07434c0cc7bf63f6e1da274e" - "debfe76f65fbd51ad2f14898b95b" - ) - - -def test_get_hash_from_file_sha256(data): - file_path = data.packages.join("gmpy-1.15.tar.gz") - file_link = Link( - "http://testserver/gmpy-1.15.tar.gz" - "#sha256=e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852" - "b855" - ) - - download_hash = _get_hash_from_file(file_path, file_link) - - assert download_hash.digest_size == 32 - 
assert download_hash.hexdigest() == ( - "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855" - ) - - -def test_get_hash_from_file_sha512(data): - file_path = data.packages.join("gmpy-1.15.tar.gz") - file_link = Link( - "http://testserver/gmpy-1.15.tar.gz" - "#sha512=cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36" - "ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e" - ) - - download_hash = _get_hash_from_file(file_path, file_link) - - assert download_hash.digest_size == 64 - assert download_hash.hexdigest() == ( - "cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0" - "d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e" - ) - - -def test_get_hash_from_file_unknown(data): - file_path = data.packages.join("gmpy-1.15.tar.gz") - file_link = Link( - "http://testserver/gmpy-1.15.tar.gz" - "#unknown_hash=d41d8cd98f00b204e9800998ecf8427e" - ) - - download_hash = _get_hash_from_file(file_path, file_link) - - assert download_hash is None - - -def test_check_hash_md5_valid(data): - file_path = data.packages.join("gmpy-1.15.tar.gz") - file_link = Link( - "http://testserver/gmpy-1.15.tar.gz" - "#md5=d41d8cd98f00b204e9800998ecf8427e" - ) - - download_hash = _get_hash_from_file(file_path, file_link) - - _check_hash(download_hash, file_link) - - -def test_check_hash_md5_invalid(data): - file_path = data.packages.join("gmpy-1.15.tar.gz") - file_link = Link("http://testserver/gmpy-1.15.tar.gz#md5=deadbeef") - - download_hash = _get_hash_from_file(file_path, file_link) - - with pytest.raises(InstallationError): - _check_hash(download_hash, file_link) - - -def test_check_hash_sha1_valid(data): - file_path = data.packages.join("gmpy-1.15.tar.gz") - file_link = Link( - "http://testserver/gmpy-1.15.tar.gz" - "#sha1=da39a3ee5e6b4b0d3255bfef95601890afd80709" - ) - - download_hash = _get_hash_from_file(file_path, file_link) - - _check_hash(download_hash, file_link) - - -def test_check_hash_sha1_invalid(data): - 
file_path = data.packages.join("gmpy-1.15.tar.gz") - file_link = Link("http://testserver/gmpy-1.15.tar.gz#sha1=deadbeef") - - download_hash = _get_hash_from_file(file_path, file_link) - - with pytest.raises(InstallationError): - _check_hash(download_hash, file_link) - - -def test_check_hash_sha224_valid(data): - file_path = data.packages.join("gmpy-1.15.tar.gz") - file_link = Link( - "http://testserver/gmpy-1.15.tar.gz" - "#sha224=d14a028c2a3a2bc9476102bb288234c415a2b01f828ea62ac5b3e42f'" - ) - - download_hash = _get_hash_from_file(file_path, file_link) - - _check_hash(download_hash, file_link) - - -def test_check_hash_sha224_invalid(data): - file_path = data.packages.join("gmpy-1.15.tar.gz") - file_link = Link("http://testserver/gmpy-1.15.tar.gz#sha224=deadbeef") - - download_hash = _get_hash_from_file(file_path, file_link) - - with pytest.raises(InstallationError): - _check_hash(download_hash, file_link) - - -def test_check_hash_sha384_valid(data): - file_path = data.packages.join("gmpy-1.15.tar.gz") - file_link = Link( - "http://testserver/gmpy-1.15.tar.gz" - "#sha384=38b060a751ac96384cd9327eb1b1e36a21fdb71114be07434c0cc7bf63f6" - "e1da274edebfe76f65fbd51ad2f14898b95b" - ) - - download_hash = _get_hash_from_file(file_path, file_link) - - _check_hash(download_hash, file_link) - - -def test_check_hash_sha384_invalid(data): - file_path = data.packages.join("gmpy-1.15.tar.gz") - file_link = Link("http://testserver/gmpy-1.15.tar.gz#sha384=deadbeef") - - download_hash = _get_hash_from_file(file_path, file_link) - - with pytest.raises(InstallationError): - _check_hash(download_hash, file_link) - - -def test_check_hash_sha256_valid(data): - file_path = data.packages.join("gmpy-1.15.tar.gz") - file_link = Link( - "http://testserver/gmpy-1.15.tar.gz" - "#sha256=e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b785" - "2b855" - ) - - download_hash = _get_hash_from_file(file_path, file_link) - - _check_hash(download_hash, file_link) - - -def 
test_check_hash_sha256_invalid(data): - file_path = data.packages.join("gmpy-1.15.tar.gz") - file_link = Link("http://testserver/gmpy-1.15.tar.gz#sha256=deadbeef") - - download_hash = _get_hash_from_file(file_path, file_link) - - with pytest.raises(InstallationError): - _check_hash(download_hash, file_link) - - -def test_check_hash_sha512_valid(data): - file_path = data.packages.join("gmpy-1.15.tar.gz") - file_link = Link( - "http://testserver/gmpy-1.15.tar.gz" - "#sha512=cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36c" - "e9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e" - ) - - download_hash = _get_hash_from_file(file_path, file_link) - - _check_hash(download_hash, file_link) - - -def test_check_hash_sha512_invalid(data): - file_path = data.packages.join("gmpy-1.15.tar.gz") - file_link = Link("http://testserver/gmpy-1.15.tar.gz#sha512=deadbeef") - - download_hash = _get_hash_from_file(file_path, file_link) - - with pytest.raises(InstallationError): - _check_hash(download_hash, file_link) - - -def test_check_hasher_mismsatch(data): - file_path = data.packages.join("gmpy-1.15.tar.gz") - file_link = Link( - "http://testserver/gmpy-1.15.tar.gz" - "#md5=d41d8cd98f00b204e9800998ecf8427e" - ) - other_link = Link( - "http://testserver/gmpy-1.15.tar.gz" - "#sha256=e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b785" - "2b855" - ) - - download_hash = _get_hash_from_file(file_path, file_link) - - with pytest.raises(InstallationError): - _check_hash(download_hash, other_link) diff --git a/tests/unit/test_req.py b/tests/unit/test_req.py index fe91ecba3a1..254e004956a 100644 --- a/tests/unit/test_req.py +++ b/tests/unit/test_req.py @@ -8,9 +8,12 @@ from mock import Mock, patch, mock_open from pip.exceptions import (PreviousBuildDirError, InvalidWheelFilename, UnsupportedWheel) -from pip.download import PipSession +from pip.download import path_to_url, PipSession +from pip.exceptions import (HashMissing, HashUnpinned, VcsHashUnsupported, 
+ HashErrors, InstallationError) from pip.index import PackageFinder from pip.req import (InstallRequirement, RequirementSet, Requirements) +from pip.req.req_file import process_line from pip.req.req_install import parse_editable from pip.utils import read_text_file from pip._vendor import pkg_resources @@ -26,12 +29,13 @@ def setup(self): def teardown(self): shutil.rmtree(self.tempdir, ignore_errors=True) - def basic_reqset(self): + def basic_reqset(self, **kwargs): return RequirementSet( build_dir=os.path.join(self.tempdir, 'build'), src_dir=os.path.join(self.tempdir, 'src'), download_dir=None, session=PipSession(), + **kwargs ) def test_no_reuse_existing_build_dir(self, data): @@ -69,6 +73,158 @@ def test_environment_marker_extras(self, data): else: assert not reqset.has_requirement('simple') + @pytest.mark.network + def test_missing_hash_checking(self, data): + """Make sure prepare_files() raises an error when a requirement has no + hash in implicit hash-checking mode. + """ + reqset = self.basic_reqset() + # No flags here. 
This tests that detection of later flags nonetheless + # requires earlier packages to have hashes: + reqset.add_requirement( + list(process_line('blessings==1.0', 'file', 1))[0]) + # This flag activates --require-hashes mode: + reqset.add_requirement( + list(process_line('tracefront==0.1 --sha256=somehash', 'file', 2))[0]) + # This hash should be accepted because it came from the reqs file, not + # from the internet: + reqset.add_requirement( + list(process_line('https://pypi.python.org/packages/source/m/more-' + 'itertools/more-itertools-1.0.tar.gz#md5=b21850c' + '3cfa7efbb70fd662ab5413bdd', 'file', 3))[0]) + finder = PackageFinder([], + ['https://pypi.python.org/simple'], + session=PipSession()) + assert_raises_regexp( + HashErrors, + r'These requirements were missing hashes.*\n' + r' blessings==1.0 --sha256=[0-9a-f]+\n' + r'THESE PACKAGES DID NOT MATCH THE HASHES.*\n' + r' tracefront==0.1 .*:\n' + r' Expected sha256 somehash\n' + r' Got [0-9a-f]+$', + reqset.prepare_files, + finder) + + def test_missing_hash_with_require_hashes(self, data): + """Setting --require-hashes explicitly should raise errors if hashes + are missing. + """ + reqset = self.basic_reqset(require_hashes=True) + reqset.add_requirement( + list(process_line('simple==1.0', 'file', 1))[0]) + finder = PackageFinder([data.find_links], [], session=PipSession()) + assert_raises_regexp( + HashErrors, + r'These requirements were missing hashes.*\n' + r' simple==1.0 --sha256=393043e672415891885c9a2a0929b1af95fb866' + r'd6ca016b42d2e6ce53619b653$', + reqset.prepare_files, + finder) + + def test_unsupported_hashes(self, data): # NEXT: Add any other test cases needed, probably delete the ones in test_install or just have one or two functional tests to make sure prepare_files() gets called when we expect (so we can actually stop on hash errors), clean up, and call it a day. Make sure we test that hashes are checked all 3 places in pip.download. Test http success. 
+ """VCS and dir links should raise errors when --require-hashes is + on. + + In addition, complaints about the type of requirement (VCS or dir) + should trump the presence or absence of a hash. + + """ + reqset = self.basic_reqset(require_hashes=True) + reqset.add_requirement( + list(process_line( + 'git+git://github.com/pypa/pip-test-package --sha256=12345', + 'file', + 1))[0]) + dir_path = data.packages.join('FSPkg') + reqset.add_requirement( + list(process_line( + 'file://%s' % (dir_path,), + 'file', + 2))[0]) + finder = PackageFinder([data.find_links], [], session=PipSession()) + assert_raises_regexp( + HashErrors, + r"Can't verify hashes for these requirements because we don't " + r"have a way to hash version control repositories:\n" + r" git\+git://github\.com/pypa/pip-test-package \(from -r file " + r"\(line 1\)\)\n" + r"Can't verify hashes for these file:// requirements because they " + r"point to directories:\n" + r" file:///.*/data/packages/FSPkg \(from -r file \(line 2\)\)", + reqset.prepare_files, + finder) + + def test_unpinned_hash_checking(self, data): + """Make sure prepare_files() raises an error when a requirement is not + version-pinned in hash-checking mode. + """ + reqset = self.basic_reqset() + # Test that there must be exactly 1 specifier: + reqset.add_requirement( + list(process_line('simple --sha256=a90427ae31f5d1d0d7ec06ee97d9fcf' + '2d0fc9a786985250c1c83fd68df5911dd', + 'file', + 1))[0]) + # Test that the operator must be ==: + reqset.add_requirement( + list(process_line('simple2>1.0 --sha256=3ad45e1e9aa48b4462af0123f6' + 'a7e44a9115db1ef945d4d92c123dfe21815a06', + 'file', + 2))[0]) + finder = PackageFinder([data.find_links], [], session=PipSession()) + assert_raises_regexp( + HashErrors, + # Make sure all failing requirements are listed: + r'version pinned with ==. 
These do not:\n' + r' simple .* \(from -r file \(line 1\)\)\n' + r' simple2>1.0 .* \(from -r file \(line 2\)\)', + reqset.prepare_files, + finder) + + def test_hash_mismatch(self, data): + """A hash mismatch should raise an error.""" + file_url = path_to_url( + (data.packages / 'simple-1.0.tar.gz').abspath) + reqset = self.basic_reqset(require_hashes=True) + reqset.add_requirement( + list(process_line('%s --sha256=badbad' % file_url, 'file', 1))[0]) + finder = PackageFinder([data.find_links], [], session=PipSession()) + assert_raises_regexp( + HashErrors, + r'THESE PACKAGES DID NOT MATCH THE HASHES.*\n' + r' file:///.*/data/packages/simple-1\.0\.tar\.gz .*:\n' + r' Expected sha256 badbad\n' + r' Got 393043e672415891885c9a2a0929b1af95fb866d' + r'6ca016b42d2e6ce53619b653$', + reqset.prepare_files, + finder) + + def test_no_deps_on_require_hashes(self, data): + """Make sure --require-hashes mode implies --no-deps.""" + reqset = self.basic_reqset() + finder = PackageFinder([data.find_links], [], session=PipSession()) + req = list(process_line( + 'TopoRequires2==0.0.1 ' + '--sha256=eaf9a01242c9f2f42cf2bd82a6a848cd' + 'e3591d14f7896bdbefcf48543720c970', + 'file', 1))[0] + deps = reqset._prepare_file(finder, req, require_hashes=True) + assert deps == [], ('_prepare_files() resolved dependencies even ' + 'though --require-hashes was on.') + + def test_no_egg_on_require_hashes(self, data): + """Make sure --egg is illegal with --require-hashes. + + --egg would cause dependencies to always be installed, since it cedes + control directly to setuptools. 
+ + """ + reqset = self.basic_reqset(require_hashes=True, as_egg=True) + finder = PackageFinder([data.find_links], [], session=PipSession()) + with pytest.raises(InstallationError): + reqset.prepare_files(finder) + @pytest.mark.parametrize(('file_contents', 'expected'), [ (b'\xf6\x80', b'\xc3\xb6\xe2\x82\xac'), # cp1252 diff --git a/tests/unit/test_req_file.py b/tests/unit/test_req_file.py index 1631d1dd610..c5fd9b72060 100644 --- a/tests/unit/test_req_file.py +++ b/tests/unit/test_req_file.py @@ -13,6 +13,7 @@ from pip.req.req_install import InstallRequirement from pip.req.req_file import (parse_requirements, process_line, join_lines, ignore_comments, break_args_options) +from tests.lib import requirements_file @pytest.fixture @@ -480,12 +481,11 @@ def test_install_requirements_with_options(self, tmpdir, finder, session, --install-option "{install_option}" '''.format(global_option=global_option, install_option=install_option) - req_path = tmpdir.join('requirements.txt') - with open(req_path, 'w') as fh: - fh.write(content) - - req = next(parse_requirements( - req_path, finder=finder, options=options, session=session)) + with requirements_file(content, tmpdir) as reqs_file: + req = next(parse_requirements(reqs_file.abspath, + finder=finder, + options=options, + session=session)) req.source_dir = os.curdir with patch.object(subprocess, 'Popen') as popen: diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index bb025a3f013..87395127714 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -12,9 +12,12 @@ import pytest from mock import Mock, patch +from pip.exceptions import HashMismatch, HashMissing, InstallationError from pip.utils import (egg_link_path, Inf, get_installed_distributions, untar_file, unzip_file, rmtree, normalize_path) +from pip.utils.hashes import Hashes, MissingHashes from pip.operations.freeze import freeze_excludes +from pip._vendor.six import StringIO class Tests_EgglinkPath: @@ -406,3 +409,47 @@ def 
test_resolve_symlinks(self, tmpdir): ) == os.path.join(tmpdir, 'file_link') finally: os.chdir(orig_working_dir) + + +class TestHashes(object): + """Tests for pip.utils.hashes""" + + def test_success(self, tmpdir): + """Make sure no error is raised when at least one hash matches. + + Test check_against_path because it calls everything else. + + """ + file = tmpdir / 'to_hash' + file.write('hello') + hashes = Hashes({ + 'sha256': ['2cf24dba5fb0a30e26e83b2ac5b9e29e' + '1b161e5c1fa7425e73043362938b9824'], + 'sha224': ['wrongwrong'], + 'md5': ['5d41402abc4b2a76b9719d911017c592']}) + hashes.check_against_path(file) + + def test_failure(self): + """Hashes should raise HashMismatch when no hashes match.""" + hashes = Hashes({'sha256': ['wrongwrong']}) + with pytest.raises(HashMismatch): + hashes.check_against_file(StringIO('hello')) + + def test_missing_hashes(self): + """MissingHashes should raise HashMissing when any check is done.""" + with pytest.raises(HashMissing): + MissingHashes().check_against_file(StringIO('hello')) + + def test_unknown_hash(self): + """Hashes should raise InstallationError when it encounters an unknown + hash.""" + hashes = Hashes({'badbad': ['dummy']}) + with pytest.raises(InstallationError): + hashes.check_against_file(StringIO('hello')) + + def test_non_zero(self): + """Test that truthiness tests tell whether any known-good hashes + exist.""" + assert Hashes({'sha256': 'dummy'}) + assert not Hashes() + assert not Hashes({}) From 11dbb924409bd7700e4b8553e0751f09a14615f4 Mon Sep 17 00:00:00 2001 From: Erik Rose Date: Thu, 24 Sep 2015 18:31:14 -0400 Subject: [PATCH 06/39] Switch from --sha256 etc. to a single option: --hash. Everybody seems to favor this. Spelled -H, it's still pretty short. And it is less unusual programmatically. 
--- pip/cmdoptions.py | 65 ++++++++++++++++---------------- pip/commands/install.py | 3 +- pip/exceptions.py | 17 +++++---- pip/req/req_file.py | 5 ++- tests/functional/test_install.py | 6 +-- tests/unit/test_req.py | 16 ++++---- tests/unit/test_req_file.py | 11 +++--- 7 files changed, 62 insertions(+), 61 deletions(-) diff --git a/pip/cmdoptions.py b/pip/cmdoptions.py index 02ff771859e..d17e919c043 100644 --- a/pip/cmdoptions.py +++ b/pip/cmdoptions.py @@ -524,42 +524,43 @@ def only_binary(): ) def _good_hashes(): - """Return names of hashlib algorithms at least as strong as sha256. - - Preserve the order from hashlib.algorithms so --help comes out in - deterministic order. - - """ + """Return names of hashlib algorithms at least as strong as sha256.""" # Remove getattr when 2.6 dies. - algos = getattr(hashlib, - 'algorithms', - ('md5', 'sha1', 'sha224', 'sha256', 'sha384', 'sha512')) - return [a for a in algos if a not in set(['md5', 'sha1', 'sha224'])] + algos = set( + getattr(hashlib, + 'algorithms', + ('md5', 'sha1', 'sha224', 'sha256', 'sha384', 'sha512'))) + return algos - set(['md5', 'sha1', 'sha224']) + -def _merge_mapping(option, opt_str, value, parser): - """Append the value to a list pointed to by the option name in a dict.""" +def _merge_hash(option, opt_str, value, parser): + """Given a value spelled "algo:digest", append the digest to a list + pointed to in a dict by the algo name.""" if not parser.values.hashes: parser.values.hashes = {} - parser.values.hashes.setdefault(opt_str[2:], []).append(value) - -def hash_options(): - """Return an iterable of options named after hashlib's algorithms. - - Leave out ones weaker than sha256. - - """ - for algo_name in _good_hashes(): - yield partial(Option, - '--' + algo_name, - # Hash values eventually end up in - # InstallRequirement.hashes due to __dict__ copying in - # process_line(). 
- dest='hashes', - action='callback', - callback=_merge_mapping, - type='string', - help="Verify that the package's archive matches this " - 'hash before installing.') + try: + algo, digest = value.split(':', 1) + except ValueError: + parser.error('Arguments to %s must be a hash name ' + 'followed by a value, like --hash=sha256:abcde...' % + opt_str) + goods = _good_hashes() + if algo not in goods: + parser.error('Allowed hash algorithms for %s are %s.' % + (opt_str, ', '.join(sorted(goods)))) + parser.values.hashes.setdefault(algo, []).append(digest) + +hash = partial( + Option, + '-H', '--hash', + # Hash values eventually end up in InstallRequirement.hashes due to + # __dict__ copying in process_line(). + dest='hashes', + action='callback', + callback=_merge_hash, + type='string', + help="Verify that the package's archive matches this " + 'hash before installing. Example: --hash=sha256:abcdef...') ########## diff --git a/pip/commands/install.py b/pip/commands/install.py index fab83d0e290..a4267922a32 100644 --- a/pip/commands/install.py +++ b/pip/commands/install.py @@ -165,8 +165,7 @@ def __init__(self, *args, **kw): action='store_true', help='Perform a provably repeatable installation by requiring a ' 'hash to check each package against. 
Implied by the presence ' - 'of a hash flag, like --sha256, on any individual ' - 'requirement') + 'of a --hash option on any individual requirement') index_opts = cmdoptions.make_option_group( cmdoptions.index_group, diff --git a/pip/exceptions.py b/pip/exceptions.py index 4fa16c0a85f..7842fc14e8b 100644 --- a/pip/exceptions.py +++ b/pip/exceptions.py @@ -171,14 +171,15 @@ def head(cls): 'requirements files to pin them down.') def body(self): - return ' %s --%s=%s' % (self.req.req if self.req and - # In case someone feeds something - # downright stupid to - # InstallRequirement's constructor: - getattr(self.req, 'req', None) - else 'unknown package', - FAVORITE_HASH, - self.gotten_hash) + package_name = (self.req.req if self.req and + # In case someone feeds something + # downright stupid to + # InstallRequirement's constructor: + getattr(self.req, 'req', None) + else 'unknown package') + return ' %s --hash=%s:%s' % (package_name, + FAVORITE_HASH, + self.gotten_hash) class HashUnpinned(HashError): diff --git a/pip/req/req_file.py b/pip/req/req_file.py index daa7b285619..d2895b1b3b4 100644 --- a/pip/req/req_file.py +++ b/pip/req/req_file.py @@ -51,8 +51,9 @@ # options to be passed to requirements SUPPORTED_OPTIONS_REQ = [ cmdoptions.install_options, - cmdoptions.global_options -] + list(cmdoptions.hash_options()) + cmdoptions.global_options, + cmdoptions.hash, +] # the 'dest' string values SUPPORTED_OPTIONS_REQ_DEST = [o().dest for o in SUPPORTED_OPTIONS_REQ] diff --git a/tests/functional/test_install.py b/tests/functional/test_install.py index 8f54fcd66c2..5b498514567 100644 --- a/tests/functional/test_install.py +++ b/tests/functional/test_install.py @@ -228,8 +228,8 @@ def test_hashed_install_success(script, data, tmpdir): """ file_url = path_to_url( (data.packages / 'simple-1.0.tar.gz').abspath) - with requirements_file('simple2==1.0 --sha256=9336af72ca661e6336eb87bc7de3e8844d853e3848c2b9bbd2e8bf01db88c2c7\n' - '{simple} 
--sha256=393043e672415891885c9a2a0929b1af95fb866d6ca016b42d2e6ce53619b653'.format(simple=file_url), + with requirements_file('simple2==1.0 --hash=sha256:9336af72ca661e6336eb87bc7de3e8844d853e3848c2b9bbd2e8bf01db88c2c7\n' + '{simple} --hash=sha256:393043e672415891885c9a2a0929b1af95fb866d6ca016b42d2e6ce53619b653'.format(simple=file_url), tmpdir) as reqs_file: result = script.pip_install_local('-r', reqs_file.abspath, @@ -246,7 +246,7 @@ def test_hashed_install_failure(script, data, tmpdir): """ file_url = path_to_url( (data.packages / 'simple-1.0.tar.gz').abspath) - with requirements_file('simple2==1.0 --sha256=9336af72ca661e6336eb87bc7de3e8844d853e3848c2b9bbd2e8bf01db88c2c\n', + with requirements_file('simple2==1.0 --hash=sha256:9336af72ca661e6336eb87bc7de3e8844d853e3848c2b9bbd2e8bf01db88c2c\n', tmpdir) as reqs_file: result = script.pip_install_local('-r', reqs_file.abspath, diff --git a/tests/unit/test_req.py b/tests/unit/test_req.py index 254e004956a..44d00bc3cf3 100644 --- a/tests/unit/test_req.py +++ b/tests/unit/test_req.py @@ -85,7 +85,7 @@ def test_missing_hash_checking(self, data): list(process_line('blessings==1.0', 'file', 1))[0]) # This flag activates --require-hashes mode: reqset.add_requirement( - list(process_line('tracefront==0.1 --sha256=somehash', 'file', 2))[0]) + list(process_line('tracefront==0.1 --hash=sha256:somehash', 'file', 2))[0]) # This hash should be accepted because it came from the reqs file, not # from the internet: reqset.add_requirement( @@ -98,7 +98,7 @@ def test_missing_hash_checking(self, data): assert_raises_regexp( HashErrors, r'These requirements were missing hashes.*\n' - r' blessings==1.0 --sha256=[0-9a-f]+\n' + r' blessings==1.0 --hash=sha256:[0-9a-f]+\n' r'THESE PACKAGES DID NOT MATCH THE HASHES.*\n' r' tracefront==0.1 .*:\n' r' Expected sha256 somehash\n' @@ -117,7 +117,7 @@ def test_missing_hash_with_require_hashes(self, data): assert_raises_regexp( HashErrors, r'These requirements were missing hashes.*\n' - r' 
simple==1.0 --sha256=393043e672415891885c9a2a0929b1af95fb866' + r' simple==1.0 --hash=sha256:393043e672415891885c9a2a0929b1af95fb866' r'd6ca016b42d2e6ce53619b653$', reqset.prepare_files, finder) @@ -133,7 +133,7 @@ def test_unsupported_hashes(self, data): # NEXT: Add any other test cases neede reqset = self.basic_reqset(require_hashes=True) reqset.add_requirement( list(process_line( - 'git+git://github.com/pypa/pip-test-package --sha256=12345', + 'git+git://github.com/pypa/pip-test-package --hash=sha256:12345', 'file', 1))[0]) dir_path = data.packages.join('FSPkg') @@ -162,13 +162,13 @@ def test_unpinned_hash_checking(self, data): reqset = self.basic_reqset() # Test that there must be exactly 1 specifier: reqset.add_requirement( - list(process_line('simple --sha256=a90427ae31f5d1d0d7ec06ee97d9fcf' + list(process_line('simple --hash=sha256:a90427ae31f5d1d0d7ec06ee97d9fcf' '2d0fc9a786985250c1c83fd68df5911dd', 'file', 1))[0]) # Test that the operator must be ==: reqset.add_requirement( - list(process_line('simple2>1.0 --sha256=3ad45e1e9aa48b4462af0123f6' + list(process_line('simple2>1.0 --hash=sha256:3ad45e1e9aa48b4462af0123f6' 'a7e44a9115db1ef945d4d92c123dfe21815a06', 'file', 2))[0]) @@ -188,7 +188,7 @@ def test_hash_mismatch(self, data): (data.packages / 'simple-1.0.tar.gz').abspath) reqset = self.basic_reqset(require_hashes=True) reqset.add_requirement( - list(process_line('%s --sha256=badbad' % file_url, 'file', 1))[0]) + list(process_line('%s --hash=sha256:badbad' % file_url, 'file', 1))[0]) finder = PackageFinder([data.find_links], [], session=PipSession()) assert_raises_regexp( HashErrors, @@ -206,7 +206,7 @@ def test_no_deps_on_require_hashes(self, data): finder = PackageFinder([data.find_links], [], session=PipSession()) req = list(process_line( 'TopoRequires2==0.0.1 ' - '--sha256=eaf9a01242c9f2f42cf2bd82a6a848cd' + '--hash=sha256:eaf9a01242c9f2f42cf2bd82a6a848cd' 'e3591d14f7896bdbefcf48543720c970', 'file', 1))[0] deps = reqset._prepare_file(finder, req, 
require_hashes=True) diff --git a/tests/unit/test_req_file.py b/tests/unit/test_req_file.py index c5fd9b72060..3e437cc2a8a 100644 --- a/tests/unit/test_req_file.py +++ b/tests/unit/test_req_file.py @@ -163,16 +163,15 @@ def test_options_on_a_requirement_line(self): 'install_options': ['yo1', 'yo2']} def test_hash_options(self): - """Test the runtime-generated Options that correspond to hashlib - algorithms. + """Test the --hash option: mostly its value storage. - Make sure they read and preserve multiple hashes. + Make sure it reads and preserve multiple hashes. """ line = ('SomeProject ' - '--sha256=2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824 ' - '--sha384=59e1748777448c69de6b800d7a33bbfb9ff1b463e44354c3553bcdb9c666fa90125a3c79f90397bdf5f6a13de828684f ' - '--sha256=486ea46224d1bb4fb680f34f7c9ad96a8f24ec88be73ea8e5a6c65260e9cb8a7') + '--hash=sha256:2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824 ' + '-H sha384:59e1748777448c69de6b800d7a33bbfb9ff1b463e44354c3553bcdb9c666fa90125a3c79f90397bdf5f6a13de828684f ' + '--hash=sha256:486ea46224d1bb4fb680f34f7c9ad96a8f24ec88be73ea8e5a6c65260e9cb8a7') filename = 'filename' req = list(process_line(line, filename, 1))[0] assert req.options == {'hashes': { From 0c17248998e0fdab427bb9d03bc1e510f9e665ea Mon Sep 17 00:00:00 2001 From: Erik Rose Date: Thu, 24 Sep 2015 18:53:39 -0400 Subject: [PATCH 07/39] Pass PEP 8 checks. 
--- pip/cmdoptions.py | 2 ++ pip/download.py | 10 ++++----- pip/exceptions.py | 2 +- pip/req/req_set.py | 4 +--- tests/functional/test_install.py | 19 ++++++++--------- tests/unit/test_req.py | 35 ++++++++++++++++---------------- tests/unit/test_req_file.py | 19 ++++++++++------- 7 files changed, 48 insertions(+), 43 deletions(-) diff --git a/pip/cmdoptions.py b/pip/cmdoptions.py index d17e919c043..40873064e67 100644 --- a/pip/cmdoptions.py +++ b/pip/cmdoptions.py @@ -523,6 +523,7 @@ def only_binary(): help=SUPPRESS_HELP, ) + def _good_hashes(): """Return names of hashlib algorithms at least as strong as sha256.""" # Remove getattr when 2.6 dies. @@ -550,6 +551,7 @@ def _merge_hash(option, opt_str, value, parser): (opt_str, ', '.join(sorted(goods)))) parser.values.hashes.setdefault(algo, []).append(digest) + hash = partial( Option, '-H', '--hash', diff --git a/pip/download.py b/pip/download.py index abd214655c0..05bfffd3977 100644 --- a/pip/download.py +++ b/pip/download.py @@ -2,7 +2,6 @@ import cgi import email.utils -import hashlib import getpass import json import logging @@ -29,9 +28,8 @@ from pip.models import PyPI from pip.utils import (splitext, rmtree, format_size, display_path, backup_dir, ask_path_exists, unpack_file, - call_subprocess, ARCHIVE_EXTENSIONS, consume) + ARCHIVE_EXTENSIONS, consume) from pip.utils.filesystem import check_path_owner -from pip.utils.logging import indent_log from pip.utils.ui import DownloadProgressBar, DownloadProgressSpinner from pip.locations import write_delete_marker_file from pip.vcs import vcs @@ -587,7 +585,7 @@ def written_chunks(chunks): downloaded_chunks = written_chunks(progress_indicator(resp_read(4096), 4096)) if hashes: - hashes.check_against_chunks(downloaded_chunks) + hashes.check_against_chunks(downloaded_chunks) else: consume(downloaded_chunks) @@ -686,7 +684,9 @@ def unpack_file_url(link, location, download_dir=None, hashes=None): # If a download dir is specified, is the file already there and valid? 
already_downloaded_path = None if download_dir: - already_downloaded_path = _check_download_dir(link, download_dir, hashes) + already_downloaded_path = _check_download_dir(link, + download_dir, + hashes) if already_downloaded_path: from_path = already_downloaded_path diff --git a/pip/exceptions.py b/pip/exceptions.py index 7842fc14e8b..14045164667 100644 --- a/pip/exceptions.py +++ b/pip/exceptions.py @@ -115,7 +115,7 @@ def body(self): def __str__(self): return '%s\n%s' % (self.head(), self.body()) - def _requirement_name(self): # TODO: Make sure this is the best it can be and is DRY with subclasses. + def _requirement_name(self): """Return a description of the requirement that triggered me. This default implementation returns long description of the req, with diff --git a/pip/req/req_set.py b/pip/req/req_set.py index 878bc8f74f7..f42ad1a8ca9 100644 --- a/pip/req/req_set.py +++ b/pip/req/req_set.py @@ -1,7 +1,6 @@ from __future__ import absolute_import from collections import defaultdict -import functools from itertools import chain import logging import os @@ -359,7 +358,6 @@ def prepare_files(self, finder): if hash_errors: raise hash_errors - def _check_skip_installed(self, req_to_install, finder): """Check if req_to_install should be skipped. @@ -529,7 +527,7 @@ def _prepare_file(self, elif is_file_url(link) and is_dir_url(link): raise DirectoryUrlHashUnsupported() if (not req_to_install.original_link and - not req_to_install.is_pinned): + not req_to_install.is_pinned): # Unpinned packages are asking for trouble when a new # version is uploaded. 
This isn't a security check, but # it saves users a surprising hash mismatch in the diff --git a/tests/functional/test_install.py b/tests/functional/test_install.py index 5b498514567..12d5f3f01ca 100644 --- a/tests/functional/test_install.py +++ b/tests/functional/test_install.py @@ -228,12 +228,13 @@ def test_hashed_install_success(script, data, tmpdir): """ file_url = path_to_url( (data.packages / 'simple-1.0.tar.gz').abspath) - with requirements_file('simple2==1.0 --hash=sha256:9336af72ca661e6336eb87bc7de3e8844d853e3848c2b9bbd2e8bf01db88c2c7\n' - '{simple} --hash=sha256:393043e672415891885c9a2a0929b1af95fb866d6ca016b42d2e6ce53619b653'.format(simple=file_url), - tmpdir) as reqs_file: - result = script.pip_install_local('-r', - reqs_file.abspath, - expect_error=False) + with requirements_file( + 'simple2==1.0 --hash=sha256:9336af72ca661e6336eb87bc7de3e8844d853e' + '3848c2b9bbd2e8bf01db88c2c7\n' + '{simple} --hash=sha256:393043e672415891885c9a2a0929b1af95fb866d6c' + 'a016b42d2e6ce53619b653'.format(simple=file_url), + tmpdir) as reqs_file: + script.pip_install_local('-r', reqs_file.abspath, expect_error=False) def test_hashed_install_failure(script, data, tmpdir): @@ -244,9 +245,8 @@ def test_hashed_install_failure(script, data, tmpdir): kinds of hashes are in test_req.py. 
""" - file_url = path_to_url( - (data.packages / 'simple-1.0.tar.gz').abspath) - with requirements_file('simple2==1.0 --hash=sha256:9336af72ca661e6336eb87bc7de3e8844d853e3848c2b9bbd2e8bf01db88c2c\n', + with requirements_file('simple2==1.0 --hash=sha256:9336af72ca661e6336eb87b' + 'c7de3e8844d853e3848c2b9bbd2e8bf01db88c2c\n', tmpdir) as reqs_file: result = script.pip_install_local('-r', reqs_file.abspath, @@ -254,7 +254,6 @@ def test_hashed_install_failure(script, data, tmpdir): assert len(result.files_created) == 0 - def test_install_from_local_directory_with_symlinks_to_directories( script, data): """ diff --git a/tests/unit/test_req.py b/tests/unit/test_req.py index 44d00bc3cf3..906e5e221e0 100644 --- a/tests/unit/test_req.py +++ b/tests/unit/test_req.py @@ -9,8 +9,7 @@ from pip.exceptions import (PreviousBuildDirError, InvalidWheelFilename, UnsupportedWheel) from pip.download import path_to_url, PipSession -from pip.exceptions import (HashMissing, HashUnpinned, VcsHashUnsupported, - HashErrors, InstallationError) +from pip.exceptions import HashErrors, InstallationError from pip.index import PackageFinder from pip.req import (InstallRequirement, RequirementSet, Requirements) from pip.req.req_file import process_line @@ -85,7 +84,9 @@ def test_missing_hash_checking(self, data): list(process_line('blessings==1.0', 'file', 1))[0]) # This flag activates --require-hashes mode: reqset.add_requirement( - list(process_line('tracefront==0.1 --hash=sha256:somehash', 'file', 2))[0]) + list(process_line('tracefront==0.1 --hash=sha256:somehash', + 'file', + 2))[0]) # This hash should be accepted because it came from the reqs file, not # from the internet: reqset.add_requirement( @@ -117,12 +118,12 @@ def test_missing_hash_with_require_hashes(self, data): assert_raises_regexp( HashErrors, r'These requirements were missing hashes.*\n' - r' simple==1.0 --hash=sha256:393043e672415891885c9a2a0929b1af95fb866' - r'd6ca016b42d2e6ce53619b653$', + r' simple==1.0 
--hash=sha256:393043e672415891885c9a2a0929b1af95' + r'fb866d6ca016b42d2e6ce53619b653$', reqset.prepare_files, finder) - def test_unsupported_hashes(self, data): # NEXT: Add any other test cases needed, probably delete the ones in test_install or just have one or two functional tests to make sure prepare_files() gets called when we expect (so we can actually stop on hash errors), clean up, and call it a day. Make sure we test that hashes are checked all 3 places in pip.download. Test http success. + def test_unsupported_hashes(self, data): """VCS and dir links should raise errors when --require-hashes is on. @@ -133,7 +134,7 @@ def test_unsupported_hashes(self, data): # NEXT: Add any other test cases neede reqset = self.basic_reqset(require_hashes=True) reqset.add_requirement( list(process_line( - 'git+git://github.com/pypa/pip-test-package --hash=sha256:12345', + 'git+git://github.com/pypa/pip-test-package -H sha256:12345', 'file', 1))[0]) dir_path = data.packages.join('FSPkg') @@ -162,16 +163,16 @@ def test_unpinned_hash_checking(self, data): reqset = self.basic_reqset() # Test that there must be exactly 1 specifier: reqset.add_requirement( - list(process_line('simple --hash=sha256:a90427ae31f5d1d0d7ec06ee97d9fcf' - '2d0fc9a786985250c1c83fd68df5911dd', + list(process_line('simple --hash=sha256:a90427ae31f5d1d0d7ec06ee97' + 'd9fcf2d0fc9a786985250c1c83fd68df5911dd', 'file', 1))[0]) # Test that the operator must be ==: - reqset.add_requirement( - list(process_line('simple2>1.0 --hash=sha256:3ad45e1e9aa48b4462af0123f6' - 'a7e44a9115db1ef945d4d92c123dfe21815a06', - 'file', - 2))[0]) + reqset.add_requirement(list(process_line( + 'simple2>1.0 --hash=sha256:3ad45e1e9aa48b4462af0' + '123f6a7e44a9115db1ef945d4d92c123dfe21815a06', + 'file', + 2))[0]) finder = PackageFinder([data.find_links], [], session=PipSession()) assert_raises_regexp( HashErrors, @@ -188,7 +189,7 @@ def test_hash_mismatch(self, data): (data.packages / 'simple-1.0.tar.gz').abspath) reqset = 
self.basic_reqset(require_hashes=True) reqset.add_requirement( - list(process_line('%s --hash=sha256:badbad' % file_url, 'file', 1))[0]) + list(process_line('%s -H sha256:badbad' % file_url, 'file', 1))[0]) finder = PackageFinder([data.find_links], [], session=PipSession()) assert_raises_regexp( HashErrors, @@ -196,7 +197,7 @@ def test_hash_mismatch(self, data): r' file:///.*/data/packages/simple-1\.0\.tar\.gz .*:\n' r' Expected sha256 badbad\n' r' Got 393043e672415891885c9a2a0929b1af95fb866d' - r'6ca016b42d2e6ce53619b653$', + r'6ca016b42d2e6ce53619b653$', reqset.prepare_files, finder) @@ -207,7 +208,7 @@ def test_no_deps_on_require_hashes(self, data): req = list(process_line( 'TopoRequires2==0.0.1 ' '--hash=sha256:eaf9a01242c9f2f42cf2bd82a6a848cd' - 'e3591d14f7896bdbefcf48543720c970', + 'e3591d14f7896bdbefcf48543720c970', 'file', 1))[0] deps = reqset._prepare_file(finder, req, require_hashes=True) assert deps == [], ('_prepare_files() resolved dependencies even ' diff --git a/tests/unit/test_req_file.py b/tests/unit/test_req_file.py index 3e437cc2a8a..901c1a7ae35 100644 --- a/tests/unit/test_req_file.py +++ b/tests/unit/test_req_file.py @@ -168,16 +168,21 @@ def test_hash_options(self): Make sure it reads and preserve multiple hashes. 
""" - line = ('SomeProject ' - '--hash=sha256:2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824 ' - '-H sha384:59e1748777448c69de6b800d7a33bbfb9ff1b463e44354c3553bcdb9c666fa90125a3c79f90397bdf5f6a13de828684f ' - '--hash=sha256:486ea46224d1bb4fb680f34f7c9ad96a8f24ec88be73ea8e5a6c65260e9cb8a7') + line = ('SomeProject --hash=sha256:2cf24dba5fb0a30e26e83b2ac5b9e29e1b1' + '61e5c1fa7425e73043362938b9824 ' + '-H sha384:59e1748777448c69de6b800d7a33bbfb9ff1b463e44354c3553' + 'bcdb9c666fa90125a3c79f90397bdf5f6a13de828684f ' + '--hash=sha256:486ea46224d1bb4fb680f34f7c9ad96a8f24ec88be73ea8' + 'e5a6c65260e9cb8a7') filename = 'filename' req = list(process_line(line, filename, 1))[0] assert req.options == {'hashes': { - 'sha256': ['2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824', - '486ea46224d1bb4fb680f34f7c9ad96a8f24ec88be73ea8e5a6c65260e9cb8a7'], - 'sha384': ['59e1748777448c69de6b800d7a33bbfb9ff1b463e44354c3553bcdb9c666fa90125a3c79f90397bdf5f6a13de828684f']}} + 'sha256': ['2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e730433' + '62938b9824', + '486ea46224d1bb4fb680f34f7c9ad96a8f24ec88be73ea8e5a6c65' + '260e9cb8a7'], + 'sha384': ['59e1748777448c69de6b800d7a33bbfb9ff1b463e44354c3553bcd' + 'b9c666fa90125a3c79f90397bdf5f6a13de828684f']}} def test_set_isolated(self, options): line = 'SomeProject' From b0ef6ab5b493004ed29c9189c610722f51816314 Mon Sep 17 00:00:00 2001 From: Erik Rose Date: Fri, 25 Sep 2015 12:24:32 -0400 Subject: [PATCH 08/39] Fix unicode errors in unit tests of Hashes under Python 3. 
--- tests/unit/test_utils.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index 87395127714..179055fbe14 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -17,7 +17,7 @@ untar_file, unzip_file, rmtree, normalize_path) from pip.utils.hashes import Hashes, MissingHashes from pip.operations.freeze import freeze_excludes -from pip._vendor.six import StringIO +from pip._vendor.six import BytesIO class Tests_EgglinkPath: @@ -433,19 +433,19 @@ def test_failure(self): """Hashes should raise HashMismatch when no hashes match.""" hashes = Hashes({'sha256': ['wrongwrong']}) with pytest.raises(HashMismatch): - hashes.check_against_file(StringIO('hello')) + hashes.check_against_file(BytesIO(b'hello')) def test_missing_hashes(self): """MissingHashes should raise HashMissing when any check is done.""" with pytest.raises(HashMissing): - MissingHashes().check_against_file(StringIO('hello')) + MissingHashes().check_against_file(BytesIO(b'hello')) def test_unknown_hash(self): """Hashes should raise InstallationError when it encounters an unknown hash.""" hashes = Hashes({'badbad': ['dummy']}) with pytest.raises(InstallationError): - hashes.check_against_file(StringIO('hello')) + hashes.check_against_file(BytesIO(b'hello')) def test_non_zero(self): """Test that truthiness tests tell whether any known-good hashes From f3f73f1c0774c256ec318b86bfac79d43a0aa407 Mon Sep 17 00:00:00 2001 From: Erik Rose Date: Fri, 25 Sep 2015 15:32:33 -0400 Subject: [PATCH 09/39] Remove the -H spelling for --hashes. dstufft is nervous about blowing a single-char option on something that will usually be copied and pasted anyway. We can always put it back later if it proves to be a pain. 
--- pip/cmdoptions.py | 2 +- tests/unit/test_req.py | 6 ++++-- tests/unit/test_req_file.py | 4 ++-- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/pip/cmdoptions.py b/pip/cmdoptions.py index 40873064e67..48e766411d5 100644 --- a/pip/cmdoptions.py +++ b/pip/cmdoptions.py @@ -554,7 +554,7 @@ def _merge_hash(option, opt_str, value, parser): hash = partial( Option, - '-H', '--hash', + '--hash', # Hash values eventually end up in InstallRequirement.hashes due to # __dict__ copying in process_line(). dest='hashes', diff --git a/tests/unit/test_req.py b/tests/unit/test_req.py index 906e5e221e0..cbdb3f5fa2a 100644 --- a/tests/unit/test_req.py +++ b/tests/unit/test_req.py @@ -134,7 +134,7 @@ def test_unsupported_hashes(self, data): reqset = self.basic_reqset(require_hashes=True) reqset.add_requirement( list(process_line( - 'git+git://github.com/pypa/pip-test-package -H sha256:12345', + 'git+git://github.com/pypa/pip-test-package --hash=sha256:123', 'file', 1))[0]) dir_path = data.packages.join('FSPkg') @@ -189,7 +189,9 @@ def test_hash_mismatch(self, data): (data.packages / 'simple-1.0.tar.gz').abspath) reqset = self.basic_reqset(require_hashes=True) reqset.add_requirement( - list(process_line('%s -H sha256:badbad' % file_url, 'file', 1))[0]) + list(process_line('%s --hash=sha256:badbad' % file_url, + 'file', + 1))[0]) finder = PackageFinder([data.find_links], [], session=PipSession()) assert_raises_regexp( HashErrors, diff --git a/tests/unit/test_req_file.py b/tests/unit/test_req_file.py index 901c1a7ae35..676d8ad2bc0 100644 --- a/tests/unit/test_req_file.py +++ b/tests/unit/test_req_file.py @@ -170,8 +170,8 @@ def test_hash_options(self): """ line = ('SomeProject --hash=sha256:2cf24dba5fb0a30e26e83b2ac5b9e29e1b1' '61e5c1fa7425e73043362938b9824 ' - '-H sha384:59e1748777448c69de6b800d7a33bbfb9ff1b463e44354c3553' - 'bcdb9c666fa90125a3c79f90397bdf5f6a13de828684f ' + '--hash=sha384:59e1748777448c69de6b800d7a33bbfb9ff1b463e44354c' + 
'3553bcdb9c666fa90125a3c79f90397bdf5f6a13de828684f ' '--hash=sha256:486ea46224d1bb4fb680f34f7c9ad96a8f24ec88be73ea8' 'e5a6c65260e9cb8a7') filename = 'filename' From 910b82c59da3c8e642e8728b26008dc90e66c771 Mon Sep 17 00:00:00 2001 From: Erik Rose Date: Fri, 25 Sep 2015 19:07:20 -0400 Subject: [PATCH 10/39] --require-hashes no longer implies --no-deps. For dependencies that are properly pinned and hashed (not really dependencies at all, if you like, since they're explicit, root-level requirements), we install them as normal. For ones that are not pinned and hashes, we raise the errors typical of any unhashed requirement in --require-hashes mode. Since the stanza under "if not ignore_dependencies" doesn't actually add anything if it's already in the RequirementSet, not much has to be done in the way of code: the unhashed deps don't have any hashes, so we complain about them as per usual. Also... * Revise wording of HashUnpinned errors. They can be raised even if no hash is specified, so the previous wording was misleading. * Make wording of HashMissing less awkward. --- pip/exceptions.py | 14 ++++++------- pip/req/req_set.py | 14 +++++-------- tests/unit/test_req.py | 45 +++++++++++++++++++++++++++++++++--------- 3 files changed, 48 insertions(+), 25 deletions(-) diff --git a/pip/exceptions.py b/pip/exceptions.py index 14045164667..3c4c6f590dd 100644 --- a/pip/exceptions.py +++ b/pip/exceptions.py @@ -163,12 +163,12 @@ def __init__(self, gotten_hash): @classmethod def head(cls): - return ('These requirements were missing hashes, which leaves them ' - 'open to tampering. (Hashes are required in --require-hashes ' - 'mode, which is implicitly on when a hash is specified for ' - 'any package.) Here are the hashes the downloaded archives ' + return ('Hashes are required in --require-hashes mode (implicitly on ' + 'when a hash is specified for any package). These ' + 'requirements were missing hashes, leaving them open to ' + 'tampering. 
These are the hashes the downloaded archives ' 'actually had. You can add lines like these to your ' - 'requirements files to pin them down.') + 'requirements files to prevent tampering.') def body(self): package_name = (self.req.req if self.req and @@ -190,8 +190,8 @@ class HashUnpinned(HashError): @classmethod def head(cls): - return ('When a hash is specified, a requirement must also have its ' - 'version pinned with ==. These do not:') + return ('In --require-hashes mode, all requirements must have their ' + 'versions pinned with ==. These do not:') class HashMismatch(HashError): diff --git a/pip/req/req_set.py b/pip/req/req_set.py index f42ad1a8ca9..7d0110d67ad 100644 --- a/pip/req/req_set.py +++ b/pip/req/req_set.py @@ -338,10 +338,10 @@ def prepare_files(self, finder): 'delegates dependency resolution to setuptools and could thus ' 'result in installation of unhashed packages.') - # Actually prepare the files, and collect any exceptions. The - # *HashUnsupported exceptions cannot be checked ahead of time, because - # req.populate_links() needs to be called before we can examine the - # link type. + # Actually prepare the files, and collect any exceptions. Most hash + # exceptions cannot be checked ahead of time, because + # req.populate_links() needs to be called before we can make decisions + # based on link type. discovered_reqs = [] hash_errors = HashErrors() for req in chain(root_reqs, discovered_reqs): @@ -629,11 +629,7 @@ def add_req(subreq): # 'unnamed' requirements will get added here self.add_requirement(req_to_install, None) - if not ignore_dependencies and not require_hashes: - # --require-hashes implies --no-deps because, otherwise, - # unhashed dependencies could creep in. In the future, we - # should report unhashed dependencies rather than just not - # installing them. 
+ if not ignore_dependencies: if (req_to_install.extras): logger.debug( "Installing extra requirements: %r", diff --git a/tests/unit/test_req.py b/tests/unit/test_req.py index cbdb3f5fa2a..d71256a5b4f 100644 --- a/tests/unit/test_req.py +++ b/tests/unit/test_req.py @@ -177,7 +177,7 @@ def test_unpinned_hash_checking(self, data): assert_raises_regexp( HashErrors, # Make sure all failing requirements are listed: - r'version pinned with ==. These do not:\n' + r'versions pinned with ==. These do not:\n' r' simple .* \(from -r file \(line 1\)\)\n' r' simple2>1.0 .* \(from -r file \(line 2\)\)', reqset.prepare_files, @@ -203,18 +203,45 @@ def test_hash_mismatch(self, data): reqset.prepare_files, finder) - def test_no_deps_on_require_hashes(self, data): - """Make sure --require-hashes mode implies --no-deps.""" + def test_unhashed_deps_on_require_hashes(self, data): + """Make sure unhashed, unpinned, or otherwise unrepeatable + dependencies get complained about when --require-hashes is on.""" reqset = self.basic_reqset() finder = PackageFinder([data.find_links], [], session=PipSession()) - req = list(process_line( - 'TopoRequires2==0.0.1 ' + reqset.add_requirement(next(process_line( + 'TopoRequires2==0.0.1 ' # requires TopoRequires '--hash=sha256:eaf9a01242c9f2f42cf2bd82a6a848cd' 'e3591d14f7896bdbefcf48543720c970', - 'file', 1))[0] - deps = reqset._prepare_file(finder, req, require_hashes=True) - assert deps == [], ('_prepare_files() resolved dependencies even ' - 'though --require-hashes was on.') + 'file', 1))) + assert_raises_regexp( + HashErrors, + r'In --require-hashes mode, all requirements must have their ' + r'versions pinned.*\n' + r' TopoRequires from .*$', + reqset.prepare_files, + finder) + + def test_hashed_deps_on_require_hashes(self, data): + """Make sure hashed dependencies get installed when --require-hashes + is on. 
+ + (We actually just check that no error gets raised while preparing; + there is no reason to expect installation to then fail, as the code + paths are the same as ever.) + + """ + reqset = self.basic_reqset() + finder = PackageFinder([data.find_links], [], session=PipSession()) + reqset.add_requirement(next(process_line( + 'TopoRequires2==0.0.1 ' # requires TopoRequires + '--hash=sha256:eaf9a01242c9f2f42cf2bd82a6a848cd' + 'e3591d14f7896bdbefcf48543720c970', + 'file', 1))) + reqset.add_requirement(next(process_line( + 'TopoRequires==0.0.1 ' + '--hash=sha256:d6dd1e22e60df512fdcf3640ced3039b3b02a56ab2cee81ebcb' + '3d0a6d4e8bfa6', + 'file', 2))) def test_no_egg_on_require_hashes(self, data): """Make sure --egg is illegal with --require-hashes. From 4f6737407b86daa7d638cd44b7ef7665ac9e91fe Mon Sep 17 00:00:00 2001 From: Erik Rose Date: Wed, 7 Oct 2015 16:31:41 -0400 Subject: [PATCH 11/39] Correct the level of the Wheel Cache heading. Previously, Hash Verification, Editable Installs, Controlling setup_requires, and Build System Interface were all getting placed under it. --- docs/reference/pip_install.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/reference/pip_install.rst b/docs/reference/pip_install.rst index ff7d5659221..3896d01b438 100644 --- a/docs/reference/pip_install.rst +++ b/docs/reference/pip_install.rst @@ -423,8 +423,8 @@ Windows .. _`Wheel cache`: -Wheel cache -*********** +Wheel Cache +~~~~~~~~~~~ Pip will read from the subdirectory ``wheels`` within the pip cache dir and use any packages found there. This is disabled via the same ``no-cache-dir`` option From 14506f82611c914d3190a301fa8d8b2d0ee49168 Mon Sep 17 00:00:00 2001 From: Erik Rose Date: Wed, 7 Oct 2015 16:35:27 -0400 Subject: [PATCH 12/39] Document hash-checking mode. 
--- docs/reference/pip_install.rst | 80 ++++++++++++++++++++++++++++------ docs/user_guide.rst | 79 ++++++++++++++++++++++++--------- 2 files changed, 124 insertions(+), 35 deletions(-) diff --git a/docs/reference/pip_install.rst b/docs/reference/pip_install.rst index 3896d01b438..12626ea273d 100644 --- a/docs/reference/pip_install.rst +++ b/docs/reference/pip_install.rst @@ -378,8 +378,8 @@ See the :ref:`pip install Examples`. SSL Certificate Verification ++++++++++++++++++++++++++++ -Starting with v1.3, pip provides SSL certificate verification over https, for the purpose -of providing secure, certified downloads from PyPI. +Starting with v1.3, pip provides SSL certificate verification over https, to +prevent man-in-the-middle attacks against PyPI downloads. .. _`Caching`: @@ -387,7 +387,7 @@ of providing secure, certified downloads from PyPI. Caching +++++++ -Starting with v6.0, pip provides an on by default cache which functions +Starting with v6.0, pip provides an on-by-default cache which functions similarly to that of a web browser. While the cache is on by default and is designed do the right thing by default you can disable the cache and always access PyPI by utilizing the ``--no-cache-dir`` option. @@ -443,19 +443,71 @@ When no wheels are found for an sdist, pip will attempt to build a wheel automatically and insert it into the wheel cache. -Hash Verification -+++++++++++++++++ +.. _`hash-checking mode`: -PyPI provides md5 hashes in the hash fragment of package download urls. - -pip supports checking this, as well as any of the -guaranteed hashlib algorithms (sha1, sha224, sha384, sha256, sha512, md5). - -The hash fragment is case sensitive (i.e. sha1 not SHA1). +Hash-Checking Mode +++++++++++++++++++ -This check is only intended to provide basic download corruption protection. -It is not intended to provide security against tampering. 
For that, -see :ref:`SSL Certificate Verification` +Since version 8.0, pip can check downloaded package archives against local +hashes to protect against remote tampering. To verify a package against one or +more hashes, add them to the end of the line:: + + FooProject == 1.2 --hash:sha256=2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824 \ + --hash:sha256=486ea46224d1bb4fb680f34f7c9ad96a8f24ec88be73ea8e5a6c65260e9cb8a7 + +(The ability to use multiple hashes is important when a package has both +binary and source distributions or when it offers binary distributions for a +variety of platforms.) + +The recommended hash algorithm at the moment is sha256, but stronger ones are +allowed, including all those supported by ``hashlib``. However, weak hashes +such as md5, sha1, and sha224 are excluded to avert false assurances of +security. + +Hash verification is an all-or-nothing proposition. Specifying a ``--hash`` +against any requirement not only checks that hash but also activates +*hash-checking mode*, which imposes several other security restrictions: + +* Hashes are required for all requirements. This is because a partially-hashed + requirements file is of little use and thus likely an error: a malicious + actor could slip bad code into the installation via one of the unhashed + requirements. Note that hashes embedded in URL-style requirements via the + ``#md5=...`` syntax suffice to satisfy this rule (regardless of hash + strength, for legacy reasons), though you use a stronger hash like sha256 + whenever possible. +* Hashes are required for all dependencies. An error is raised if there is a + dependency that is not spelled out and hashed in the requirements file. +* Requirements that take the form of project names (rather than URLs or local + filesystem paths) must be pinned to a specific version using ``==``. This + prevents a surprising hash mismatch upon the release of a new version + that matches the requirement specifier. 
+* ``--egg`` is disallowed, because it delegates installation of dependencies + to setuptools, giving up pip's ability to enforce any of the above. + +Hash-checking mode can be forced on with the ``--require-hashes`` command-line +option. This can be useful in deploy scripts, to ensure that the author of the +requirements file provided hashes. It is also a convenient way to bootstrap +your list of hashes, since it will show the hashes of the packages it +fetched. (It will fetch only a single archive for each package, so you may +still need to add additional hashes for alternatives: for instance if there is +both a binary and a source distribution available.) + +.. warning:: + Beware of the ``setup_requires`` keyword arg in :file:`setup.py`. The + (rare) packages that use it will cause those dependencies to be downloaded + by setuptools directly, skipping pip's hash-checking. If you need to use + such a package, see :ref:`Controlling + setup_requires`. + + +Hashes from PyPI +~~~~~~~~~~~~~~~~ + +PyPI provides an md5 hash in the fragment portion of each package download +URL. pip checks this as a protection against download corruption. However, +since the hash originates remotely, it is not a useful guard against tampering +and thus does not satisfy the ``--require-hashes`` demand that every package +have a local hash. .. _`editable-installs`: diff --git a/docs/user_guide.rst b/docs/user_guide.rst index 5711bd0e1eb..495a4a3d408 100644 --- a/docs/user_guide.rst +++ b/docs/user_guide.rst @@ -23,6 +23,8 @@ Specifiers` For more information and examples, see the :ref:`pip install` reference. +.. _PyPI: http://pypi.python.org/pypi + .. _`Requirements Files`: @@ -71,7 +73,6 @@ In practice, there are 4 common uses of Requirements files: pkg2 pkg3>=1.0,<=2.0 - 3. Requirements files are used to force pip to install an alternate version of a sub-dependency. 
For example, suppose `ProjectA` in your requirements file requires `ProjectB`, but the latest version (v1.3) has a bug, you can force @@ -591,44 +592,80 @@ From within a real python, where ``SomePackage`` *is* installed globally, and is Ensuring Repeatability ********************** -Four things are required to fully guarantee a repeatable installation using requirements files. +pip can achieve various levels of repeatability: + +Pinned Version Numbers +---------------------- + +Pinning the versions of your dependencies in the requirements file +protects you from bugs or incompatibilities in newly released versions:: -1. The requirements file was generated by ``pip freeze`` or you're sure it only - contains requirements that specify a specific version. + SomePackage == 1.2.3 + DependencyOfSomePackage == 4.5.6 -2. The installation is performed using :ref:`--no-deps `. - This guarantees that only what is explicitly listed in the requirements file is - installed. +Using :ref:`pip freeze` to generate the requirements file will ensure that not +only the top-level dependencies are included but their sub-dependencies as +well, and so on. Perform the installation using :ref:`--no-deps +` for an extra dose of insurance against installing +anything not explicitly listed. -3. None of the packages to be installed utilize the setup_requires keyword. See - :ref:`Controlling setup_requires`. +This strategy is easy to implement and works across OSes and architectures. +However, it trusts PyPI, its CDN, and the network. It also relies on indices +and find-links locations not allowing packages to change without a version +increase. (PyPI does protect against this.) -4. The installation is performed against an index or find-links location that is - guaranteed to *not* allow archives to be changed and updated without a - version increase. While this is safe on PyPI, it may not be safe for other - indices. 
If you are working with an unsafe index, consider the `peep project - `_ which offers this feature on top of pip - using requirements file comments. +Hash-checking Mode +------------------ + +Beyond pinning version numbers, you can add hashes against which to verify +downloaded packages:: + FooProject == 1.2 --hash:sha256=2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824 -.. _PyPI: http://pypi.python.org/pypi/ +This protects against compromises of PyPI, its CDN, the HTTPS certificate +chain, and the network between you and the packages. It also guards +against a package changing without a change in its version number, on +indexes that allow this. This approach is a good fit for automated +deployments to servers. +Hash-checking mode is a labor-saving alternative to running an internal index +server containing approved packages: it removes the need to upload packages, +maintain ACLs, and keep an audit trail (which a VCS give you for the +requirements file for free). It can also substitute for a vendor library, +providing easier upgrades and less VCS noise. It does not, of course, +provide the availability benefits of an internal index or a vendor library. + +For more, see :ref:`pip install\'s discussion of hash-checking mode `. .. _`Installation Bundle`: -Create an Installation Bundle with Compiled Dependencies -******************************************************** +Installation Bundles +-------------------- -You can create a simple bundle that contains all of the dependencies you wish -to install using:: +An installation bundle is a single file containing all of a project's +dependencies, already compiled if applicable. They are useful for +installing when the index server is unavailable and for avoiding +recompilation. 
Create a bundle like this:: $ tempdir=$(mktemp -d /tmp/wheelhouse-XXXXX) $ pip wheel -r requirements.txt --wheel-dir=$tempdir $ cwd=`pwd` $ (cd "$tempdir"; tar -cjvf "$cwd/bundled.tar.bz2" *) -Once you have a bundle, you can then install it using:: +Once you have a bundle, you can then install it like this:: $ tempdir=$(mktemp -d /tmp/wheelhouse-XXXXX) $ (cd $tempdir; tar -xvf /path/to/bundled.tar.bz2) $ pip install --force-reinstall --ignore-installed --upgrade --no-index --no-deps $tempdir/* + +Compiled packages are typically OS- and architecture-specific, so bundles +containing them are as well. Hash-checking mode can be used with +installation bundles to ensure that future bundles are built with +identical packages. + +.. warning:: + Finally, beware of the ``setup_requires`` keyword arg in :file:`setup.py`. + The (rare) packages that use it will cause those dependencies to be + downloaded by setuptools directly, skipping pip's protections. If you need + to use such a package, see :ref:`Controlling + setup_requires`. From bf0ff80d73a809a655ee9ae67d675644d0e2bfe6 Mon Sep 17 00:00:00 2001 From: Erik Rose Date: Wed, 7 Oct 2015 17:31:40 -0400 Subject: [PATCH 13/39] pep8 fixes --- tests/unit/test_req.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/tests/unit/test_req.py b/tests/unit/test_req.py index d71256a5b4f..4a47c7666e0 100644 --- a/tests/unit/test_req.py +++ b/tests/unit/test_req.py @@ -224,14 +224,13 @@ def test_unhashed_deps_on_require_hashes(self, data): def test_hashed_deps_on_require_hashes(self, data): """Make sure hashed dependencies get installed when --require-hashes is on. - - (We actually just check that no error gets raised while preparing; - there is no reason to expect installation to then fail, as the code - paths are the same as ever.) - + + (We actually just check that no "not all dependencies are hashed!" 
+ error gets raised while preparing; there is no reason to expect + installation to then fail, as the code paths are the same as ever.) + """ reqset = self.basic_reqset() - finder = PackageFinder([data.find_links], [], session=PipSession()) reqset.add_requirement(next(process_line( 'TopoRequires2==0.0.1 ' # requires TopoRequires '--hash=sha256:eaf9a01242c9f2f42cf2bd82a6a848cd' From c62cd71f0f6349689db5bd85b5fc3cc4b84d41c8 Mon Sep 17 00:00:00 2001 From: Erik Rose Date: Wed, 7 Oct 2015 17:33:57 -0400 Subject: [PATCH 14/39] Add --require-hashes option to pip download and pip wheel. Those commands already checked hashes, since they use RequirementSet, where the hash-checking is done. Reorder some options so pre, no-clean, and require-hashes are always in the same order. --- pip/cmdoptions.py | 11 +++++++++++ pip/commands/download.py | 4 +++- pip/commands/install.py | 10 +--------- pip/commands/wheel.py | 5 +++-- 4 files changed, 18 insertions(+), 12 deletions(-) diff --git a/pip/cmdoptions.py b/pip/cmdoptions.py index 48e766411d5..556979d05e9 100644 --- a/pip/cmdoptions.py +++ b/pip/cmdoptions.py @@ -565,6 +565,17 @@ def _merge_hash(option, opt_str, value, parser): 'hash before installing. Example: --hash=sha256:abcdef...') +require_hashes = partial( + Option, + '--require-hashes', + dest='require_hashes', + action='store_true', + default=False, + help='Require a hash to check each requirement against, for ' + 'repeatable installs. 
Implied by the presence of a --hash ' + 'option on any package in a requirements file') + + ########## # groups # ########## diff --git a/pip/commands/download.py b/pip/commands/download.py index 0dcf1ea3ada..e1af9c3a52e 100644 --- a/pip/commands/download.py +++ b/pip/commands/download.py @@ -54,8 +54,9 @@ def __init__(self, *args, **kw): cmd_opts.add_option(cmdoptions.no_binary()) cmd_opts.add_option(cmdoptions.only_binary()) cmd_opts.add_option(cmdoptions.src()) - cmd_opts.add_option(cmdoptions.no_clean()) cmd_opts.add_option(cmdoptions.pre()) + cmd_opts.add_option(cmdoptions.no_clean()) + cmd_opts.add_option(cmdoptions.require_hashes()) cmd_opts.add_option( '-d', '--dest', '--destination-dir', '--destination-directory', @@ -104,6 +105,7 @@ def run(self, options, args): ignore_dependencies=options.ignore_dependencies, session=session, isolated=options.isolated_mode, + require_hashes=options.require_hashes ) self.populate_requirement_set( requirement_set, diff --git a/pip/commands/install.py b/pip/commands/install.py index a4267922a32..faab4aad37a 100644 --- a/pip/commands/install.py +++ b/pip/commands/install.py @@ -156,16 +156,8 @@ def __init__(self, *args, **kw): cmd_opts.add_option(cmdoptions.no_binary()) cmd_opts.add_option(cmdoptions.only_binary()) cmd_opts.add_option(cmdoptions.pre()) - cmd_opts.add_option(cmdoptions.no_clean()) - - cmd_opts.add_option( - '--require-hashes', - dest='require_hashes', - action='store_true', - help='Perform a provably repeatable installation by requiring a ' - 'hash to check each package against. 
Implied by the presence ' - 'of a --hash option on any individual requirement') + cmd_opts.add_option(cmdoptions.require_hashes()) index_opts = cmdoptions.make_option_group( cmdoptions.index_group, diff --git a/pip/commands/wheel.py b/pip/commands/wheel.py index 0af377cc4ad..33dab46fe26 100644 --- a/pip/commands/wheel.py +++ b/pip/commands/wheel.py @@ -92,6 +92,7 @@ def __init__(self, *args, **kw): ) cmd_opts.add_option(cmdoptions.no_clean()) + cmd_opts.add_option(cmdoptions.require_hashes()) index_opts = cmdoptions.make_option_group( cmdoptions.index_group, @@ -159,7 +160,6 @@ def run(self, options, args): options.build_dir = os.path.abspath(options.build_dir) with self._build_session(options) as session: - finder = self._build_package_finder(options, session) build_delete = (not (options.no_clean or options.build_dir)) wheel_cache = WheelCache(options.cache_dir, options.format_control) @@ -174,7 +174,8 @@ def run(self, options, args): isolated=options.isolated_mode, session=session, wheel_cache=wheel_cache, - wheel_download_dir=options.wheel_dir + wheel_download_dir=options.wheel_dir, + require_hashes=options.require_hashes ) self.populate_requirement_set( From 09008bf1902621a965aa65e2380e6ca5568dfa9b Mon Sep 17 00:00:00 2001 From: Erik Rose Date: Wed, 7 Oct 2015 23:41:24 -0400 Subject: [PATCH 15/39] Add `pip hash` command. 
--- docs/reference/index.rst | 3 +-- docs/reference/pip_hash.rst | 42 ++++++++++++++++++++++++++++++ docs/reference/pip_install.rst | 28 +++++++++++++++----- docs/user_guide.rst | 11 ++++---- pip/commands/__init__.py | 3 +++ pip/commands/hash.py | 47 ++++++++++++++++++++++++++++++++++ tests/functional/test_hash.py | 8 ++++++ 7 files changed, 128 insertions(+), 14 deletions(-) create mode 100644 docs/reference/pip_hash.rst create mode 100644 pip/commands/hash.py create mode 100644 tests/functional/test_hash.py diff --git a/docs/reference/index.rst b/docs/reference/index.rst index 8779fa8ac97..cb83554b30e 100644 --- a/docs/reference/index.rst +++ b/docs/reference/index.rst @@ -14,5 +14,4 @@ Reference Guide pip_show pip_search pip_wheel - - + pip_hash diff --git a/docs/reference/pip_hash.rst b/docs/reference/pip_hash.rst new file mode 100644 index 00000000000..64e4ec4b5b5 --- /dev/null +++ b/docs/reference/pip_hash.rst @@ -0,0 +1,42 @@ +.. _`pip hash`: + +pip hash +------------ + +.. contents:: + +Usage +***** + +.. pip-command-usage:: hash + + +Description +*********** + +.. pip-command-description:: hash + + +Overview +++++++++ +``pip hash`` is a convenient way to get a hash digest for use with +:ref:`hash-checking mode`, especially for packages with multiple archives. The +error message from ``pip install --require-hashes ...`` will give you one +hash, but, if there are multiple archives (like source and binary ones), you +will need to manually download and compute a hash for the other. Otherwise, a +spurious hash mismatch could occur when :ref:`pip install` is passed a different +set of options, like :ref:`--no-binary `. 
+ + +Example +******** + +Compute the hash of a downloaded archive:: + + $ pip download SomePackage + Collecting SomePackage + Downloading SomePackage-2.2.tar.gz + Saved ./pip_downloads/SomePackage-2.2.tar.gz + Successfully downloaded SomePackage + $ pip hash ./pip_downloads/SomePackage-2.2.tar.gz + --hash=sha256:93e62e05c7ad3da1a233def6731e8285156701e3419a5fe279017c429ec67ce0 diff --git a/docs/reference/pip_install.rst b/docs/reference/pip_install.rst index 12626ea273d..61eda8f6529 100644 --- a/docs/reference/pip_install.rst +++ b/docs/reference/pip_install.rst @@ -460,7 +460,7 @@ binary and source distributions or when it offers binary distributions for a variety of platforms.) The recommended hash algorithm at the moment is sha256, but stronger ones are -allowed, including all those supported by ``hashlib``. However, weak hashes +allowed, including all those supported by ``hashlib``. However, weaker ones such as md5, sha1, and sha224 are excluded to avert false assurances of security. @@ -485,12 +485,28 @@ against any requirement not only checks that hash but also activates to setuptools, giving up pip's ability to enforce any of the above. Hash-checking mode can be forced on with the ``--require-hashes`` command-line -option. This can be useful in deploy scripts, to ensure that the author of the +option:: + + $ pip install --require-hashes -r requirements.txt + ... + Hashes are required in --require-hashes mode (implicitly on when a hash is + specified for any package). These requirements were missing hashes, + leaving them open to tampering. These are the hashes the downloaded + archives actually had. You can add lines like these to your requirements + files to prevent tampering. 
+ pyelasticsearch==1.0 --hash=sha256:44ddfb1225054d7d6b1d02e9338e7d4809be94edbe9929a2ec0807d38df993fa + more-itertools==2.2 --hash=sha256:93e62e05c7ad3da1a233def6731e8285156701e3419a5fe279017c429ec67ce0 + +This can be useful in deploy scripts, to ensure that the author of the requirements file provided hashes. It is also a convenient way to bootstrap -your list of hashes, since it will show the hashes of the packages it -fetched. (It will fetch only a single archive for each package, so you may -still need to add additional hashes for alternatives: for instance if there is -both a binary and a source distribution available.) +your list of hashes, since it shows the hashes of the packages it fetched. It +fetches only the preferred archive for each package, so you may still need to +add hashes for alternatives archives using :ref:`pip hash`: for instance if +there is both a binary and a source distribution. + +Hash-checking mode also functions with :ref:`pip download` and :ref:`pip +wheel`. A :ref:`comparison of hash-checking mode with other repeatability +strategies ` is available in the User Guide. .. warning:: Beware of the ``setup_requires`` keyword arg in :file:`setup.py`. The diff --git a/docs/user_guide.rst b/docs/user_guide.rst index 495a4a3d408..63c39b84a1c 100644 --- a/docs/user_guide.rst +++ b/docs/user_guide.rst @@ -624,16 +624,15 @@ downloaded packages:: This protects against compromises of PyPI, its CDN, the HTTPS certificate chain, and the network between you and the packages. It also guards -against a package changing without a change in its version number, on -indexes that allow this. This approach is a good fit for automated -deployments to servers. +against a package changing without its version number changing, on indexes +that allow this. This approach is a good fit for automated server deployments. 
-Hash-checking mode is a labor-saving alternative to running an internal index +Hash-checking mode is a labor-saving alternative to running a private index server containing approved packages: it removes the need to upload packages, -maintain ACLs, and keep an audit trail (which a VCS give you for the +maintain ACLs, and keep an audit trail (which a VCS gives you on the requirements file for free). It can also substitute for a vendor library, providing easier upgrades and less VCS noise. It does not, of course, -provide the availability benefits of an internal index or a vendor library. +provide the availability benefits of a private index or a vendor library. For more, see :ref:`pip install\'s discussion of hash-checking mode `. diff --git a/pip/commands/__init__.py b/pip/commands/__init__.py index dc3418f1038..6910f517be5 100644 --- a/pip/commands/__init__.py +++ b/pip/commands/__init__.py @@ -6,6 +6,7 @@ from pip.commands.completion import CompletionCommand from pip.commands.download import DownloadCommand from pip.commands.freeze import FreezeCommand +from pip.commands.hash import HashCommand from pip.commands.help import HelpCommand from pip.commands.list import ListCommand from pip.commands.search import SearchCommand @@ -18,6 +19,7 @@ commands_dict = { CompletionCommand.name: CompletionCommand, FreezeCommand.name: FreezeCommand, + HashCommand.name: HashCommand, HelpCommand.name: HelpCommand, SearchCommand.name: SearchCommand, ShowCommand.name: ShowCommand, @@ -38,6 +40,7 @@ ShowCommand, SearchCommand, WheelCommand, + HashCommand, HelpCommand, ] diff --git a/pip/commands/hash.py b/pip/commands/hash.py new file mode 100644 index 00000000000..54ddaed603c --- /dev/null +++ b/pip/commands/hash.py @@ -0,0 +1,47 @@ +from __future__ import absolute_import + +import hashlib +import logging +import sys + +from pip.basecommand import Command +from pip.exceptions import FAVORITE_HASH +from pip.status_codes import ERROR + + +logger = logging.getLogger(__name__) + + 
+class HashCommand(Command): + """ + Compute a hash of a local package archive. + + These can be used with --hash in a requirements file to do repeatable + installs. + + """ + name = 'hash' + usage = """%prog [options] ...""" + summary = 'Compute hashes of package archives.' + + def run(self, options, args): + if not args: + self.parser.print_usage(sys.stderr) + return ERROR + + for path in args: + logger.info('%s:\n--hash=%s:%s' % (path, + FAVORITE_HASH, + _hash_of_file(path))) + + +def _hash_of_file(path): + """Return the hash digest of a file.""" + with open(path, 'rb') as archive: + hash = hashlib.new(FAVORITE_HASH) + while True: + data = archive.read(2 ** 20) + if not data: + break + hash.update(data) + return hash.hexdigest() diff --git a/tests/functional/test_hash.py b/tests/functional/test_hash.py new file mode 100644 index 00000000000..83cb763c147 --- /dev/null +++ b/tests/functional/test_hash.py @@ -0,0 +1,8 @@ +def test_basic(script, tmpdir): + """Run 'pip hash' through its paces.""" + archive = tmpdir / 'hashable' + archive.write('hello') + result = script.pip('hash', archive) + expected = ('--hash=sha256:2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425' + 'e73043362938b9824') + assert expected in str(result) From d477ae6c5cfffc44c56717767fb71d48bc8c5746 Mon Sep 17 00:00:00 2001 From: Erik Rose Date: Thu, 8 Oct 2015 00:36:14 -0400 Subject: [PATCH 16/39] Add warning about `python setup.py install`. --- docs/reference/pip_install.rst | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/docs/reference/pip_install.rst b/docs/reference/pip_install.rst index 61eda8f6529..99f199f69c5 100644 --- a/docs/reference/pip_install.rst +++ b/docs/reference/pip_install.rst @@ -515,6 +515,17 @@ strategies ` is available in the User Guide. such a package, see :ref:`Controlling setup_requires`. +.. warning:: + Be careful not to nullify all your security work when you install your + actual project. 
If you call ``python setup.py install`` after installing + your requirements, setuptools will happily go out and download, unchecked, + anything you missed in your requirements file—and it’s easy to miss things + as your project evolves. One way to be safe is to pack up your project and + then install that using pip and :ref:`--no-deps `:: + + python setup.py sdist + pip install --no-deps dist/yourproject-1.0.tar.gz + Hashes from PyPI ~~~~~~~~~~~~~~~~ From 0e6058bc634fc35545a3019119ccf2dc5915d57c Mon Sep 17 00:00:00 2001 From: Erik Rose Date: Thu, 8 Oct 2015 17:57:21 -0400 Subject: [PATCH 17/39] Change head() method to an attr in hashing exceptions. Tweak English. Standardize on present tense, improve flow, and clarify. --- pip/exceptions.py | 63 ++++++++++++++++-------------------------- tests/unit/test_req.py | 10 ++++--- 2 files changed, 30 insertions(+), 43 deletions(-) diff --git a/pip/exceptions.py b/pip/exceptions.py index 3c4c6f590dd..a18867fcfb6 100644 --- a/pip/exceptions.py +++ b/pip/exceptions.py @@ -51,7 +51,8 @@ class UnsupportedWheel(InstallationError): """Unsupported wheel.""" -# The recommended hash algo of the moment. Feel free to change this any time. +# The recommended hash algo of the moment. Change this whenever the state of +# the art changes; it won't hurt backward compatibility. FAVORITE_HASH = 'sha256' @@ -68,7 +69,7 @@ def __str__(self): lines = [] self.errors.sort(key=lambda e: e.order) for cls, errors_of_cls in groupby(self.errors, lambda e: e.__class__): - lines.append(cls.head()) + lines.append(cls.head) lines.extend(e.body() for e in errors_of_cls) if lines: return '\n'.join(lines) @@ -88,17 +89,15 @@ class HashError(InstallationError): about unpinned packages when he has deeper issues, like VCS dependencies, to deal with. Also keeps error reports in a deterministic order. + :cvar head: A section heading for display above potentially many + exceptions of this kind :ivar req: The InstallRequirement that triggered this error. 
This is pasted on after the exception is instantiated, because it's not typically available earlier. """ req = None - - @classmethod - def head(cls): - """Return a section heading for display above potentially many - exceptions of this kind.""" + head = '' def body(self): """Return a summary of me for display under the heading. @@ -113,7 +112,7 @@ def body(self): return ' %s' % self._requirement_name() def __str__(self): - return '%s\n%s' % (self.head(), self.body()) + return '%s\n%s' % (self.head, self.body()) def _requirement_name(self): """Return a description of the requirement that triggered me. @@ -130,11 +129,8 @@ class VcsHashUnsupported(HashError): we don't have a method for hashing those.""" order = 0 - - @classmethod - def head(cls): - return ("Can't verify hashes for these requirements because we don't " - "have a way to hash version control repositories:") + head = ("Can't verify hashes for these requirements because we don't " + "have a way to hash version control repositories:") class DirectoryUrlHashUnsupported(HashError): @@ -142,17 +138,21 @@ class DirectoryUrlHashUnsupported(HashError): we don't have a method for hashing those.""" order = 1 - - @classmethod - def head(cls): - return ("Can't verify hashes for these file:// requirements because " - "they point to directories:") + head = ("Can't verify hashes for these file:// requirements because they " + "point to directories:") class HashMissing(HashError): """A hash was needed for a requirement but is absent.""" order = 2 + head = ('Hashes are required in --require-hashes mode, but they are ' + 'missing from some requirements. Here is a list of those ' + 'requirements along with the hashes their downloaded archives ' + 'actually had. Add lines like these to your requirements files to ' + 'prevent tampering. 
(If you did not enable --require-hashes ' + 'manually, note that it turns on automatically when any package ' + 'has a hash.)') def __init__(self, gotten_hash): """ @@ -161,15 +161,6 @@ def __init__(self, gotten_hash): """ self.gotten_hash = gotten_hash - @classmethod - def head(cls): - return ('Hashes are required in --require-hashes mode (implicitly on ' - 'when a hash is specified for any package). These ' - 'requirements were missing hashes, leaving them open to ' - 'tampering. These are the hashes the downloaded archives ' - 'actually had. You can add lines like these to your ' - 'requirements files to prevent tampering.') - def body(self): package_name = (self.req.req if self.req and # In case someone feeds something @@ -187,11 +178,8 @@ class HashUnpinned(HashError): version.""" order = 3 - - @classmethod - def head(cls): - return ('In --require-hashes mode, all requirements must have their ' - 'versions pinned with ==. These do not:') + head = ('In --require-hashes mode, all requirements must have their ' + 'versions pinned with ==. These do not:') class HashMismatch(HashError): @@ -203,6 +191,10 @@ class HashMismatch(HashError): """ order = 4 + head = ('THESE PACKAGES DO NOT MATCH THE HASHES FROM THE REQUIREMENTS ' + 'FILE. If you have updated the package versions, please update ' + 'the hashes. Otherwise, examine the package contents carefully; ' + 'someone may have tampered with them.') def __init__(self, goods, gots): """ @@ -214,13 +206,6 @@ def __init__(self, goods, gots): self.goods = goods self.gots = gots - @classmethod - def head(cls): - return ('THESE PACKAGES DID NOT MATCH THE HASHES FROM THE ' - 'REQUIREMENTS FILE. If you have updated the package versions, ' - 'update the hashes. 
Otherwise, examine the package contents ' - 'carefully; someone may have tampered with them.') - def body(self): return ' %s:\n%s' % (self._requirement_name(), self._hash_comparison()) diff --git a/tests/unit/test_req.py b/tests/unit/test_req.py index 4a47c7666e0..a0488e0966d 100644 --- a/tests/unit/test_req.py +++ b/tests/unit/test_req.py @@ -98,9 +98,10 @@ def test_missing_hash_checking(self, data): session=PipSession()) assert_raises_regexp( HashErrors, - r'These requirements were missing hashes.*\n' + r'Hashes are required in --require-hashes mode, but they are ' + r'missing .*\n' r' blessings==1.0 --hash=sha256:[0-9a-f]+\n' - r'THESE PACKAGES DID NOT MATCH THE HASHES.*\n' + r'THESE PACKAGES DO NOT MATCH THE HASHES.*\n' r' tracefront==0.1 .*:\n' r' Expected sha256 somehash\n' r' Got [0-9a-f]+$', @@ -117,7 +118,8 @@ def test_missing_hash_with_require_hashes(self, data): finder = PackageFinder([data.find_links], [], session=PipSession()) assert_raises_regexp( HashErrors, - r'These requirements were missing hashes.*\n' + r'Hashes are required in --require-hashes mode, but they are ' + r'missing .*\n' r' simple==1.0 --hash=sha256:393043e672415891885c9a2a0929b1af95' r'fb866d6ca016b42d2e6ce53619b653$', reqset.prepare_files, @@ -195,7 +197,7 @@ def test_hash_mismatch(self, data): finder = PackageFinder([data.find_links], [], session=PipSession()) assert_raises_regexp( HashErrors, - r'THESE PACKAGES DID NOT MATCH THE HASHES.*\n' + r'THESE PACKAGES DO NOT MATCH THE HASHES.*\n' r' file:///.*/data/packages/simple-1\.0\.tar\.gz .*:\n' r' Expected sha256 badbad\n' r' Got 393043e672415891885c9a2a0929b1af95fb866d' From 6f828c351f726a82450e861f33a4a065a9f4093d Mon Sep 17 00:00:00 2001 From: Erik Rose Date: Fri, 9 Oct 2015 12:27:10 -0400 Subject: [PATCH 18/39] Correct and clarify docs and comments. 
--- docs/reference/pip_install.rst | 4 ++-- pip/download.py | 11 ++++++----- pip/req/req_install.py | 11 ++++++----- 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/docs/reference/pip_install.rst b/docs/reference/pip_install.rst index 8b0e4ff2f20..e7158528ac6 100644 --- a/docs/reference/pip_install.rst +++ b/docs/reference/pip_install.rst @@ -475,8 +475,8 @@ against any requirement not only checks that hash but also activates actor could slip bad code into the installation via one of the unhashed requirements. Note that hashes embedded in URL-style requirements via the ``#md5=...`` syntax suffice to satisfy this rule (regardless of hash - strength, for legacy reasons), though you use a stronger hash like sha256 - whenever possible. + strength, for legacy reasons), though you should use a stronger + hash like sha256 whenever possible. * Hashes are required for all dependencies. An error is raised if there is a dependency that is not spelled out and hashed in the requirements file. * Requirements that take the form of project names (rather than URLs or local diff --git a/pip/download.py b/pip/download.py index 05bfffd3977..9573a33b0b6 100644 --- a/pip/download.py +++ b/pip/download.py @@ -674,10 +674,11 @@ def unpack_file_url(link, location, download_dir=None, hashes=None): logger.info('Link is a directory, ignoring download_dir') return - # If --require-hashes is off, `hashes` is either empty, the link hash, or - # MissingHashes, and it's required to match. If --require-hashes is on, we - # are satisfied by any hash in `hashes` matching: a URL-based or an - # option-based one; no internet-sourced hash will be in `hashes`. + # If --require-hashes is off, `hashes` is either empty, the + # link's embeddded hash, or MissingHashes; it is required to + # match. If --require-hashes is on, we are satisfied by any + # hash in `hashes` matching: a URL-based or an option-based + # one; no internet-sourced hash will be in `hashes`. 
if hashes: hashes.check_against_path(link_path) @@ -744,7 +745,7 @@ def unpack_url(link, location, download_dir=None, - if only_download, mark location for deletion :param hashes: A Hashes object, one of whose embedded hashes must match, - or I'll raise HashMismatch. If the Hashes is empty, no matches are + or HashMismatch will be raised. If the Hashes is empty, no matches are required, and unhashable types of requirements (like VCS ones, which would ordinarily raise HashUnsupported) are allowed. """ diff --git a/pip/req/req_install.py b/pip/req/req_install.py index 02e0ab9e35d..f43ddc643b9 100644 --- a/pip/req/req_install.py +++ b/pip/req/req_install.py @@ -1038,11 +1038,12 @@ def hashes(self, trust_internet=True): """Return a hash-comparer that considers my option- and URL-based hashes to be known-good. - Hashes in URLs are almost peers with ones from flags. They satisfy - --require-hashes (whether it was implicitly or explicitly activated) - but do not activate it. md5 and sha224 are not allowed in flags, which - should nudge people toward good algos. We always OR all hashes - together, even ones from URLs. + Hashes in URLs--ones embedded in the requirements file, not ones + downloaded from an index server--are almost peers with ones from + flags. They satisfy --require-hashes (whether it was implicitly or + explicitly activated) but do not activate it. md5 and sha224 are not + allowed in flags, which should nudge people toward good algos. We + always OR all hashes together, even ones from URLs. :param trust_internet: Whether to trust URL-based (#md5=...) hashes downloaded from the internet, as by populate_link() From 52111c139738d70fc081384589fba28c54fddd46 Mon Sep 17 00:00:00 2001 From: Erik Rose Date: Fri, 9 Oct 2015 12:29:46 -0400 Subject: [PATCH 19/39] Demote package-is-already-installed log message to debug-level. An info-level message for each package might be too intense. 
And it might give a false sense of security as well: it doesn't confirm that the virtualenv is non-empty; it merely notices when a package we're installing is already there. --- pip/req/req_set.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pip/req/req_set.py b/pip/req/req_set.py index 1bb1684b675..ab260e45546 100644 --- a/pip/req/req_set.py +++ b/pip/req/req_set.py @@ -475,7 +475,7 @@ def _prepare_file(self, req_to_install.archive(self.download_dir) elif req_to_install.satisfied_by: if require_hashes: - logger.info( + logger.debug( 'Since it is already installed, we are trusting this ' 'package without checking its hash. To ensure a ' 'completely repeatable environment, install into an ' From b95599a944cb748d51603842f9103c7cceaa17e2 Mon Sep 17 00:00:00 2001 From: Erik Rose Date: Fri, 9 Oct 2015 12:37:09 -0400 Subject: [PATCH 20/39] Change _good_hashes() to a whitelist. This guards against the possibility of a weaker hash being added to hashlib in the future. Also give _good_hashes() a more descriptive name, and describe what we mean by "strong". We can get away with returning a static list because those algorithms are guaranteed present in hashlib. --- pip/cmdoptions.py | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/pip/cmdoptions.py b/pip/cmdoptions.py index 556979d05e9..180f8c2868f 100644 --- a/pip/cmdoptions.py +++ b/pip/cmdoptions.py @@ -10,7 +10,6 @@ from __future__ import absolute_import from functools import partial -import hashlib from optparse import OptionGroup, SUPPRESS_HELP, Option import warnings @@ -524,14 +523,10 @@ def only_binary(): ) -def _good_hashes(): - """Return names of hashlib algorithms at least as strong as sha256.""" - # Remove getattr when 2.6 dies. 
- algos = set( - getattr(hashlib, - 'algorithms', - ('md5', 'sha1', 'sha224', 'sha256', 'sha384', 'sha512'))) - return algos - set(['md5', 'sha1', 'sha224']) +def _strong_hashes(): + """Return names of hashlib algorithms at least as + collision-resistant as sha256.""" + return ['sha256', 'sha384', 'sha512'] def _merge_hash(option, opt_str, value, parser): @@ -545,10 +540,10 @@ def _merge_hash(option, opt_str, value, parser): parser.error('Arguments to %s must be a hash name ' 'followed by a value, like --hash=sha256:abcde...' % opt_str) - goods = _good_hashes() - if algo not in goods: + strongs = _strong_hashes() + if algo not in strongs: parser.error('Allowed hash algorithms for %s are %s.' % - (opt_str, ', '.join(sorted(goods)))) + (opt_str, ', '.join(strongs))) parser.values.hashes.setdefault(algo, []).append(digest) From 3824d735a0303f0f91b1bb662c92e584925866f6 Mon Sep 17 00:00:00 2001 From: Erik Rose Date: Fri, 9 Oct 2015 14:46:56 -0400 Subject: [PATCH 21/39] Revise what hashes protect you against. We don't need to talk about the network, since HTTPS should ensure transmission integrity. We do need to watch out for the CA chain. Stop mentioning the CDN because it's a deep hole: we might as well mention Rackspace and Amazon and who knows who else. --- docs/user_guide.rst | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/docs/user_guide.rst b/docs/user_guide.rst index 52a297adaa5..55960fda992 100644 --- a/docs/user_guide.rst +++ b/docs/user_guide.rst @@ -610,9 +610,10 @@ well, and so on. Perform the installation using :ref:`--no-deps anything not explicitly listed. This strategy is easy to implement and works across OSes and architectures. -However, it trusts PyPI, its CDN, and the network. It also relies on indices -and find-links locations not allowing packages to change without a version -increase. (PyPI does protect against this.) +However, it trusts PyPI and the certificate authority chain. 
It +also relies on indices and find-links locations not allowing +packages to change without a version increase. (PyPI does protect +against this.) Hash-checking Mode ------------------ @@ -622,10 +623,10 @@ downloaded packages:: FooProject == 1.2 --hash:sha256=2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824 -This protects against compromises of PyPI, its CDN, the HTTPS certificate -chain, and the network between you and the packages. It also guards -against a package changing without its version number changing, on indexes -that allow this. This approach is a good fit for automated server deployments. +This protects against a compromise of PyPI or the HTTPS +certificate chain. It also guards against a package changing +without its version number changing (on indexes that allow this). +This approach is a good fit for automated server deployments. Hash-checking mode is a labor-saving alternative to running a private index server containing approved packages: it removes the need to upload packages, From be4e315c59d774026ce9937b94fc49bc91511bef Mon Sep 17 00:00:00 2001 From: Erik Rose Date: Fri, 9 Oct 2015 14:51:54 -0400 Subject: [PATCH 22/39] Rewrap args of unpack_http_url() to match the style in send(), above. 
--- pip/download.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/pip/download.py b/pip/download.py index 9573a33b0b6..9fc7d02fdc6 100644 --- a/pip/download.py +++ b/pip/download.py @@ -615,11 +615,8 @@ def _copy_file(filename, location, content_type, link): logger.info('Saved %s', display_path(download_location)) -def unpack_http_url(link, - location, - download_dir=None, - session=None, - hashes=None): +def unpack_http_url(link, location, download_dir=None, + session=None, hashes=None): if session is None: raise TypeError( "unpack_http_url() missing 1 required keyword argument: 'session'" From 304c90aa46c099a6daa279e1bdf2c43f50a6a78c Mon Sep 17 00:00:00 2001 From: Erik Rose Date: Fri, 9 Oct 2015 15:00:50 -0400 Subject: [PATCH 23/39] Break after initial """ in multi-paragraph docstrings in exceptions module. --- pip/exceptions.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/pip/exceptions.py b/pip/exceptions.py index a18867fcfb6..ffaf3dfef61 100644 --- a/pip/exceptions.py +++ b/pip/exceptions.py @@ -82,7 +82,8 @@ def __bool__(self): class HashError(InstallationError): - """A failure to verify a package against known-good hashes + """ + A failure to verify a package against known-good hashes :cvar order: An int sorting hash exception classes by difficulty of recovery (lower being harder), so the user doesn't bother fretting @@ -183,7 +184,8 @@ class HashUnpinned(HashError): class HashMismatch(HashError): - """Distribution file hash values don't match. + """ + Distribution file hash values don't match. :ivar package_name: The name of the package that triggered the hash mismatch. Feel free to write to this after the exception is raise to @@ -211,7 +213,8 @@ def body(self): self._hash_comparison()) def _hash_comparison(self): - """Return a comparison of actual and expected hash values. + """ + Return a comparison of actual and expected hash values. 
Example:: From 05b7ef9467594d8ed6e31b9534ddecef9d2db57f Mon Sep 17 00:00:00 2001 From: Erik Rose Date: Sun, 11 Oct 2015 09:00:01 -0400 Subject: [PATCH 24/39] Rename "goods" to "allowed" for clarity. Renaming "gots" didn't go well. I think the current naming is the most concise way to put it. If we rename it to "got", then the loop iterator can't be called "got", and the simple relationship between the iterator and collection names is lost. "Actual" and "actuals" are the other names that occurred to me, but they look so much like "allowed" that the code becomes harder to read. --- pip/exceptions.py | 8 ++++---- pip/utils/hashes.py | 12 ++++++------ 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/pip/exceptions.py b/pip/exceptions.py index ffaf3dfef61..7cdfcf7af60 100644 --- a/pip/exceptions.py +++ b/pip/exceptions.py @@ -198,14 +198,14 @@ class HashMismatch(HashError): 'the hashes. Otherwise, examine the package contents carefully; ' 'someone may have tampered with them.') - def __init__(self, goods, gots): + def __init__(self, allowed, gots): """ - :param goods: A dict of algorithm names pointing to lists of allowed + :param allowed: A dict of algorithm names pointing to lists of allowed hex digests :param gots: A dict of algorithm names pointing to hashes we actually got from the files under suspicion """ - self.goods = goods + self.allowed = allowed self.gots = gots def body(self): @@ -229,7 +229,7 @@ def hash_then_or(hash_name): return chain([hash_name], repeat(' or')) lines = [] - for hash_name, expecteds in iteritems(self.goods): + for hash_name, expecteds in iteritems(self.allowed): prefix = hash_then_or(hash_name) lines.extend((' Expected %s %s' % (next(prefix), e)) for e in expecteds) diff --git a/pip/utils/hashes.py b/pip/utils/hashes.py index ab06c097a54..84fbc4f278d 100644 --- a/pip/utils/hashes.py +++ b/pip/utils/hashes.py @@ -17,7 +17,7 @@ def __init__(self, hashes=None): :param hashes: A dict of algorithm names pointing to lists of 
allowed hex digests """ - self._goods = {} if hashes is None else hashes + self._allowed = {} if hashes is None else hashes def check_against_chunks(self, chunks): """Check good hashes against ones built from iterable of chunks of @@ -27,7 +27,7 @@ def check_against_chunks(self, chunks): """ gots = {} - for hash_name in iterkeys(self._goods): + for hash_name in iterkeys(self._allowed): try: gots[hash_name] = hashlib.new(hash_name) except (ValueError, TypeError): @@ -38,12 +38,12 @@ def check_against_chunks(self, chunks): hash.update(chunk) for hash_name, got in iteritems(gots): - if got.hexdigest() in self._goods[hash_name]: + if got.hexdigest() in self._allowed[hash_name]: return self._raise(gots) def _raise(self, gots): - raise HashMismatch(self._goods, gots) + raise HashMismatch(self._allowed, gots) def check_against_file(self, file): """Check good hashes against a file-like object @@ -65,7 +65,7 @@ def check_against_path(self, path): def __nonzero__(self): """Return whether I know any known-good hashes.""" - return bool(self._goods) + return bool(self._allowed) def __bool__(self): return self.__nonzero__() @@ -74,7 +74,7 @@ def __bool__(self): class MissingHashes(Hashes): """A workalike for Hashes used when we're missing a hash for a requirement - It computes the "gotten" hash of the requirement and raises a HashMissing + It computes the actual hash of the requirement and raises a HashMissing exception showing it to the user. """ From f35ce75025cdc33fe207e2c0b07f50e3b4a7e767 Mon Sep 17 00:00:00 2001 From: Erik Rose Date: Sun, 11 Oct 2015 09:15:06 -0400 Subject: [PATCH 25/39] Make "installation bundles" less of an official term. 
--- docs/user_guide.rst | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/docs/user_guide.rst b/docs/user_guide.rst index 55960fda992..90f941cc20d 100644 --- a/docs/user_guide.rst +++ b/docs/user_guide.rst @@ -642,26 +642,27 @@ For more, see :ref:`pip install\'s discussion of hash-checking mode Date: Sun, 11 Oct 2015 10:13:13 -0400 Subject: [PATCH 26/39] Allow === as a pinning operator. https://www.python.org/dev/peps/pep-0440/#arbitrary-equality --- pip/req/req_install.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pip/req/req_install.py b/pip/req/req_install.py index f43ddc643b9..62bc6a18018 100644 --- a/pip/req/req_install.py +++ b/pip/req/req_install.py @@ -273,7 +273,8 @@ def is_pinned(self): For example, some-package==1.2 is pinned; some-package>1.2 is not. """ specifiers = self.specifier - return len(specifiers) == 1 and next(iter(specifiers)).operator == '==' + return (len(specifiers) == 1 and + next(iter(specifiers)).operator in ('==', '===')) def from_path(self): if self.req is None: From 76983f363acd3cff673b64cbcdd2eb1340a7a710 Mon Sep 17 00:00:00 2001 From: Erik Rose Date: Mon, 12 Oct 2015 11:51:27 -0400 Subject: [PATCH 27/39] Restore documentation about alternate hash algorithms in URLs. --- docs/reference/pip_install.rst | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/docs/reference/pip_install.rst b/docs/reference/pip_install.rst index e7158528ac6..f6d72f61921 100644 --- a/docs/reference/pip_install.rst +++ b/docs/reference/pip_install.rst @@ -532,11 +532,13 @@ strategies ` is available in the User Guide. Hashes from PyPI ~~~~~~~~~~~~~~~~ -PyPI provides an md5 hash in the fragment portion of each package download -URL. pip checks this as a protection against download corruption. 
However, -since the hash originates remotely, it is not a useful guard against tampering -and thus does not satisfy the ``--require-hashes`` demand that every package -have a local hash. +PyPI provides an MD5 hash in the fragment portion of each package download URL, +like ``#md5=123...``, which pip checks as a protection against download +corruption. Other hash algorithms that have guaranteed support from ``hashlib`` +are also supported here: sha1, sha224, sha384, sha256, and sha512. Since this +hash originates remotely, it is not a useful guard against tampering and thus +does not satisfy the ``--require-hashes`` demand that every package have a +local hash. .. _`editable-installs`: From be6dccb0345be31886f6647251c7c188fc7e1560 Mon Sep 17 00:00:00 2001 From: Erik Rose Date: Mon, 12 Oct 2015 12:54:29 -0400 Subject: [PATCH 28/39] Factor up the idiom of reading chunks from a file until EOF. --- pip/commands/hash.py | 8 +++----- pip/utils/__init__.py | 9 +++++++++ pip/utils/hashes.py | 9 ++------- pip/wheel.py | 7 +++---- 4 files changed, 17 insertions(+), 16 deletions(-) diff --git a/pip/commands/hash.py b/pip/commands/hash.py index 54ddaed603c..b030aa244dd 100644 --- a/pip/commands/hash.py +++ b/pip/commands/hash.py @@ -7,6 +7,7 @@ from pip.basecommand import Command from pip.exceptions import FAVORITE_HASH from pip.status_codes import ERROR +from pip.utils import read_chunks logger = logging.getLogger(__name__) @@ -39,9 +40,6 @@ def _hash_of_file(path): """Return the hash digest of a file.""" with open(path, 'rb') as archive: hash = hashlib.new(FAVORITE_HASH) - while True: - data = archive.read(2 ** 20) - if not data: - break - hash.update(data) + for chunk in read_chunks(archive): + hash.update(chunk) return hash.hexdigest() diff --git a/pip/utils/__init__.py b/pip/utils/__init__.py index 1d907217331..d1ffda4f9ac 100644 --- a/pip/utils/__init__.py +++ b/pip/utils/__init__.py @@ -221,6 +221,15 @@ def file_contents(filename): return fp.read().decode('utf-8') +def 
read_chunks(file, size=4096): + """Yield pieces of data from a file-like object until EOF.""" + while True: + chunk = file.read(size) + if not chunk: + break + yield chunk + + def split_leading_dir(path): path = path.lstrip('/').lstrip('\\') if '/' in path and (('\\' in path and path.find('/') < path.find('\\')) or diff --git a/pip/utils/hashes.py b/pip/utils/hashes.py index 84fbc4f278d..2e58bca26ca 100644 --- a/pip/utils/hashes.py +++ b/pip/utils/hashes.py @@ -4,6 +4,7 @@ from pip.exceptions import (HashMismatch, HashMissing, InstallationError, FAVORITE_HASH) +from pip.utils import read_chunks from pip._vendor.six import iteritems, iterkeys, itervalues @@ -51,13 +52,7 @@ def check_against_file(self, file): Raise HashMismatch if none match. """ - def chunks(): - while True: - chunk = file.read(4096) - if not chunk: - break - yield chunk - return self.check_against_chunks(chunks()) + return self.check_against_chunks(read_chunks(file)) def check_against_path(self, path): with open(path, 'rb') as file: diff --git a/pip/wheel.py b/pip/wheel.py index 5ccebf042e7..89ca626108d 100644 --- a/pip/wheel.py +++ b/pip/wheel.py @@ -31,7 +31,8 @@ from pip.locations import distutils_scheme, PIP_DELETE_MARKER_FILENAME from pip import pep425tags from pip.utils import ( - call_subprocess, ensure_dir, captured_stdout, rmtree, canonicalize_name) + call_subprocess, ensure_dir, captured_stdout, rmtree, canonicalize_name, + read_chunks) from pip.utils.logging import indent_log from pip._vendor.distlib.scripts import ScriptMaker from pip._vendor import pkg_resources @@ -149,11 +150,9 @@ def rehash(path, algo='sha256', blocksize=1 << 20): h = hashlib.new(algo) length = 0 with open(path, 'rb') as f: - block = f.read(blocksize) - while block: + for block in read_chunks(f, size=blocksize): length += len(block) h.update(block) - block = f.read(blocksize) digest = 'sha256=' + urlsafe_b64encode( h.digest() ).decode('latin1').rstrip('=') From 9e5e34e9f76f0855ed452d9b418abc5880eaa564 Mon Sep 17 
00:00:00 2001 From: Erik Rose Date: Mon, 12 Oct 2015 14:01:32 -0400 Subject: [PATCH 29/39] Add --algorithm flag to `pip hash`. --- docs/reference/pip_hash.rst | 13 ++++++++++--- pip/cmdoptions.py | 10 ++++++---- pip/commands/hash.py | 22 ++++++++++++++++++---- tests/functional/test_hash.py | 29 +++++++++++++++++++++++++---- 4 files changed, 59 insertions(+), 15 deletions(-) diff --git a/docs/reference/pip_hash.rst b/docs/reference/pip_hash.rst index 64e4ec4b5b5..72052bc22dc 100644 --- a/docs/reference/pip_hash.rst +++ b/docs/reference/pip_hash.rst @@ -23,9 +23,15 @@ Overview :ref:`hash-checking mode`, especially for packages with multiple archives. The error message from ``pip install --require-hashes ...`` will give you one hash, but, if there are multiple archives (like source and binary ones), you -will need to manually download and compute a hash for the other. Otherwise, a -spurious hash mismatch could occur when :ref:`pip install` is passed a different -set of options, like :ref:`--no-binary `. +will need to manually download and compute a hash for the others. Otherwise, a +spurious hash mismatch could occur when :ref:`pip install` is passed a +different set of options, like :ref:`--no-binary `. + + +Options +******* + +.. pip-command-options:: hash Example @@ -39,4 +45,5 @@ Compute the hash of a downloaded archive:: Saved ./pip_downloads/SomePackage-2.2.tar.gz Successfully downloaded SomePackage $ pip hash ./pip_downloads/SomePackage-2.2.tar.gz + ./pip_downloads/SomePackage-2.2.tar.gz: --hash=sha256:93e62e05c7ad3da1a233def6731e8285156701e3419a5fe279017c429ec67ce0 diff --git a/pip/cmdoptions.py b/pip/cmdoptions.py index 180f8c2868f..0d986427bf3 100644 --- a/pip/cmdoptions.py +++ b/pip/cmdoptions.py @@ -523,9 +523,11 @@ def only_binary(): ) -def _strong_hashes(): - """Return names of hashlib algorithms at least as - collision-resistant as sha256.""" +def strong_hashes(): + """Return names of hashlib algorithms allowed by the --hash option. 
+ + Currently, those are the ones at least as collision-resistant as sha256. + """ return ['sha256', 'sha384', 'sha512'] @@ -540,7 +542,7 @@ def _merge_hash(option, opt_str, value, parser): parser.error('Arguments to %s must be a hash name ' 'followed by a value, like --hash=sha256:abcde...' % opt_str) - strongs = _strong_hashes() + strongs = strong_hashes() if algo not in strongs: parser.error('Allowed hash algorithms for %s are %s.' % (opt_str, ', '.join(strongs))) diff --git a/pip/commands/hash.py b/pip/commands/hash.py index b030aa244dd..0d138ad4efc 100644 --- a/pip/commands/hash.py +++ b/pip/commands/hash.py @@ -5,6 +5,7 @@ import sys from pip.basecommand import Command +from pip.cmdoptions import strong_hashes from pip.exceptions import FAVORITE_HASH from pip.status_codes import ERROR from pip.utils import read_chunks @@ -25,21 +26,34 @@ class HashCommand(Command): usage = """%prog [options] ...""" summary = 'Compute hashes of package archives.' + def __init__(self, *args, **kw): + super(HashCommand, self).__init__(*args, **kw) + self.cmd_opts.add_option( + '-a', '--algorithm', + dest='algorithm', + choices=strong_hashes(), + action='store', + default=FAVORITE_HASH, + help='The hash algorithm to use: one of %s' % + ', '.join(strong_hashes())) + self.parser.insert_option_group(0, self.cmd_opts) + def run(self, options, args): if not args: self.parser.print_usage(sys.stderr) return ERROR + algorithm = options.algorithm for path in args: logger.info('%s:\n--hash=%s:%s' % (path, - FAVORITE_HASH, - _hash_of_file(path))) + algorithm, + _hash_of_file(path, algorithm))) -def _hash_of_file(path): +def _hash_of_file(path, algorithm): """Return the hash digest of a file.""" with open(path, 'rb') as archive: - hash = hashlib.new(FAVORITE_HASH) + hash = hashlib.new(algorithm) for chunk in read_chunks(archive): hash.update(chunk) return hash.hexdigest() diff --git a/tests/functional/test_hash.py b/tests/functional/test_hash.py index 83cb763c147..dc19b2c7168 100644 --- 
a/tests/functional/test_hash.py +++ b/tests/functional/test_hash.py @@ -1,8 +1,29 @@ def test_basic(script, tmpdir): - """Run 'pip hash' through its paces.""" - archive = tmpdir / 'hashable' - archive.write('hello') - result = script.pip('hash', archive) + """Run 'pip hash' through its default behavior.""" expected = ('--hash=sha256:2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425' 'e73043362938b9824') + result = script.pip('hash', _hello_file(tmpdir)) assert expected in str(result) + + +def test_good_algo_option(script, tmpdir): + """Make sure the -a option works.""" + expected = ('--hash=sha512:9b71d224bd62f3785d96d46ad3ea3d73319bfbc2890caad' + 'ae2dff72519673ca72323c3d99ba5c11d7c7acc6e14b8c5da0c4663475c2e' + '5c3adef46f73bcdec043') + result = script.pip('hash', '-a', 'sha512', _hello_file(tmpdir)) + assert expected in str(result) + + +def test_bad_algo_option(script, tmpdir): + """Make sure the -a option raises an error when given a bad operand.""" + result = script.pip('hash', '-a', 'poppycock', _hello_file(tmpdir), + expect_error=True) + assert "invalid choice: 'poppycock'" in str(result) + + +def _hello_file(tmpdir): + """Return a temp file to hash containing "hello".""" + file = tmpdir / 'hashable' + file.write('hello') + return file From 4c405a0ad3c308eae0dba9c1b72615d886df2bd1 Mon Sep 17 00:00:00 2001 From: Erik Rose Date: Mon, 12 Oct 2015 14:37:03 -0400 Subject: [PATCH 30/39] Restore deleted _copy_dist_from_dir(). This reverts commit 62ac258e1e45af4b4b4103b8cc5d32d6d97284f4. https://github.com/pypa/pip/pull/3176 is about to add the missing piece that makes this code useful (and not dead), so let's not delete it. 
--- pip/download.py | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/pip/download.py b/pip/download.py index 9fc7d02fdc6..a78e40a33db 100644 --- a/pip/download.py +++ b/pip/download.py @@ -702,6 +702,45 @@ def unpack_file_url(link, location, download_dir=None, hashes=None): _copy_file(from_path, download_dir, content_type, link) +def _copy_dist_from_dir(link_path, location): + """Copy distribution files in `link_path` to `location`. + + Invoked when user requests to install a local directory. E.g.: + + pip install . + pip install ~/dev/git-repos/python-prompt-toolkit + + """ + + # Note: This is currently VERY SLOW if you have a lot of data in the + # directory, because it copies everything with `shutil.copytree`. + # What it should really do is build an sdist and install that. + # See https://github.com/pypa/pip/issues/2195 + + if os.path.isdir(location): + rmtree(location) + + # build an sdist + setup_py = 'setup.py' + sdist_args = [sys.executable] + sdist_args.append('-c') + sdist_args.append( + "import setuptools, tokenize;__file__=%r;" + "exec(compile(getattr(tokenize, 'open', open)(__file__).read()" + ".replace('\\r\\n', '\\n'), __file__, 'exec'))" % setup_py) + sdist_args.append('sdist') + sdist_args += ['--dist-dir', location] + logger.info('Running setup.py sdist for %s', link_path) + + with indent_log(): + call_subprocess(sdist_args, cwd=link_path, show_stdout=False) + + # unpack sdist into `location` + sdist = os.path.join(location, os.listdir(location)[0]) + logger.info('Unpacking sdist %s into %s', sdist, location) + unpack_file(sdist, location, content_type=None, link=None) + + class PipXmlrpcTransport(xmlrpc_client.Transport): """Provide a `xmlrpclib.Transport` implementation via a `PipSession` object. 
From dcf39bfdf064f6606626d472875b191838448d86 Mon Sep 17 00:00:00 2001 From: Erik Rose Date: Mon, 12 Oct 2015 14:48:30 -0400 Subject: [PATCH 31/39] Add imports to make the pep8 checker happy about the dead _copy_dist_from_dir(). --- pip/download.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pip/download.py b/pip/download.py index a78e40a33db..0b598f1e052 100644 --- a/pip/download.py +++ b/pip/download.py @@ -28,8 +28,9 @@ from pip.models import PyPI from pip.utils import (splitext, rmtree, format_size, display_path, backup_dir, ask_path_exists, unpack_file, - ARCHIVE_EXTENSIONS, consume) + ARCHIVE_EXTENSIONS, consume, call_subprocess) from pip.utils.filesystem import check_path_owner +from pip.utils.logging import indent_log from pip.utils.ui import DownloadProgressBar, DownloadProgressSpinner from pip.locations import write_delete_marker_file from pip.vcs import vcs From 7c5e5039ad3131aa2c3deb5741fef321a7af6e5a Mon Sep 17 00:00:00 2001 From: Erik Rose Date: Mon, 12 Oct 2015 16:15:31 -0400 Subject: [PATCH 32/39] Remove unneeded triple quotes. --- pip/commands/hash.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pip/commands/hash.py b/pip/commands/hash.py index 0d138ad4efc..4f5fdbecfcc 100644 --- a/pip/commands/hash.py +++ b/pip/commands/hash.py @@ -23,7 +23,7 @@ class HashCommand(Command): """ name = 'hash' - usage = """%prog [options] ...""" + usage = '%prog [options] ...' summary = 'Compute hashes of package archives.' def __init__(self, *args, **kw): From e23f59673e16878cd8774e229eb49f08ee82307a Mon Sep 17 00:00:00 2001 From: Erik Rose Date: Mon, 12 Oct 2015 16:29:08 -0400 Subject: [PATCH 33/39] Consolidate hash constants in pip.utils.hashing. 
--- pip/cmdoptions.py | 14 +++----------- pip/commands/hash.py | 7 +++---- pip/exceptions.py | 7 ++----- pip/utils/hashes.py | 13 +++++++++++-- 4 files changed, 19 insertions(+), 22 deletions(-) diff --git a/pip/cmdoptions.py b/pip/cmdoptions.py index 0d986427bf3..836b04ee529 100644 --- a/pip/cmdoptions.py +++ b/pip/cmdoptions.py @@ -17,6 +17,7 @@ PyPI, FormatControl, fmt_ctl_handle_mutual_exclude, fmt_ctl_no_binary, fmt_ctl_no_use_wheel) from pip.locations import CA_BUNDLE_PATH, USER_CACHE_DIR, src_prefix +from pip.utils.hashes import STRONG_HASHES def make_option_group(group, parser): @@ -523,14 +524,6 @@ def only_binary(): ) -def strong_hashes(): - """Return names of hashlib algorithms allowed by the --hash option. - - Currently, those are the ones at least as collision-resistant as sha256. - """ - return ['sha256', 'sha384', 'sha512'] - - def _merge_hash(option, opt_str, value, parser): """Given a value spelled "algo:digest", append the digest to a list pointed to in a dict by the algo name.""" @@ -542,10 +535,9 @@ def _merge_hash(option, opt_str, value, parser): parser.error('Arguments to %s must be a hash name ' 'followed by a value, like --hash=sha256:abcde...' % opt_str) - strongs = strong_hashes() - if algo not in strongs: + if algo not in STRONG_HASHES: parser.error('Allowed hash algorithms for %s are %s.' 
% - (opt_str, ', '.join(strongs))) + (opt_str, ', '.join(STRONG_HASHES))) parser.values.hashes.setdefault(algo, []).append(digest) diff --git a/pip/commands/hash.py b/pip/commands/hash.py index 4f5fdbecfcc..39ca37c8d0f 100644 --- a/pip/commands/hash.py +++ b/pip/commands/hash.py @@ -5,10 +5,9 @@ import sys from pip.basecommand import Command -from pip.cmdoptions import strong_hashes -from pip.exceptions import FAVORITE_HASH from pip.status_codes import ERROR from pip.utils import read_chunks +from pip.utils.hashes import FAVORITE_HASH, STRONG_HASHES logger = logging.getLogger(__name__) @@ -31,11 +30,11 @@ def __init__(self, *args, **kw): self.cmd_opts.add_option( '-a', '--algorithm', dest='algorithm', - choices=strong_hashes(), + choices=STRONG_HASHES, action='store', default=FAVORITE_HASH, help='The hash algorithm to use: one of %s' % - ', '.join(strong_hashes())) + ', '.join(STRONG_HASHES)) self.parser.insert_option_group(0, self.cmd_opts) def run(self, options, args): diff --git a/pip/exceptions.py b/pip/exceptions.py index 7cdfcf7af60..e9b639f54bc 100644 --- a/pip/exceptions.py +++ b/pip/exceptions.py @@ -51,11 +51,6 @@ class UnsupportedWheel(InstallationError): """Unsupported wheel.""" -# The recommended hash algo of the moment. Change this whenever the state of -# the art changes; it won't hurt backward compatibility. -FAVORITE_HASH = 'sha256' - - class HashErrors(InstallationError): """Multiple HashError instances rolled into one for reporting""" @@ -163,6 +158,8 @@ def __init__(self, gotten_hash): self.gotten_hash = gotten_hash def body(self): + from pip.utils.hashes import FAVORITE_HASH # Dodge circular import. 
+ package_name = (self.req.req if self.req and # In case someone feeds something # downright stupid to diff --git a/pip/utils/hashes.py b/pip/utils/hashes.py index 2e58bca26ca..960297007ae 100644 --- a/pip/utils/hashes.py +++ b/pip/utils/hashes.py @@ -2,12 +2,21 @@ import hashlib -from pip.exceptions import (HashMismatch, HashMissing, InstallationError, - FAVORITE_HASH) +from pip.exceptions import HashMismatch, HashMissing, InstallationError from pip.utils import read_chunks from pip._vendor.six import iteritems, iterkeys, itervalues +# The recommended hash algo of the moment. Change this whenever the state of +# the art changes; it won't hurt backward compatibility. +FAVORITE_HASH = 'sha256' + + +# Names of hashlib algorithms allowed by the --hash option and ``pip hash`` +# Currently, those are the ones at least as collision-resistant as sha256. +STRONG_HASHES = ['sha256', 'sha384', 'sha512'] + + class Hashes(object): """A wrapper that builds multiple hashes at once and checks them against known-good values From 925e4b44667105426de5326f59dcc6daf123c5fa Mon Sep 17 00:00:00 2001 From: Erik Rose Date: Fri, 16 Oct 2015 15:58:59 -0400 Subject: [PATCH 34/39] Fix false hash mismatches when installing a package that has a cached wheel. This would occur when, for example, installing from a requirements file that references a certain hashed sdist, a common situation. As of pip 7, pip always tries to build a wheel for each requirement (if one wasn't provided directly) and installs from that. The way this was implemented, InstallRequirement.link pointed to the cached wheel, which obviously had a different hash than the index-sourced archive, so spurious mismatch errors would result. Now we no longer read from the wheel cache in hash-checking mode. Make populate_link(), rather than the `link` setter, responsible for mapping InstallRequirement.link to a cached wheel. populate_link() isn't called until until prepare_files(). 
At that point, when we've examined all InstallRequirements and their potential --hash options, we know whether we should be requiring hashes and thus whether to use the wheel cache at all. The only place that sets InstallRequirement.link other than InstallRequirement itself is pip.wheel, which does so long after hashes have been checked, when it's unpacking the wheel it just built, so it won't cause spurious hash mismatches. --- docs/reference/pip_install.rst | 34 ++++++++++++++++++--------- pip/req/req_install.py | 27 ++++++++++----------- pip/req/req_set.py | 5 ++-- tests/functional/test_install_reqs.py | 32 ++++++++++++++++++++++++- 4 files changed, 69 insertions(+), 29 deletions(-) diff --git a/docs/reference/pip_install.rst b/docs/reference/pip_install.rst index f6d72f61921..40197164045 100644 --- a/docs/reference/pip_install.rst +++ b/docs/reference/pip_install.rst @@ -428,11 +428,11 @@ Windows Wheel Cache ~~~~~~~~~~~ -Pip will read from the subdirectory ``wheels`` within the pip cache dir and use -any packages found there. This is disabled via the same ``no-cache-dir`` option -that disables the HTTP cache. The internal structure of that cache is not part -of the pip API. As of 7.0 pip uses a subdirectory per sdist that wheels were -built from, and wheels within that subdirectory. +Pip will read from the subdirectory ``wheels`` within the pip cache directory +and use any packages found there. This is disabled via the same +``--no-cache-dir`` option that disables the HTTP cache. The internal structure +of that is not part of the pip API. As of 7.0, pip makes a subdirectory for +each sdist that wheels are built from and places the resulting wheels inside. Pip attempts to choose the best wheels from those built in preference to building a new wheel. Note that this means when a package has both optional @@ -463,11 +463,11 @@ variety of platforms.) 
The recommended hash algorithm at the moment is sha256, but stronger ones are allowed, including all those supported by ``hashlib``. However, weaker ones -such as md5, sha1, and sha224 are excluded to avert false assurances of +such as md5, sha1, and sha224 are excluded to avoid giving a false sense of security. Hash verification is an all-or-nothing proposition. Specifying a ``--hash`` -against any requirement not only checks that hash but also activates +against any requirement not only checks that hash but also activates a global *hash-checking mode*, which imposes several other security restrictions: * Hashes are required for all requirements. This is because a partially-hashed @@ -477,7 +477,7 @@ against any requirement not only checks that hash but also activates ``#md5=...`` syntax suffice to satisfy this rule (regardless of hash strength, for legacy reasons), though you should use a stronger hash like sha256 whenever possible. -* Hashes are required for all dependencies. An error is raised if there is a +* Hashes are required for all dependencies. An error results if there is a dependency that is not spelled out and hashed in the requirements file. * Requirements that take the form of project names (rather than URLs or local filesystem paths) must be pinned to a specific version using ``==``. This @@ -506,9 +506,21 @@ fetches only the preferred archive for each package, so you may still need to add hashes for alternatives archives using :ref:`pip hash`: for instance if there is both a binary and a source distribution. -Hash-checking mode also functions with :ref:`pip download` and :ref:`pip -wheel`. A :ref:`comparison of hash-checking mode with other repeatability -strategies ` is available in the User Guide. +The :ref:`wheel cache ` is disabled in hash-checking mode to +prevent spurious hash mismatch errors. 
These would otherwise occur while
+installing sdists that had already been automatically built into cached wheels:
+those wheels would be selected for installation, but their hashes would not
+match the sdist ones from the requirements file. A further complication is that
+locally built wheels are nondeterministic: contemporary modification times make
+their way into the archive, making hashes unpredictable across machines and
+cache flushes. However, wheels fetched from index servers land in pip's HTTP
+cache, not its wheel cache, and are used normally in hash-checking mode. The
+only potential penalty is thus extra build time for sdists, and this can be
+solved by making sure pre-built wheels are available from the index server.
+
+Hash-checking mode also works with :ref:`pip download` and :ref:`pip wheel`. A
+:ref:`comparison of hash-checking mode with other repeatability strategies
+` is available in the User Guide.
 
 .. warning::
 
     Beware of the ``setup_requires`` keyword arg in :file:`setup.py`. The
diff --git a/pip/req/req_install.py b/pip/req/req_install.py
index 62bc6a18018..0adf632d159 100644
--- a/pip/req/req_install.py
+++ b/pip/req/req_install.py
@@ -239,28 +239,25 @@ def __repr__(self):
         return '<%s object: %s editable=%r>' % (
             self.__class__.__name__, str(self), self.editable)
 
-    def populate_link(self, finder, upgrade):
+    def populate_link(self, finder, upgrade, require_hashes):
         """Ensure that if a link can be found for this, that it is found.
 
         Note that self.link may still be None - if Upgrade is False and the
         requirement is already installed.
+
+        If require_hashes is True, don't use the wheel cache, because cached
+        wheels, always built locally, have different hashes than the files
+        downloaded from the index server and thus throw false hash mismatches.
+        Furthermore, cached wheels at present have nondeterministic contents
+        due to file modification times.
""" if self.link is None: self.link = finder.find_requirement(self, upgrade) - - @property - def link(self): - return self._link - - @link.setter - def link(self, link): - # Lookup a cached wheel, if possible. - if self._wheel_cache is None: - self._link = link - else: - self._link = self._wheel_cache.cached_wheel(link, self.name) - if self._link != link: - logger.debug('Using cached wheel link: %s', self._link) + if self._wheel_cache is not None and not require_hashes: + old_link = self.link + self.link = self._wheel_cache.cached_wheel(self.link, self.name) + if old_link != self.link: + logger.debug('Using cached wheel link: %s', self.link) @property def specifier(self): diff --git a/pip/req/req_set.py b/pip/req/req_set.py index ab260e45546..24bd338d655 100644 --- a/pip/req/req_set.py +++ b/pip/req/req_set.py @@ -341,7 +341,7 @@ def prepare_files(self, finder): # Actually prepare the files, and collect any exceptions. Most hash # exceptions cannot be checked ahead of time, because - # req.populate_links() needs to be called before we can make decisions + # req.populate_link() needs to be called before we can make decisions # based on link type. discovered_reqs = [] hash_errors = HashErrors() @@ -502,7 +502,8 @@ def _prepare_file(self, "can delete this. Please delete it and try again." % (req_to_install, req_to_install.source_dir) ) - req_to_install.populate_link(finder, self.upgrade) + req_to_install.populate_link( + finder, self.upgrade, require_hashes) # We can't hit this spot and have populate_link return None. 
# req_to_install.satisfied_by is None here (because we're # guarded) and upgrade has no impact except when satisfied_by diff --git a/tests/functional/test_install_reqs.py b/tests/functional/test_install_reqs.py index f1ba71b1b71..5c2ed0c6493 100644 --- a/tests/functional/test_install_reqs.py +++ b/tests/functional/test_install_reqs.py @@ -3,7 +3,7 @@ import pytest -from tests.lib import (pyversion, path_to_url, +from tests.lib import (pyversion, path_to_url, requirements_file, _create_test_package_with_subdirectory) from tests.lib.local_repos import local_checkout @@ -313,3 +313,33 @@ def test_constrained_to_url_install_same_url(script, data): 'install', '--no-index', '-f', data.find_links, '-c', script.scratch_path / 'constraints.txt', to_install) assert 'Running setup.py install for singlemodule' in result.stdout + + +@pytest.mark.network +def test_double_install_spurious_hash_mismatch(script, tmpdir): + """Make sure installing the same hashed sdist twice doesn't throw hash + mismatch errors. + + Really, this is a test that we disable reads from the wheel cache in + hash-checking mode. Locally, implicitly built wheels of sdists obviously + have different hashes from the original archives. Comparing against those + causes spurious mismatch errors. + + """ + script.pip('install', 'wheel') # Otherwise, it won't try to build wheels. + with requirements_file('simple==1.0 --hash=sha256:393043e672415891885c9a2a' + '0929b1af95fb866d6ca016b42d2e6ce53619b653', + tmpdir) as reqs_file: + # Install a package (and build its wheel): + result = script.pip_install_local( + '-r', reqs_file.abspath, expect_error=False) + assert 'Successfully installed simple-1.0' in str(result) + + # Uninstall it: + script.pip('uninstall', '-y', 'simple', expect_error=False) + + # Then install it again. We should not hit a hash mismatch, and the + # package should install happily. 
+ result = script.pip_install_local( + '-r', reqs_file.abspath, expect_error=False) + assert 'Successfully installed simple-1.0' in str(result) From 622b430491cf399b8369444b62872f730dae8e24 Mon Sep 17 00:00:00 2001 From: Erik Rose Date: Mon, 19 Oct 2015 23:13:48 -0400 Subject: [PATCH 35/39] Typos and docstrings --- pip/download.py | 2 +- pip/wheel.py | 2 +- tests/functional/test_hash.py | 3 +++ 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/pip/download.py b/pip/download.py index 0b598f1e052..9dd3a42a4ba 100644 --- a/pip/download.py +++ b/pip/download.py @@ -338,7 +338,7 @@ def __init__(self, *args, **kwargs): # We want to _only_ cache responses on securely fetched origins. We do # this because we can't validate the response of an insecurely fetched # origin, and we don't want someone to be able to poison the cache and - # require manual evication from the cache to fix it. + # require manual eviction from the cache to fix it. if cache: secure_adapter = CacheControlAdapter( cache=SafeFileCache(cache, use_dir_lock=True), diff --git a/pip/wheel.py b/pip/wheel.py index 89ca626108d..4d39e68c301 100644 --- a/pip/wheel.py +++ b/pip/wheel.py @@ -720,7 +720,7 @@ def _clean_one(self, req): def build(self, autobuilding=False): """Build wheels. - :param unpack: If True, replace the sdist we built from the with the + :param unpack: If True, replace the sdist we built from with the newly built wheel, in preparation for installation. :return: True if all the wheels built correctly. 
""" diff --git a/tests/functional/test_hash.py b/tests/functional/test_hash.py index dc19b2c7168..9fc0d6e9108 100644 --- a/tests/functional/test_hash.py +++ b/tests/functional/test_hash.py @@ -1,3 +1,6 @@ +"""Tests for the ``pip hash`` command""" + + def test_basic(script, tmpdir): """Run 'pip hash' through its default behavior.""" expected = ('--hash=sha256:2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425' From ee9d6fb2c30626b73c774933efba23b990f993ab Mon Sep 17 00:00:00 2001 From: Erik Rose Date: Mon, 19 Oct 2015 23:26:23 -0400 Subject: [PATCH 36/39] Modernize recommendations to not call setuptools-level things directly. --- docs/reference/pip_install.rst | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/docs/reference/pip_install.rst b/docs/reference/pip_install.rst index 40197164045..955fd877e5b 100644 --- a/docs/reference/pip_install.rst +++ b/docs/reference/pip_install.rst @@ -531,14 +531,20 @@ Hash-checking mode also works with :ref:`pip download` and :ref:`pip wheel`. A .. warning:: Be careful not to nullify all your security work when you install your - actual project. If you call ``python setup.py install`` after installing - your requirements, setuptools will happily go out and download, unchecked, + actual project by using setuptools directly: for example, by calling + ``python setup.py install``, ``python setup.py develop``, or + ``easy_install``. Setuptools will happily go out and download, unchecked, anything you missed in your requirements file—and it’s easy to miss things - as your project evolves. One way to be safe is to pack up your project and - then install that using pip and :ref:`--no-deps `:: + as your project evolves. To be safe, install your project using pip and + :ref:`--no-deps `. - python setup.py sdist - pip install --no-deps dist/yourproject-1.0.tar.gz + Instead of ``python setup.py develop``, use... :: + + pip install --no-deps -e . + + Instead of ``python setup.py install``, use... 
:: + + pip install --no-deps . Hashes from PyPI From 3af5ffa5ce0cfd46f92d89f4560ce0f7dc9ba02c Mon Sep 17 00:00:00 2001 From: Erik Rose Date: Mon, 19 Oct 2015 23:38:07 -0400 Subject: [PATCH 37/39] Improve flow of --require-hashes help message. --- pip/cmdoptions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pip/cmdoptions.py b/pip/cmdoptions.py index 836b04ee529..73a805e598f 100644 --- a/pip/cmdoptions.py +++ b/pip/cmdoptions.py @@ -561,8 +561,8 @@ def _merge_hash(option, opt_str, value, parser): action='store_true', default=False, help='Require a hash to check each requirement against, for ' - 'repeatable installs. Implied by the presence of a --hash ' - 'option on any package in a requirements file') + 'repeatable installs. This option is implied when any package in a ' + 'requirements file has a --hash option.') ########## From f38fc903f27aff9d7a903f65d3c51aca8384b740 Mon Sep 17 00:00:00 2001 From: Erik Rose Date: Wed, 21 Oct 2015 15:50:57 -0400 Subject: [PATCH 38/39] Obey --require-hashes option in requirements files. Removed the mention of "package index options" in the docs, because they don't all fit that category anymore. Not even --no-binary and --only-binary do; they're "install options". --- docs/reference/pip_install.rst | 5 ++++- pip/basecommand.py | 3 +++ pip/req/req_file.py | 6 ++++++ pip/req/req_set.py | 4 ++-- tests/unit/test_req.py | 18 +++++++++++++++++- 5 files changed, 32 insertions(+), 4 deletions(-) diff --git a/docs/reference/pip_install.rst b/docs/reference/pip_install.rst index 955fd877e5b..dcde6c98c18 100644 --- a/docs/reference/pip_install.rst +++ b/docs/reference/pip_install.rst @@ -101,7 +101,7 @@ and the newline following it is effectively ignored. Comments are stripped *before* line continuations are processed. 
-Additionally, the following Package Index Options are supported: +The following options are supported: * :ref:`-i, --index-url <--index-url>` * :ref:`--extra-index-url <--extra-index-url>` @@ -109,6 +109,7 @@ Additionally, the following Package Index Options are supported: * :ref:`-f, --find-links <--find-links>` * :ref:`--no-binary ` * :ref:`--only-binary ` + * :ref:`--require-hashes <--require-hashes>` For example, to specify :ref:`--no-index <--no-index>` and 2 :ref:`--find-links <--find-links>` locations: @@ -486,6 +487,8 @@ against any requirement not only checks that hash but also activates a global * ``--egg`` is disallowed, because it delegates installation of dependencies to setuptools, giving up pip's ability to enforce any of the above. +.. _`--require-hashes`: + Hash-checking mode can be forced on with the ``--require-hashes`` command-line option:: diff --git a/pip/basecommand.py b/pip/basecommand.py index 3627e9143fa..ce3ad40eb04 100644 --- a/pip/basecommand.py +++ b/pip/basecommand.py @@ -280,6 +280,9 @@ def populate_requirement_set(requirement_set, args, options, finder, wheel_cache=wheel_cache): found_req_in_file = True requirement_set.add_requirement(req) + # If --require-hashes was a line in a requirements file, tell + # RequirementSet about it: + requirement_set.require_hashes = options.require_hashes if not (args or options.editables or found_req_in_file): opts = {'name': name} diff --git a/pip/req/req_file.py b/pip/req/req_file.py index ff1c1a4a1b1..defbd7aaf04 100644 --- a/pip/req/req_file.py +++ b/pip/req/req_file.py @@ -46,6 +46,7 @@ cmdoptions.pre, cmdoptions.process_dependency_links, cmdoptions.trusted_host, + cmdoptions.require_hashes, ] # options to be passed to requirements @@ -123,6 +124,7 @@ def process_line(line, filename, line_number, finder=None, comes_from=None, affect the finder. :param constraint: If True, parsing a constraints file. 
+ :param options: OptionParser options that we may update """ parser = build_parser() defaults = parser.get_default_values() @@ -187,6 +189,10 @@ def process_line(line, filename, line_number, finder=None, comes_from=None, for req in parser: yield req + # percolate hash-checking option upward + elif opts.require_hashes: + options.require_hashes = opts.require_hashes + # set finder options elif finder: if opts.allow_external: diff --git a/pip/req/req_set.py b/pip/req/req_set.py index 24bd338d655..10312dff229 100644 --- a/pip/req/req_set.py +++ b/pip/req/req_set.py @@ -190,7 +190,7 @@ def __init__(self, build_dir, src_dir, download_dir, upgrade=False, wheel_download_dir = normalize_path(wheel_download_dir) self.wheel_download_dir = wheel_download_dir self._wheel_cache = wheel_cache - self._require_hashes = require_hashes + self.require_hashes = require_hashes # Maps from install_req -> dependencies_of_install_req self._dependencies = defaultdict(list) @@ -331,7 +331,7 @@ def prepare_files(self, finder): # If any top-level requirement has a hash specified, enter # hash-checking mode, which requires hashes from all. 
root_reqs = self.unnamed_requirements + self.requirements.values() - require_hashes = (self._require_hashes or + require_hashes = (self.require_hashes or any(req.has_hash_options for req in root_reqs)) if require_hashes and self.as_egg: raise InstallationError( diff --git a/tests/unit/test_req.py b/tests/unit/test_req.py index a0488e0966d..6ddfb5d43a5 100644 --- a/tests/unit/test_req.py +++ b/tests/unit/test_req.py @@ -6,6 +6,7 @@ import pytest from mock import Mock, patch, mock_open +from pip.commands.install import InstallCommand from pip.exceptions import (PreviousBuildDirError, InvalidWheelFilename, UnsupportedWheel) from pip.download import path_to_url, PipSession @@ -16,7 +17,7 @@ from pip.req.req_install import parse_editable from pip.utils import read_text_file from pip._vendor import pkg_resources -from tests.lib import assert_raises_regexp +from tests.lib import assert_raises_regexp, requirements_file class TestRequirementSet(object): @@ -125,6 +126,21 @@ def test_missing_hash_with_require_hashes(self, data): reqset.prepare_files, finder) + def test_missing_hash_with_require_hashes_in_reqs_file(self, data, tmpdir): + """--require-hashes in a requirements file should make its way to the + RequirementSet. + """ + req_set = self.basic_reqset(require_hashes=False) + session = PipSession() + finder = PackageFinder([data.find_links], [], session=session) + command = InstallCommand() + with requirements_file('--require-hashes', tmpdir) as reqs_file: + options, args = command.parse_args(['-r', reqs_file]) + command.populate_requirement_set( + req_set, args, options, finder, session, command.name, + wheel_cache=None) + assert req_set.require_hashes + def test_unsupported_hashes(self, data): """VCS and dir links should raise errors when --require-hashes is on. 
From 4488047ef6205163e80f88bd46c62b3932490cb1 Mon Sep 17 00:00:00 2001 From: Erik Rose Date: Wed, 21 Oct 2015 16:35:30 -0400 Subject: [PATCH 39/39] Update the wheel-cache-disabling docs with our latest understanding of C compiler nondeterminism. --- docs/reference/pip_install.rst | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/docs/reference/pip_install.rst b/docs/reference/pip_install.rst index dcde6c98c18..1137da64101 100644 --- a/docs/reference/pip_install.rst +++ b/docs/reference/pip_install.rst @@ -516,10 +516,13 @@ those wheels would be selected for installation, but their hashes would not match the sdist ones from the requirements file. A further complication is that locally built wheels are nondeterministic: contemporary modification times make their way into the archive, making hashes unpredictable across machines and -cache flushes. However, wheels fetched from index servers land in pip's HTTP -cache, not its wheel cache, and are used normally in hash-checking mode. The -only potential penalty is thus extra build time for sdists, and this can be -solved by making sure pre-built wheels are available from the index server. +cache flushes. Compilation of C code adds further nondeterminism, as many +compilers include random-seeded values in their output. However, wheels fetched +from index servers are the same every time. They land in pip's HTTP cache, not +its wheel cache, and are used normally in hash-checking mode. The only downside +of having the the wheel cache disabled is thus extra build time for sdists, and +this can be solved by making sure pre-built wheels are available from the index +server. Hash-checking mode also works with :ref:`pip download` and :ref:`pip wheel`. A :ref:`comparison of hash-checking mode with other repeatability strategies