diff --git a/Doc/library/hashlib.rst b/Doc/library/hashlib.rst index dffb167c74771f..609a6d9706ed20 100644 --- a/Doc/library/hashlib.rst +++ b/Doc/library/hashlib.rst @@ -55,11 +55,14 @@ hash supplied more than 2047 bytes of data at once in its constructor or .. index:: single: OpenSSL; (use in module hashlib) Constructors for hash algorithms that are always present in this module are -:func:`sha1`, :func:`sha224`, :func:`sha256`, :func:`sha384`, :func:`sha512`, +:func:`md5`, :func:`sha1`, :func:`sha224`, :func:`sha256`, :func:`sha384`, :func:`sha512`, :func:`sha3_224`, :func:`sha3_256`, :func:`sha3_384`, :func:`sha3_512`, :func:`shake_128`, :func:`shake_256`, :func:`blake2b`, and :func:`blake2s`. -:func:`md5` is normally available as well, though it may be missing or blocked -if you are using a rare "FIPS compliant" build of Python. +Some of these may be missing or blocked if you are running in an environment +with OpenSSL's "FIPS mode" configured to exclude some hash algorithms from its +default provider and are using a Python runtime built with that in mind. Such +environments are unusual. + These correspond to :data:`algorithms_guaranteed`. Additional algorithms may also be available if your Python distribution's @@ -119,7 +122,7 @@ More condensed: Constructors ------------ -.. function:: new(name[, data], *, usedforsecurity=True) +.. function:: new(name[, data], \*, usedforsecurity=True) Is a generic constructor that takes the string *name* of the desired algorithm as its first parameter. It also exists to allow access to the @@ -134,16 +137,16 @@ Using :func:`new` with an algorithm name: '031edd7d41651593c5fe5c006fa5752b37fddff7bc4e843aa6af0c950f4b9406' -.. function:: md5([, data], *, usedforsecurity=True) -.. function:: sha1([, data], *, usedforsecurity=True) -.. function:: sha224([, data], *, usedforsecurity=True) -.. function:: sha256([, data], *, usedforsecurity=True) -.. function:: sha384([, data], *, usedforsecurity=True) -.. 
function:: sha512([, data], *, usedforsecurity=True) -.. function:: sha3_224([, data], *, usedforsecurity=True) -.. function:: sha3_256([, data], *, usedforsecurity=True) -.. function:: sha3_384([, data], *, usedforsecurity=True) -.. function:: sha3_512([, data], *, usedforsecurity=True) +.. function:: md5([, data], \*, usedforsecurity=True) +.. function:: sha1([, data], \*, usedforsecurity=True) +.. function:: sha224([, data], \*, usedforsecurity=True) +.. function:: sha256([, data], \*, usedforsecurity=True) +.. function:: sha384([, data], \*, usedforsecurity=True) +.. function:: sha512([, data], \*, usedforsecurity=True) +.. function:: sha3_224([, data], \*, usedforsecurity=True) +.. function:: sha3_256([, data], \*, usedforsecurity=True) +.. function:: sha3_384([, data], \*, usedforsecurity=True) +.. function:: sha3_512([, data], \*, usedforsecurity=True) Named constructors such as these are faster than passing an algorithm name to :func:`new`. @@ -156,9 +159,10 @@ Hashlib provides the following constant module attributes: .. data:: algorithms_guaranteed A set containing the names of the hash algorithms guaranteed to be supported - by this module on all platforms. Note that 'md5' is in this list despite - some upstream vendors offering an odd "FIPS compliant" Python build that - excludes it. + by this module on all platforms. Note that the guarantees do not hold true + in the face of vendors offering "FIPS compliant" Python builds that exclude + some algorithms entirely, nor when OpenSSL is used and its FIPS mode + configuration disables some algorithms in the default provider. .. versionadded:: 3.2 diff --git a/Lib/hashlib.py b/Lib/hashlib.py index 1b2c30cc32f564..44656c33a9981d 100644 --- a/Lib/hashlib.py +++ b/Lib/hashlib.py @@ -55,17 +55,18 @@ -# This tuple and __get_builtin_constructor() must be modified if a new +# This list and __get_builtin_constructor() must be modified if a new # always available algorithm is added.
-__always_supported = ('md5', 'sha1', 'sha224', 'sha256', 'sha384', 'sha512', - 'blake2b', 'blake2s', - 'sha3_224', 'sha3_256', 'sha3_384', 'sha3_512', - 'shake_128', 'shake_256') +__always_supported = [ + 'md5', 'sha1', 'sha224', 'sha256', 'sha384', 'sha512', + 'sha3_224', 'sha3_256', 'sha3_384', 'sha3_512', + 'shake_128', 'shake_256', 'blake2b', 'blake2s' +] algorithms_guaranteed = set(__always_supported) algorithms_available = set(__always_supported) -__all__ = __always_supported + ('new', 'algorithms_guaranteed', - 'algorithms_available', 'file_digest') +__all__ = __always_supported + [ + 'new', 'algorithms_guaranteed', 'algorithms_available', 'file_digest'] __builtin_constructor_cache = {} @@ -129,7 +130,8 @@ def __get_openssl_constructor(name): return __get_builtin_constructor(name) try: # MD5, SHA1, and SHA2 are in all supported OpenSSL versions - # SHA3/shake are available in OpenSSL 1.1.1+ + # SHA3/shake are available in OpenSSL 1.1.1+; some forks omit + # the latter. f = getattr(_hashlib, 'openssl_' + name) # Allow the C module to raise ValueError. The function will be # defined but the hash not actually available. Don't fall back to @@ -161,8 +163,6 @@ def __hash_new(name, data=b'', **kwargs): except ValueError: # If the _hashlib module (OpenSSL) doesn't support the named # hash, try using our builtin implementations. - # This allows for SHA224/256 and SHA384/512 support even though - # the OpenSSL library prior to 0.9.8 doesn't provide them. return __get_builtin_constructor(name)(data) @@ -244,8 +244,10 @@ def file_digest(fileobj, digest, /, *, _bufsize=2**18): try: globals()[__func_name] = __get_hash(__func_name) except ValueError: - import logging - logging.exception('code for hash %s was not found.', __func_name) + # Errors logged here would be seen as noise by most people. + # Code using a missing hash will get an obvious exception. 
+ __all__.remove(__func_name) + algorithms_available.remove(__func_name) # Cleanup locals() diff --git a/Lib/test/test_hashlib.py b/Lib/test/test_hashlib.py index 575b2cd0da7056..1c1a0396c894de 100644 --- a/Lib/test/test_hashlib.py +++ b/Lib/test/test_hashlib.py @@ -66,6 +66,11 @@ def get_fips_mode(): SKIP_SHA3 = support.check_sanitizer(ub=True) requires_sha3 = unittest.skipUnless(not SKIP_SHA3, 'requires _sha3') +requires_usedforsecurity = unittest.skipIf( + get_fips_mode(), + "If an OpenSSL FIPS mode configuration has disabled any algorithms"+ + " in the default provider, this test would fail." +) def hexstr(s): assert isinstance(s, bytes), repr(s) @@ -102,6 +107,7 @@ class HashLibTestCase(unittest.TestCase): 'sha3_224', 'sha3_256', 'sha3_384', 'sha3_512', 'shake_128', 'shake_256') + blakes = {'blake2b', 'blake2s'} shakes = {'shake_128', 'shake_256'} # gh-58898: Fallback modules are always compiled under POSIX. @@ -121,9 +127,12 @@ def __init__(self, *args, **kwargs): for algorithm in self.supported_hash_names: algorithms.add(algorithm.lower()) + # blake2s and blake2b *require* the _blake2 builtin. _blake2 = self._conditional_import_module('_blake2') if _blake2: - algorithms.update({'blake2b', 'blake2s'}) + algorithms.update(self.blakes) + else: + algorithms.difference_update(self.blakes) self.constructors_to_test = {} for algorithm in algorithms: @@ -134,7 +143,15 @@ def __init__(self, *args, **kwargs): # For each algorithm, test the direct constructor and the use # of hashlib.new given the algorithm name. 
for algorithm, constructors in self.constructors_to_test.items(): - constructors.add(getattr(hashlib, algorithm)) + if get_fips_mode(): + # Arbitrary algorithms may be missing via openssl.cnf + try: + constructor = getattr(hashlib, algorithm) + except AttributeError: + continue + constructors.add(constructor) + else: + constructors.add(getattr(hashlib, algorithm)) def _test_algorithm_via_hashlib_new(data=None, _alg=algorithm, **kwargs): if data is None: return hashlib.new(_alg, **kwargs) @@ -196,10 +213,6 @@ def hash_constructors(self): constructors = self.constructors_to_test.values() return itertools.chain.from_iterable(constructors) - @property - def is_fips_mode(self): - return get_fips_mode() - def test_hash_array(self): a = array.array("b", range(10)) for cons in self.hash_constructors: @@ -214,18 +227,24 @@ def test_algorithms_guaranteed(self): set(_algo for _algo in self.supported_hash_names if _algo.islower())) + @unittest.skipIf(get_fips_mode(), reason="guaranteed algorithms may not be available in FIPS mode") def test_algorithms_available(self): self.assertTrue(set(hashlib.algorithms_guaranteed). 
- issubset(hashlib.algorithms_available)) + issubset(hashlib.algorithms_available), + msg=f"\n{sorted(hashlib.algorithms_guaranteed)=}\n{sorted(hashlib.algorithms_available)=}") # all available algorithms must be loadable, bpo-47101 self.assertNotIn("undefined", hashlib.algorithms_available) for name in hashlib.algorithms_available: - digest = hashlib.new(name, usedforsecurity=False) - + with self.subTest(name=name): + if name in self.blakes and not _blake2: + self.skipTest("requires _blake2") + hashlib.new(name, usedforsecurity=False) + + @requires_usedforsecurity + @unittest.skipUnless(hasattr(hashlib, "sha256"), "sha256 unavailable") + @unittest.skipUnless(hasattr(hashlib, "md5"), "md5 unavailable") def test_usedforsecurity_true(self): hashlib.new("sha256", usedforsecurity=True) - if self.is_fips_mode: - self.skipTest("skip in FIPS mode") for cons in self.hash_constructors: cons(usedforsecurity=True) cons(b'', usedforsecurity=True) @@ -235,6 +254,8 @@ def test_usedforsecurity_true(self): self._hashlib.new("md5", usedforsecurity=True) self._hashlib.openssl_md5(usedforsecurity=True) + @unittest.skipUnless(hasattr(hashlib, "sha256"), "sha256 unavailable") + @unittest.skipUnless(hasattr(hashlib, "md5"), "md5 unavailable") def test_usedforsecurity_false(self): hashlib.new("sha256", usedforsecurity=False) for cons in self.hash_constructors: @@ -250,8 +271,9 @@ def test_unknown_hash(self): self.assertRaises(ValueError, hashlib.new, 'spam spam spam spam spam') self.assertRaises(TypeError, hashlib.new, 1) + @unittest.skipUnless(hasattr(hashlib, "sha256"), "sha256 unavailable") def test_new_upper_to_lower(self): - self.assertEqual(hashlib.new("SHA256").name, "sha256") + self.assertEqual(hashlib.new("SHA256", usedforsecurity=False).name, "sha256") def test_get_builtin_constructor(self): get_builtin_constructor = getattr(hashlib, @@ -309,10 +331,6 @@ def test_name_attribute(self): for cons in self.hash_constructors: h = cons(usedforsecurity=False) 
self.assertIsInstance(h.name, str) - if h.name in self.supported_hash_names: - self.assertIn(h.name, self.supported_hash_names) - else: - self.assertNotIn(h.name, self.supported_hash_names) self.assertEqual( h.name, hashlib.new(h.name, usedforsecurity=False).name @@ -353,7 +371,7 @@ def test_large_update(self): @requires_resource('cpu') def test_sha256_update_over_4gb(self): zero_1mb = b"\0" * 1024 * 1024 - h = hashlib.sha256() + h = hashlib.sha256(usedforsecurity=False) for i in range(0, 4096): h.update(zero_1mb) h.update(b"hello world") @@ -362,29 +380,33 @@ def test_sha256_update_over_4gb(self): @requires_resource('cpu') def test_sha3_256_update_over_4gb(self): zero_1mb = b"\0" * 1024 * 1024 - h = hashlib.sha3_256() + h = hashlib.sha3_256(usedforsecurity=False) for i in range(0, 4096): h.update(zero_1mb) h.update(b"hello world") self.assertEqual(h.hexdigest(), "e2d4535e3b613135c14f2fe4e026d7ad8d569db44901740beffa30d430acb038") + @requires_blake2 @requires_resource('cpu') def test_blake2_update_over_4gb(self): # blake2s or blake2b doesn't matter based on how our C code is structured, this tests the # common loop macro logic. zero_1mb = b"\0" * 1024 * 1024 - h = hashlib.blake2s() + h = hashlib.blake2s(usedforsecurity=False) for i in range(0, 4096): h.update(zero_1mb) h.update(b"hello world") self.assertEqual(h.hexdigest(), "8a268e83dd30528bc0907fa2008c91de8f090a0b6e0e60a5ff0d999d8485526f") def check(self, name, data, hexdigest, shake=False, **kwargs): + if 'usedforsecurity' not in kwargs: + kwargs['usedforsecurity'] = False length = len(hexdigest)//2 hexdigest = hexdigest.lower() constructors = self.constructors_to_test[name] # 2 is for hashlib.name(...) and hashlib.new(name, ...) 
- self.assertGreaterEqual(len(constructors), 2) + if get_fips_mode() == 0: + self.assertGreaterEqual(len(constructors), 2) for hash_object_constructor in constructors: m = hash_object_constructor(data, **kwargs) computed = m.hexdigest() if not shake else m.hexdigest(length) @@ -434,7 +456,8 @@ def check_no_unicode(self, algorithm_name): # Unicode objects are not allowed as input. constructors = self.constructors_to_test[algorithm_name] for hash_object_constructor in constructors: - self.assertRaises(TypeError, hash_object_constructor, 'spam') + with self.assertRaises(TypeError): + hash_object_constructor('spam', usedforsecurity=False) def test_no_unicode(self): self.check_no_unicode('md5') @@ -497,7 +520,7 @@ def test_blocksize_name_sha3(self): def check_sha3(self, name, capacity, rate, suffix): constructors = self.constructors_to_test[name] for hash_object_constructor in constructors: - m = hash_object_constructor() + m = hash_object_constructor(usedforsecurity=False) if HASH is not None and isinstance(m, HASH): # _hashopenssl's variant does not have extra SHA3 attributes continue @@ -661,7 +684,7 @@ def check_blake2(self, constructor, salt_size, person_size, key_size, digest_size, max_offset): self.assertEqual(constructor.SALT_SIZE, salt_size) for i in range(salt_size + 1): - constructor(salt=b'a' * i) + constructor(salt=b'a' * i, usedforsecurity=False) salt = b'a' * (salt_size + 1) self.assertRaises(ValueError, constructor, salt=salt) @@ -908,6 +931,7 @@ def test_case_shake256_vector(self): for msg, md in read_vectors('shake_256'): self.check('shake_256', msg, md, True) + @unittest.skipUnless(hasattr(hashlib, "sha256"), "sha256 unavailable") def test_gil(self): # Check things work fine with an input larger than the size required # for multithreaded operation (which is hardwired to 2048). 
@@ -922,7 +946,7 @@ def test_gil(self): m = cons(b'x' * gil_minsize, usedforsecurity=False) m.update(b'1') - m = hashlib.sha256() + m = hashlib.sha256(usedforsecurity=False) m.update(b'1') m.update(b'#' * gil_minsize) m.update(b'1') @@ -931,7 +955,8 @@ def test_gil(self): '1cfceca95989f51f658e3f3ffe7f1cd43726c9e088c13ee10b46f57cef135b94' ) - m = hashlib.sha256(b'1' + b'#' * gil_minsize + b'1') + m = hashlib.sha256(b'1' + b'#' * gil_minsize + b'1', + usedforsecurity=False) self.assertEqual( m.hexdigest(), '1cfceca95989f51f658e3f3ffe7f1cd43726c9e088c13ee10b46f57cef135b94' @@ -939,6 +964,7 @@ def test_gil(self): @threading_helper.reap_threads @threading_helper.requires_working_threading() + @unittest.skipUnless(hasattr(hashlib, "sha1"), "sha1 unavailable") def test_threaded_hashing(self): # Updating the same hash object from several threads at once # using data chunk sizes containing the same byte sequences. @@ -946,7 +972,7 @@ def test_threaded_hashing(self): # If the internal locks are working to prevent multiple # updates on the same object from running at once, the resulting # hash will be the same as doing it single threaded upfront. - hasher = hashlib.sha1() + hasher = hashlib.sha1(usedforsecurity=False) num_threads = 5 smallest_data = b'swineflu' data = smallest_data * 200000 @@ -975,8 +1001,7 @@ def hash_in_chunks(chunk_size): self.assertEqual(expected_hash, hasher.hexdigest()) def test_get_fips_mode(self): - fips_mode = self.is_fips_mode - if fips_mode is not None: + if (fips_mode := get_fips_mode()) is not None: self.assertIsInstance(fips_mode, int) @support.cpython_only @@ -1169,6 +1194,9 @@ def test_normalized_name(self): self.assertNotIn("blake2b512", hashlib.algorithms_available) self.assertNotIn("sha3-512", hashlib.algorithms_available) + # defaults True because file_digest doesn't support the parameter. 
+ @requires_usedforsecurity + @unittest.skipUnless(hasattr(hashlib, "sha256"), "sha256 unavailable") def test_file_digest(self): data = b'a' * 65536 d1 = hashlib.sha256() diff --git a/Misc/NEWS.d/next/Library/2024-12-02-04-08-22.gh-issue-127298.8cpkfk.rst b/Misc/NEWS.d/next/Library/2024-12-02-04-08-22.gh-issue-127298.8cpkfk.rst new file mode 100644 index 00000000000000..079e809aeea977 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-12-02-04-08-22.gh-issue-127298.8cpkfk.rst @@ -0,0 +1,8 @@ +:mod:`hashlib` now avoids emitting a message to stderr when used in a custom +build or system environment that excludes any of our otherwise guaranteed +available hash functions. The ``algorithms_available`` attribute is now +updated to exclude algorithms that are not actually available at import time, +even if they are considered guaranteed in normal Python builds. Use of +OpenSSL FIPS mode runtime configs and some combinations of the build-time +``--with(out)-builtin-hashlib-hashes`` configure option are examples of when +this could happen.