diff --git a/CHANGES.rst b/CHANGES.rst index 1bc5222e..6ec9d1f3 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,3 +1,9 @@ +v6.3.0 +====== + +* #115: Support ``installed-files.txt`` for ``Distribution.files`` + when present. + v6.2.1 ====== diff --git a/importlib_metadata/__init__.py b/importlib_metadata/__init__.py index 217ca9cc..a298d7a1 100644 --- a/importlib_metadata/__init__.py +++ b/importlib_metadata/__init__.py @@ -12,6 +12,7 @@ import functools import itertools import posixpath +import contextlib import collections import inspect @@ -461,8 +462,8 @@ def files(self): :return: List of PackagePath for this distribution or None Result is `None` if the metadata file that enumerates files - (i.e. RECORD for dist-info or SOURCES.txt for egg-info) is - missing. + (i.e. RECORD for dist-info, or installed-files.txt or + SOURCES.txt for egg-info) is missing. Result may be empty if the metadata exists but is empty. """ @@ -475,9 +476,19 @@ def make_file(name, hash=None, size_str=None): @pass_none def make_files(lines): - return list(starmap(make_file, csv.reader(lines))) + return starmap(make_file, csv.reader(lines)) - return make_files(self._read_files_distinfo() or self._read_files_egginfo()) + @pass_none + def skip_missing_files(package_paths): + return list(filter(lambda path: path.locate().exists(), package_paths)) + + return skip_missing_files( + make_files( + self._read_files_distinfo() + or self._read_files_egginfo_installed() + or self._read_files_egginfo_sources() + ) + ) def _read_files_distinfo(self): """ @@ -486,10 +497,43 @@ def _read_files_distinfo(self): text = self.read_text('RECORD') return text and text.splitlines() - def _read_files_egginfo(self): + def _read_files_egginfo_installed(self): """ - SOURCES.txt might contain literal commas, so wrap each line - in quotes. + Read installed-files.txt and return lines in a similar + CSV-parsable format as RECORD: each file must be placed + relative to the site-packages directory, and must also be + quoted (since file names can contain literal commas). + + This file is written when the package is installed by pip, + but it might not be written for other installation methods. + Hence, even if we can assume that this file is accurate + when it exists, we cannot assume that it always exists. + """ + text = self.read_text('installed-files.txt') + # We need to prepend the .egg-info/ subdir to the lines in this file. + # But this subdir is only available in the PathDistribution's self._path + # which is not easily accessible from this base class... + subdir = getattr(self, '_path', None) + if not text or not subdir: + return + with contextlib.suppress(Exception): + ret = [ + str((subdir / line).resolve().relative_to(self.locate_file(''))) + for line in text.splitlines() + ] + return map('"{}"'.format, ret) + + def _read_files_egginfo_sources(self): + """ + Read SOURCES.txt and return lines in a similar CSV-parsable + format as RECORD: each file name must be quoted (since it + might contain literal commas). + + Note that SOURCES.txt is not a reliable source for what + files are installed by a package. This file is generated + for a source archive, and the files that are present + there (e.g. setup.py) may not correctly reflect the files + that are present after the package has been installed. """ text = self.read_text('SOURCES.txt') return text and map('"{}"'.format, text.splitlines()) diff --git a/tests/fixtures.py b/tests/fixtures.py index bcbba5d4..6e72c6ab 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -86,8 +86,10 @@ def setUp(self): # Except for python/mypy#731, prefer to define -# FilesDef = Dict[str, Union['FilesDef', str]] -FilesDef = Dict[str, Union[Dict[str, Union[Dict[str, str], str]], str]] +# FilesDef = Dict[str, Union['FilesDef', str, bytes]] +FilesDef = Dict[ + str, Union[Dict[str, Union[Dict[str, Union[str, bytes]], str, bytes]], str, bytes] +] class DistInfoPkg(OnSysPath, SiteDir): @@ -214,6 +216,97 @@ def setUp(self): build_files(EggInfoPkg.files, prefix=self.site_dir) +class EggInfoPkgPipInstalledNoToplevel(OnSysPath, SiteDir): + files: FilesDef = { + "egg_with_module_pkg.egg-info": { + "PKG-INFO": "Name: egg_with_module-pkg", + # SOURCES.txt is made from the source archive, and contains files + # (setup.py) that are not present after installation. + "SOURCES.txt": """ + egg_with_module.py + setup.py + egg_with_module_pkg.egg-info/PKG-INFO + egg_with_module_pkg.egg-info/SOURCES.txt + egg_with_module_pkg.egg-info/top_level.txt + """, + # installed-files.txt is written by pip, and is a strictly more + # accurate source than SOURCES.txt as to the installed contents of + # the package. + "installed-files.txt": """ + ../egg_with_module.py + PKG-INFO + SOURCES.txt + top_level.txt + """, + # missing top_level.txt (to trigger fallback to installed-files.txt) + }, + "egg_with_module.py": """ + def main(): + print("hello world") + """, + } + + def setUp(self): + super().setUp() + build_files(EggInfoPkgPipInstalledNoToplevel.files, prefix=self.site_dir) + + +class EggInfoPkgPipInstalledNoModules(OnSysPath, SiteDir): + files: FilesDef = { + "egg_with_no_modules_pkg.egg-info": { + "PKG-INFO": "Name: egg_with_no_modules-pkg", + # SOURCES.txt is made from the source archive, and contains files + # (setup.py) that are not present after installation. + "SOURCES.txt": """ + setup.py + egg_with_no_modules_pkg.egg-info/PKG-INFO + egg_with_no_modules_pkg.egg-info/SOURCES.txt + egg_with_no_modules_pkg.egg-info/top_level.txt + """, + # installed-files.txt is written by pip, and is a strictly more + # accurate source than SOURCES.txt as to the installed contents of + # the package. + "installed-files.txt": """ + PKG-INFO + SOURCES.txt + top_level.txt + """, + # top_level.txt correctly reflects that no modules are installed + "top_level.txt": b"\n", + }, + } + + def setUp(self): + super().setUp() + build_files(EggInfoPkgPipInstalledNoModules.files, prefix=self.site_dir) + + +class EggInfoPkgSourcesFallback(OnSysPath, SiteDir): + files: FilesDef = { + "sources_fallback_pkg.egg-info": { + "PKG-INFO": "Name: sources_fallback-pkg", + # SOURCES.txt is made from the source archive, and contains files + # (setup.py) that are not present after installation. + "SOURCES.txt": """ + sources_fallback.py + setup.py + sources_fallback_pkg.egg-info/PKG-INFO + sources_fallback_pkg.egg-info/SOURCES.txt + """, + # missing installed-files.txt (i.e. not installed by pip) and + # missing top_level.txt (to trigger fallback to SOURCES.txt) + }, + "sources_fallback.py": """ + def main(): + print("hello world") + """, + } + + def setUp(self): + super().setUp() + build_files(EggInfoPkgSourcesFallback.files, prefix=self.site_dir) + + class EggInfoFile(OnSysPath, SiteDir): files: FilesDef = { "egginfo_file.egg-info": """ diff --git a/tests/test_api.py b/tests/test_api.py index 6dbce1fc..a85c62ad 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -27,6 +27,9 @@ def suppress_known_deprecation(): class APITests( fixtures.EggInfoPkg, + fixtures.EggInfoPkgPipInstalledNoToplevel, + fixtures.EggInfoPkgPipInstalledNoModules, + fixtures.EggInfoPkgSourcesFallback, fixtures.DistInfoPkg, fixtures.DistInfoPkgWithDot, fixtures.EggInfoFile, @@ -62,15 +65,28 @@ def test_prefix_not_matched(self): distribution(prefix) def test_for_top_level(self): - self.assertEqual( - distribution('egginfo-pkg').read_text('top_level.txt').strip(), 'mod' - ) + tests = [ + ('egginfo-pkg', 'mod'), + ('egg_with_no_modules-pkg', ''), + ] + for pkg_name, expect_content in tests: + with self.subTest(pkg_name): + self.assertEqual( + distribution(pkg_name).read_text('top_level.txt').strip(), + expect_content, + ) def test_read_text(self): - top_level = [ - path for path in files('egginfo-pkg') if path.name == 'top_level.txt' - ][0] - self.assertEqual(top_level.read_text(), 'mod\n') + tests = [ + ('egginfo-pkg', 'mod\n'), + ('egg_with_no_modules-pkg', '\n'), + ] + for pkg_name, expect_content in tests: + with self.subTest(pkg_name): + top_level = [ + path for path in files(pkg_name) if path.name == 'top_level.txt' + ][0] + self.assertEqual(top_level.read_text(), expect_content) def test_entry_points(self): eps = entry_points() @@ -184,6 +200,9 @@ def test_files_dist_info(self): def test_files_egg_info(self): self._test_files(files('egginfo-pkg')) + self._test_files(files('egg_with_module-pkg')) + self._test_files(files('egg_with_no_modules-pkg')) + self._test_files(files('sources_fallback-pkg')) def test_version_egg_info_file(self): self.assertEqual(version('egginfo-file'), '0.1') diff --git a/tests/test_main.py b/tests/test_main.py index cc26c56c..7d6c79a4 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -171,11 +171,21 @@ def test_metadata_loads_egg_info(self): assert meta['Description'] == 'pôrˈtend' -class DiscoveryTests(fixtures.EggInfoPkg, fixtures.DistInfoPkg, unittest.TestCase): +class DiscoveryTests( + fixtures.EggInfoPkg, + fixtures.EggInfoPkgPipInstalledNoToplevel, + fixtures.EggInfoPkgPipInstalledNoModules, + fixtures.EggInfoPkgSourcesFallback, + fixtures.DistInfoPkg, + unittest.TestCase, +): def test_package_discovery(self): dists = list(distributions()) assert all(isinstance(dist, Distribution) for dist in dists) assert any(dist.metadata['Name'] == 'egginfo-pkg' for dist in dists) + assert any(dist.metadata['Name'] == 'egg_with_module-pkg' for dist in dists) + assert any(dist.metadata['Name'] == 'egg_with_no_modules-pkg' for dist in dists) + assert any(dist.metadata['Name'] == 'sources_fallback-pkg' for dist in dists) assert any(dist.metadata['Name'] == 'distinfo-pkg' for dist in dists) def test_invalid_usage(self): @@ -362,3 +372,40 @@ def test_packages_distributions_all_module_types(self): assert distributions[f'in_package_{i}'] == ['all_distributions'] assert not any(name.endswith('.dist-info') for name in distributions) + + +class PackagesDistributionsEggTest( + fixtures.EggInfoPkg, + fixtures.EggInfoPkgPipInstalledNoToplevel, + fixtures.EggInfoPkgPipInstalledNoModules, + fixtures.EggInfoPkgSourcesFallback, + unittest.TestCase, +): + def test_packages_distributions_on_eggs(self): + """ + Test old-style egg packages with a variation of 'top_level.txt', + 'SOURCES.txt', and 'installed-files.txt', available. + """ + distributions = packages_distributions() + + def import_names_from_package(package_name): + return { + import_name + for import_name, package_names in distributions.items() + if package_name in package_names + } + + # egginfo-pkg declares one import ('mod') via top_level.txt + assert import_names_from_package('egginfo-pkg') == {'mod'} + + # egg_with_module-pkg has one import ('egg_with_module') inferred from + # installed-files.txt (top_level.txt is missing) + assert import_names_from_package('egg_with_module-pkg') == {'egg_with_module'} + + # egg_with_no_modules-pkg should not be associated with any import names + # (top_level.txt is empty, and installed-files.txt has no .py files) + assert import_names_from_package('egg_with_no_modules-pkg') == set() + + # sources_fallback-pkg has one import ('sources_fallback') inferred from + # SOURCES.txt (top_level.txt and installed-files.txt is missing) + assert import_names_from_package('sources_fallback-pkg') == {'sources_fallback'}