Skip to content

Commit

Permalink
Refactor all package-mapping code into a new fawltydeps.packages module
Browse files Browse the repository at this point in the history
This moves:
 - DependenciesMapping and Package from fawltydeps.types
 - LocalPackageLookup and resolve_dependencies from fawltydeps.check
into the new fawltydeps.packages module.

On the tests/ side, we collect:
 - Package tests from test_types
 - LocalPackageLookup and resolve_dependencies() tests from
   test_map_dep_name_to_import_names
 - resolve_dependencies() tests from
   test_compare_imports_to_dependencies
into the corresponding new test_packages modules.

Since test_packages reuses the FDTestVector test vectors from
test_compare_imports_to_dependencies, these now move into the utils
module, where they can easily be shared between test modules.

Finally, when combining resolve_dependencies() tests from
test_map_dep_name_to_import_names + test_compare_imports_to_dependencies
some overlap in test cases was identified and eliminated. The test
parameters to test_resolve_dependencies__focus_on_mappings() retain the
deduplicated test parameters.
  • Loading branch information
jherland committed Mar 6, 2023
1 parent 24689e8 commit 7b7b861
Show file tree
Hide file tree
Showing 9 changed files with 590 additions and 639 deletions.
78 changes: 3 additions & 75 deletions fawltydeps/check.py
Original file line number Diff line number Diff line change
@@ -1,93 +1,21 @@
"Compare imports and dependencies"
"""Compare imports and dependencies to determine undeclared and unused deps."""

import logging
import sys
from itertools import groupby
from typing import Dict, Iterable, List, Optional, Tuple
from typing import Dict, List

from fawltydeps.packages import Package
from fawltydeps.settings import Settings
from fawltydeps.types import (
DeclaredDependency,
DependenciesMapping,
Package,
ParsedImport,
UndeclaredDependency,
UnusedDependency,
)

# importlib.metadata.packages_distributions() was introduced in v3.10, but it
# is not able to infer import names for modules lacking a top_level.txt until
# v3.11. Hence we prefer importlib_metadata in v3.10 as well as pre-v3.10.
if sys.version_info >= (3, 11):
from importlib.metadata import packages_distributions
else:
from importlib_metadata import packages_distributions

logger = logging.getLogger(__name__)


class LocalPackageLookup:
    """Lookup import names exposed by packages installed in the current venv."""

    def __init__(self) -> None:
        """Collect packages installed in the current python environment.

        Use importlib.metadata to look up the mapping between packages and their
        provided import names. This obviously depends on the Python environment
        (e.g. virtualenv) that we're calling from.
        """
        # We call packages_distributions() only _once_ here, and build a cache
        # of Package objects (keyed by normalized package name) from the
        # information extracted.
        self.packages: Dict[str, Package] = {}
        for import_name, package_names in packages_distributions().items():
            for package_name in package_names:
                key = Package.normalize_name(package_name)
                package = self.packages.get(key)
                if package is None:
                    # First time we see this distribution: create its Package.
                    # (Avoids constructing a throwaway Package on every pass.)
                    package = Package(package_name)
                    self.packages[key] = package
                package.add_import_names(
                    import_name, mapping=DependenciesMapping.LOCAL_ENV
                )

    def lookup_package(self, package_name: str) -> Optional[Package]:
        """Convert a package name to a locally available Package object.

        (Although this function generally works with _all_ locally available
        packages, we apply it only to the subset that is the dependencies of
        the current project.)

        Return the Package object that encapsulates the package-name-to-import-
        names mapping for the given package name.

        Return None if we're unable to find any import names for the given
        package name. This is typically because the package is missing from the
        current environment, or because we fail to determine its provided import
        names.
        """
        # The cache is keyed by normalized name, so normalize before lookup.
        return self.packages.get(Package.normalize_name(package_name))


def resolve_dependencies(dep_names: Iterable[str]) -> Dict[str, Package]:
    """Associate dependencies with corresponding Package objects.

    Use LocalPackageLookup to find Package objects for each of the given
    dependencies. For dependencies that cannot be found with LocalPackageLookup,
    fabricate an identity mapping (a pseudo-package making available an import
    of the same name as the package, modulo normalization).

    Return a dict mapping dependency names to the resolved Package objects.
    Names that occur more than once in dep_names are resolved only once.
    """
    ret: Dict[str, Package] = {}
    local_packages = LocalPackageLookup()
    for name in dep_names:
        if name in ret:
            continue  # already resolved this dependency name
        package = local_packages.lookup_package(name)
        if package is None:  # fall back to identity mapping
            package = Package.identity_mapping(name)
        ret[name] = package
    return ret


def calculate_undeclared(
imports: List[ParsedImport],
resolved_deps: Dict[str, Package],
Expand Down
8 changes: 2 additions & 6 deletions fawltydeps/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,9 @@
from pydantic.json import custom_pydantic_encoder # pylint: disable=no-name-in-module

from fawltydeps import extract_imports
from fawltydeps.check import (
calculate_undeclared,
calculate_unused,
resolve_dependencies,
)
from fawltydeps.check import calculate_undeclared, calculate_unused
from fawltydeps.extract_declared_dependencies import extract_declared_dependencies
from fawltydeps.packages import Package, resolve_dependencies
from fawltydeps.settings import (
Action,
OutputFormat,
Expand All @@ -38,7 +35,6 @@
)
from fawltydeps.types import (
DeclaredDependency,
Package,
ParsedImport,
UndeclaredDependency,
UnparseablePathException,
Expand Down
158 changes: 158 additions & 0 deletions fawltydeps/packages.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
"""Encapsulate the lookup of packages and their provided import names."""

import logging
import sys
from dataclasses import dataclass, field
from enum import Enum
from typing import Dict, Iterable, Optional, Set

from fawltydeps.utils import hide_dataclass_fields

# importlib.metadata.packages_distributions() was introduced in v3.10, but it
# is not able to infer import names for modules lacking a top_level.txt until
# v3.11. Hence we prefer importlib_metadata in v3.10 as well as pre-v3.10.
if sys.version_info >= (3, 11):
from importlib.metadata import packages_distributions
else:
from importlib_metadata import packages_distributions

logger = logging.getLogger(__name__)


class DependenciesMapping(str, Enum):
    """Identify which kind of mapping associated a package with import names.

    The enum mixes in ``str``, so every member is itself a string that compares
    equal to its value.
    """

    # The import name is taken to be the same as the package name.
    IDENTITY = "identity"
    # The import names were found in the local Python environment.
    LOCAL_ENV = "local_env"


@dataclass
class Package:
    """Encapsulate an installable Python package.

    This encapsulates the mapping between a package name (i.e. something you can
    pass to `pip install`) and the import names that it provides once it is
    installed.
    """

    package_name: str
    # Import names provided by this package, grouped by the kind of mapping
    # that contributed them.
    mappings: Dict[DependenciesMapping, Set[str]] = field(default_factory=dict)
    # Union of all the sets in .mappings. Always recomputed in __post_init__(),
    # so any value passed to the constructor is discarded.
    import_names: Set[str] = field(default_factory=set)

    def __post_init__(self) -> None:
        # The .import_names member is entirely redundant, as it can always be
        # calculated from a union of self.mappings.values(). However, it is
        # still used often enough (.is_used() is called once per declared
        # dependency) that it makes sense to pre-calculate it, and rather hide
        # the redundancy from our JSON output
        self.import_names = {name for names in self.mappings.values() for name in names}
        hide_dataclass_fields(self, "import_names")

    @staticmethod
    def normalize_name(package_name: str) -> str:
        """Perform standard normalization of package names.

        Verbatim package names are not always appropriate to use in various
        contexts: For example, a package can be installed using one spelling
        (e.g. typing-extensions), but once installed, it is presented in the
        context of the local environment with a slightly different spelling
        (e.g. typing_extensions).

        The name is lowercased and every "-" is replaced with "_".
        """
        return package_name.lower().replace("-", "_")

    def add_import_names(
        self, *import_names: str, mapping: DependenciesMapping
    ) -> None:
        """Add import names provided by this package.

        Import names must be associated with a DependenciesMapping enum value,
        as keeping track of this is extremely helpful when debugging.
        """
        # Keep the per-mapping sets and the pre-calculated union in sync.
        self.mappings.setdefault(mapping, set()).update(import_names)
        self.import_names.update(import_names)

    def add_identity_import(self) -> None:
        """Add identity mapping to this package.

        This builds on an assumption that a package 'foo' installed with e.g.
        `pip install foo`, will also provide an import name 'foo'. This
        assumption does not always hold, but sometimes we don't have much else
        to go on...
        """
        self.add_import_names(
            self.normalize_name(self.package_name),
            mapping=DependenciesMapping.IDENTITY,
        )

    @classmethod
    def identity_mapping(cls, package_name: str) -> "Package":
        """Factory for conveniently creating identity-mapped package object."""
        ret = cls(package_name)
        ret.add_identity_import()
        return ret

    def is_used(self, imported_names: Iterable[str]) -> bool:
        """Return True iff any import name of this package is among the given names."""
        return bool(self.import_names.intersection(imported_names))


class LocalPackageLookup:
    """Lookup import names exposed by packages installed in the current venv."""

    def __init__(self) -> None:
        """Collect packages installed in the current python environment.

        Use importlib.metadata to look up the mapping between packages and their
        provided import names. This obviously depends on the Python environment
        (e.g. virtualenv) that we're calling from.
        """
        # We call packages_distributions() only _once_ here, and build a cache
        # of Package objects (keyed by normalized package name) from the
        # information extracted.
        self.packages: Dict[str, Package] = {}
        for import_name, package_names in packages_distributions().items():
            for package_name in package_names:
                key = Package.normalize_name(package_name)
                package = self.packages.get(key)
                if package is None:
                    # First time we see this distribution: create its Package.
                    # (Avoids constructing a throwaway Package on every pass.)
                    package = Package(package_name)
                    self.packages[key] = package
                package.add_import_names(
                    import_name, mapping=DependenciesMapping.LOCAL_ENV
                )

    def lookup_package(self, package_name: str) -> Optional[Package]:
        """Convert a package name to a locally available Package object.

        (Although this function generally works with _all_ locally available
        packages, we apply it only to the subset that is the dependencies of
        the current project.)

        Return the Package object that encapsulates the package-name-to-import-
        names mapping for the given package name.

        Return None if we're unable to find any import names for the given
        package name. This is typically because the package is missing from the
        current environment, or because we fail to determine its provided import
        names.
        """
        # The cache is keyed by normalized name, so normalize before lookup.
        return self.packages.get(Package.normalize_name(package_name))


def resolve_dependencies(dep_names: Iterable[str]) -> Dict[str, Package]:
    """Associate dependencies with corresponding Package objects.

    Use LocalPackageLookup to find Package objects for each of the given
    dependencies. For dependencies that cannot be found with LocalPackageLookup,
    fabricate an identity mapping (a pseudo-package making available an import
    of the same name as the package, modulo normalization).

    Return a dict mapping dependency names to the resolved Package objects.
    Names that occur more than once in dep_names are resolved only once.
    """
    ret: Dict[str, Package] = {}
    local_packages = LocalPackageLookup()
    for name in dep_names:
        if name in ret:
            continue  # already resolved this dependency name
        package = local_packages.lookup_package(name)
        if package is None:  # fall back to identity mapping
            package = Package.identity_mapping(name)
        ret[name] = package
    return ret
80 changes: 1 addition & 79 deletions fawltydeps/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,9 @@

import sys
from dataclasses import asdict, dataclass, field, replace
from enum import Enum
from functools import total_ordering
from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional, Set, Tuple, Union
from typing import Any, Dict, List, Optional, Tuple, Union

from fawltydeps.utils import hide_dataclass_fields

Expand Down Expand Up @@ -124,83 +123,6 @@ class DeclaredDependency:
source: Location


class DependenciesMapping(str, Enum):
    """Identify which kind of mapping associated a package with import names.

    The enum mixes in ``str``, so every member is itself a string that compares
    equal to its value.
    """

    # The import name is taken to be the same as the package name.
    IDENTITY = "identity"
    # The import names were found in the local Python environment.
    LOCAL_ENV = "local_env"


@dataclass
class Package:
    """Encapsulate an installable Python package.

    This encapsulates the mapping between a package name (i.e. something you can
    pass to `pip install`) and the import names that it provides once it is
    installed.
    """

    package_name: str
    # Import names provided by this package, grouped by the kind of mapping
    # that contributed them.
    mappings: Dict[DependenciesMapping, Set[str]] = field(default_factory=dict)
    # Union of all the sets in .mappings. Always recomputed in __post_init__(),
    # so any value passed to the constructor is discarded.
    import_names: Set[str] = field(default_factory=set)

    def __post_init__(self) -> None:
        # The .import_names member is entirely redundant, as it can always be
        # calculated from a union of self.mappings.values(). However, it is
        # still used often enough (.is_used() is called once per declared
        # dependency) that it makes sense to pre-calculate it, and rather hide
        # the redundancy from our JSON output
        self.import_names = {name for names in self.mappings.values() for name in names}
        hide_dataclass_fields(self, "import_names")

    @staticmethod
    def normalize_name(package_name: str) -> str:
        """Perform standard normalization of package names.

        Verbatim package names are not always appropriate to use in various
        contexts: For example, a package can be installed using one spelling
        (e.g. typing-extensions), but once installed, it is presented in the
        context of the local environment with a slightly different spelling
        (e.g. typing_extensions).

        The name is lowercased and every "-" is replaced with "_".
        """
        return package_name.lower().replace("-", "_")

    def add_import_names(
        self, *import_names: str, mapping: DependenciesMapping
    ) -> None:
        """Add import names provided by this package.

        Import names must be associated with a DependenciesMapping enum value,
        as keeping track of this is extremely helpful when debugging.
        """
        # Keep the per-mapping sets and the pre-calculated union in sync.
        self.mappings.setdefault(mapping, set()).update(import_names)
        self.import_names.update(import_names)

    def add_identity_import(self) -> None:
        """Add identity mapping to this package.

        This builds on an assumption that a package 'foo' installed with e.g.
        `pip install foo`, will also provide an import name 'foo'. This
        assumption does not always hold, but sometimes we don't have much else
        to go on...
        """
        self.add_import_names(
            self.normalize_name(self.package_name),
            mapping=DependenciesMapping.IDENTITY,
        )

    @classmethod
    def identity_mapping(cls, package_name: str) -> "Package":
        """Factory for conveniently creating identity-mapped package object."""
        ret = cls(package_name)
        ret.add_identity_import()
        return ret

    def is_used(self, imported_names: Iterable[str]) -> bool:
        """Return True iff any import name of this package is among the given names."""
        return bool(self.import_names.intersection(imported_names))


@dataclass
class UndeclaredDependency:
"""Undeclared dependency found by analysis in the 'check' module."""
Expand Down
Loading

0 comments on commit 7b7b861

Please sign in to comment.