From c201a187be998ec9e9c179004e0cd28dc2c86137 Mon Sep 17 00:00:00 2001 From: Shantanu <12621235+hauntsaninja@users.noreply.github.com> Date: Wed, 16 Oct 2024 18:52:20 -0700 Subject: [PATCH] Make is_sub_path faster (#17962) See #17948 - 1.01x faster on clean - 1.06x faster on long - 1.04x faster on openai - 1.26x faster on openai incremental --- mypy/build.py | 9 ++++----- mypy/modulefinder.py | 9 +++++++-- mypy/util.py | 24 ++++++++++++++++++++---- 3 files changed, 31 insertions(+), 11 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index 52c11e065b63..ac6471d2383f 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -59,7 +59,7 @@ get_mypy_comments, hash_digest, is_stub_package_file, - is_sub_path, + is_sub_path_normabs, is_typeshed_file, module_prefix, read_py_file, @@ -3528,10 +3528,9 @@ def is_silent_import_module(manager: BuildManager, path: str) -> bool: if manager.options.no_silence_site_packages: return False # Silence errors in site-package dirs and typeshed - return any( - is_sub_path(path, dir) - for dir in manager.search_paths.package_path + manager.search_paths.typeshed_path - ) + if any(is_sub_path_normabs(path, dir) for dir in manager.search_paths.package_path): + return True + return any(is_sub_path_normabs(path, dir) for dir in manager.search_paths.typeshed_path) def write_undocumented_ref_info( diff --git a/mypy/modulefinder.py b/mypy/modulefinder.py index 94d8a5d59e1f..49c39a9ce91c 100644 --- a/mypy/modulefinder.py +++ b/mypy/modulefinder.py @@ -668,10 +668,13 @@ def mypy_path() -> list[str]: def default_lib_path( data_dir: str, pyversion: tuple[int, int], custom_typeshed_dir: str | None ) -> list[str]: - """Return default standard library search paths.""" + """Return default standard library search paths. Guaranteed to be normalised.""" + + data_dir = os.path.abspath(data_dir) path: list[str] = [] if custom_typeshed_dir: + custom_typeshed_dir = os.path.abspath(custom_typeshed_dir) typeshed_dir = os.path.join(custom_typeshed_dir, "stdlib") mypy_extensions_dir = os.path.join(custom_typeshed_dir, "stubs", "mypy-extensions") versions_file = os.path.join(typeshed_dir, "VERSIONS") @@ -711,7 +714,7 @@ def default_lib_path( @functools.lru_cache(maxsize=None) def get_search_dirs(python_executable: str | None) -> tuple[list[str], list[str]]: - """Find package directories for given python. + """Find package directories for given python. Guaranteed to return absolute paths. This runs a subprocess call, which generates a list of the directories in sys.path. To avoid repeatedly calling a subprocess (which can be slow!) we @@ -773,6 +776,7 @@ def compute_search_paths( root_dir = os.getenv("MYPY_TEST_PREFIX", None) if not root_dir: root_dir = os.path.dirname(os.path.dirname(__file__)) + root_dir = os.path.abspath(root_dir) lib_path.appendleft(os.path.join(root_dir, "test-data", "unit", "lib-stub")) # alt_lib_path is used by some tests to bypass the normal lib_path mechanics. # If we don't have one, grab directories of source files. @@ -829,6 +833,7 @@ def compute_search_paths( return SearchPaths( python_path=tuple(reversed(python_path)), mypy_path=tuple(mypypath), + # package_path and typeshed_path must be normalised and absolute via os.path.abspath package_path=tuple(sys_path + site_packages), typeshed_path=tuple(lib_path), ) diff --git a/mypy/util.py b/mypy/util.py index 8ec979af27e1..2eac2a86dfd0 100644 --- a/mypy/util.py +++ b/mypy/util.py @@ -6,7 +6,6 @@ import io import json import os -import pathlib import re import shutil import sys @@ -418,9 +417,26 @@ def replace_object_state( pass -def is_sub_path(path1: str, path2: str) -> bool: - """Given two paths, return if path1 is a sub-path of path2.""" - return pathlib.Path(path2) in pathlib.Path(path1).parents +def is_sub_path_normabs(path: str, dir: str) -> bool: + """Given two paths, return if path is a sub-path of dir. + + Moral equivalent of: Path(dir) in Path(path).parents + + Similar to the pathlib version: + - Treats paths case-sensitively + - Does not fully handle unnormalised paths (e.g. paths with "..") + - Does not handle a mix of absolute and relative paths + Unlike the pathlib version: + - Fast + - On Windows, assumes input has been slash normalised + - Handles even fewer unnormalised paths (e.g. paths with "." and "//") + + As a result, callers should ensure that inputs have had os.path.abspath called on them + (note that os.path.abspath will normalise) + """ + if not dir.endswith(os.sep): + dir += os.sep + return path.startswith(dir) if sys.platform == "linux" or sys.platform == "darwin":