Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Speed up stubs suggestions #17965

Merged
merged 7 commits into from
Oct 16, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 7 additions & 14 deletions mypy/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@
from mypy.plugins.default import DefaultPlugin
from mypy.renaming import LimitedVariableRenameVisitor, VariableRenameVisitor
from mypy.stats import dump_type_stats
from mypy.stubinfo import legacy_bundled_packages, non_bundled_packages, stub_distribution_name
from mypy.stubinfo import is_module_from_legacy_bundled_package, stub_distribution_name
from mypy.types import Type
from mypy.typestate import reset_global_state, type_state
from mypy.version import __version__
Expand Down Expand Up @@ -2667,17 +2667,13 @@ def find_module_and_diagnose(

ignore_missing_imports = options.ignore_missing_imports

id_components = id.split(".")
# Don't honor a global (not per-module) ignore_missing_imports
# setting for modules that used to have bundled stubs, as
# otherwise updating mypy can silently result in new false
# negatives. (Unless there are stubs but they are incomplete.)
global_ignore_missing_imports = manager.options.ignore_missing_imports
if (
any(
".".join(id_components[:i]) in legacy_bundled_packages
for i in range(len(id_components), 0, -1)
)
is_module_from_legacy_bundled_package(id)
and global_ignore_missing_imports
and not options.ignore_missing_imports_per_module
and result is ModuleNotFoundReason.APPROVED_STUBS_NOT_INSTALLED
Expand Down Expand Up @@ -2798,18 +2794,15 @@ def module_not_found(
code = codes.IMPORT
errors.report(line, 0, msg.format(module=target), code=code)

components = target.split(".")
for i in range(len(components), 0, -1):
module = ".".join(components[:i])
if module in legacy_bundled_packages or module in non_bundled_packages:
break

dist = stub_distribution_name(target)
for note in notes:
if "{stub_dist}" in note:
note = note.format(stub_dist=stub_distribution_name(module))
assert dist is not None
note = note.format(stub_dist=dist)
errors.report(line, 0, note, severity="note", only_once=True, code=code)
if reason is ModuleNotFoundReason.APPROVED_STUBS_NOT_INSTALLED:
manager.missing_stub_packages.add(stub_distribution_name(module))
assert dist is not None
manager.missing_stub_packages.add(dist)
errors.set_import_context(save_import_context)


Expand Down
5 changes: 2 additions & 3 deletions mypy/modulefinder.py
Original file line number Diff line number Diff line change
Expand Up @@ -331,9 +331,8 @@ def _find_module_non_stub_helper(
# If this is not a directory then we can't traverse further into it
if not self.fscache.isdir(dir_path):
break
for i in range(len(components), 0, -1):
if approved_stub_package_exists(".".join(components[:i])):
return ModuleNotFoundReason.APPROVED_STUBS_NOT_INSTALLED
if approved_stub_package_exists(".".join(components)):
return ModuleNotFoundReason.APPROVED_STUBS_NOT_INSTALLED
if plausible_match:
return ModuleNotFoundReason.FOUND_WITHOUT_TYPE_HINTS
else:
Expand Down
60 changes: 46 additions & 14 deletions mypy/stubinfo.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,53 @@
from __future__ import annotations


def is_legacy_bundled_package(prefix: str) -> bool:
return prefix in legacy_bundled_packages
def is_module_from_legacy_bundled_package(module: str) -> bool:
top_level = module.split(".")[0]
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if module.split(".", 1)[0] would be faster.

return top_level in legacy_bundled_packages


def approved_stub_package_exists(prefix: str) -> bool:
return is_legacy_bundled_package(prefix) or prefix in non_bundled_packages
def approved_stub_package_exists(module: str) -> bool:
components = module.split(".")
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Again, it might be slightly faster to use module.split(".", 1)[0] to calculate prefix, and only do the full split in the body of the last if statement as needed.

top_level = components[0]
if top_level in legacy_bundled_packages:
return True
if top_level in non_bundled_packages_flat:
return True
if top_level in non_bundled_packages_namespace:
namespace = non_bundled_packages_namespace[top_level]
for i in range(len(components), 0, -1):
module = ".".join(components[:i])
if module in namespace:
return True
return False


def stub_distribution_name(prefix: str) -> str:
return legacy_bundled_packages.get(prefix) or non_bundled_packages[prefix]
def stub_distribution_name(module: str) -> str | None:
components = module.split(".")
top_level = components[0]

dist = legacy_bundled_packages.get(top_level)
if dist:
return dist
dist = non_bundled_packages_flat.get(top_level)
if dist:
return dist

if top_level in non_bundled_packages_namespace:
namespace = non_bundled_packages_namespace[top_level]
for i in range(len(components), 0, -1):
module = ".".join(components[:i])
dist = namespace.get(module)
if dist:
return dist

return None


# Stubs for these third-party packages used to be shipped with mypy.
#
# Map package name to PyPI stub distribution name.
legacy_bundled_packages = {
legacy_bundled_packages: dict[str, str] = {
"aiofiles": "types-aiofiles",
"bleach": "types-bleach",
"boto": "types-boto",
Expand All @@ -32,7 +63,6 @@ def stub_distribution_name(prefix: str) -> str:
"docutils": "types-docutils",
"first": "types-first",
"gflags": "types-python-gflags",
"google.protobuf": "types-protobuf",
"markdown": "types-Markdown",
"mock": "types-mock",
"OpenSSL": "types-pyOpenSSL",
Expand Down Expand Up @@ -66,20 +96,17 @@ def stub_distribution_name(prefix: str) -> str:
# include packages that have a release that includes PEP 561 type
# information.
#
# Package name can have one or two components ('a' or 'a.b').
#
# Note that these packages are omitted for now:
# pika: typeshed's stubs are on PyPI as types-pika-ts.
# types-pika already exists on PyPI, and is more complete in many ways,
# but is a non-typeshed stubs package.
non_bundled_packages = {
non_bundled_packages_flat: dict[str, str] = {
"MySQLdb": "types-mysqlclient",
"PIL": "types-Pillow",
"PyInstaller": "types-pyinstaller",
"Xlib": "types-python-xlib",
"aws_xray_sdk": "types-aws-xray-sdk",
"babel": "types-babel",
"backports.ssl_match_hostname": "types-backports.ssl_match_hostname",
"braintree": "types-braintree",
"bs4": "types-beautifulsoup4",
"bugbear": "types-flake8-bugbear",
Expand Down Expand Up @@ -107,7 +134,6 @@ def stub_distribution_name(prefix: str) -> str:
"flask_migrate": "types-Flask-Migrate",
"fpdf": "types-fpdf2",
"gdb": "types-gdb",
"google.cloud.ndb": "types-google-cloud-ndb",
"hdbcli": "types-hdbcli",
"html5lib": "types-html5lib",
"httplib2": "types-httplib2",
Expand All @@ -123,7 +149,6 @@ def stub_distribution_name(prefix: str) -> str:
"oauthlib": "types-oauthlib",
"openpyxl": "types-openpyxl",
"opentracing": "types-opentracing",
"paho.mqtt": "types-paho-mqtt",
"parsimonious": "types-parsimonious",
"passlib": "types-passlib",
"passpy": "types-passpy",
Expand Down Expand Up @@ -171,3 +196,10 @@ def stub_distribution_name(prefix: str) -> str:
"pandas": "pandas-stubs", # https://github.com/pandas-dev/pandas-stubs
"lxml": "lxml-stubs", # https://github.com/lxml/lxml-stubs
}


# Packages that are looked up by dotted module prefix rather than by top-level
# name alone. Outer key: top-level package name. Inner dict: dotted module
# prefix -> PyPI stub distribution name.
non_bundled_packages_namespace: dict[str, dict[str, str]] = {
"backports": {"backports.ssl_match_hostname": "types-backports.ssl_match_hostname"},
"google": {"google.cloud.ndb": "types-google-cloud-ndb", "google.protobuf": "types-protobuf"},
"paho": {"paho.mqtt": "types-paho-mqtt"},
}
35 changes: 31 additions & 4 deletions mypy/test/teststubinfo.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,38 @@

import unittest

from mypy.stubinfo import is_legacy_bundled_package
from mypy.stubinfo import (
approved_stub_package_exists,
is_module_from_legacy_bundled_package,
legacy_bundled_packages,
non_bundled_packages_flat,
stub_distribution_name,
)


class TestStubInfo(unittest.TestCase):
def test_is_legacy_bundled_packages(self) -> None:
assert not is_legacy_bundled_package("foobar_asdf")
assert is_legacy_bundled_package("pycurl")
assert is_legacy_bundled_package("dataclasses")
assert not is_module_from_legacy_bundled_package("foobar_asdf")
assert is_module_from_legacy_bundled_package("pycurl")
assert is_module_from_legacy_bundled_package("dataclasses")

def test_approved_stub_package_exists(self) -> None:
assert not approved_stub_package_exists("foobar_asdf")
assert approved_stub_package_exists("pycurl")
assert approved_stub_package_exists("babel")
assert approved_stub_package_exists("google.cloud.ndb")
assert approved_stub_package_exists("google.cloud.ndb.submodule")
assert not approved_stub_package_exists("google.cloud.unknown")

def test_stub_distribution_name(self) -> None:
assert stub_distribution_name("foobar_asdf") is None
assert stub_distribution_name("pycurl") == "types-pycurl"
assert stub_distribution_name("babel") == "types-babel"
assert stub_distribution_name("google.cloud.ndb") == "types-google-cloud-ndb"
assert stub_distribution_name("google.cloud.ndb.submodule") == "types-google-cloud-ndb"
assert stub_distribution_name("google.cloud.unknown") is None
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe also test google.protobuf, since it's a slightly different case as there are two packages under the google prefix?


def test_period_in_top_level(self) -> None:
for packages in (non_bundled_packages_flat, legacy_bundled_packages):
for top_level_module in packages:
assert "." not in top_level_module
Loading