From b28db698f50a82bf861b76e7ad6d75c31f1a820e Mon Sep 17 00:00:00 2001 From: Douglas Thor Date: Sun, 17 Nov 2024 16:38:57 -0800 Subject: [PATCH] refactor: Add GAZELLE_VERBOSE env and log parser failures (#2420) While investigating #2396 and why #2413 doesn't appear to be working for us, I realized that I had been making heavy use of additional parser logging that I had added. This change adds some of that logging. I also threw in some documentation comments because I found them helpful. Users can (attempt to) get additional parse failure information by setting the `GAZELLE_VERBOSE` environment variable to `1`. For example: ```console $ GAZELLE_VERBOSE=1 bazel run //:gazelle ``` Here are some example logs: ```console $ GAZELLE_VERBOSE=1 bazel run //:gazelle INFO: Invocation ID: a4e026d8-17df-426c-b1cc-d3980690dd53 ... INFO: Running command line: bazel-bin/gazelle INFO: Streaming build results to: https://btx.cloud.google.com/invocations/a4e026d8-17df-426c-b1cc-d3980690dd53 gazelle: WARNING: failed to parse "hello/get_deps.py". The resulting BUILD target may be incorrect. gazelle: Parse error at {Row:1 Column:0}: def search_one_more_level[T](): gazelle: The above was parsed as: (ERROR (identifier) (call function: (list (identifier)) arguments: (argument_list))) gazelle: ERROR: failed to generate target "//hello:get_deps" of kind "py_library": a target of kind "pyle_py_binary" with the same name already exists. Use the '# gazelle:python_library_naming_convention' directive to change the naming convention. $ $ bazel run //:gazelle INFO: Invocation ID: 726c9fd6-f566-4c30-95ef-c4781ad155de ... INFO: Running command line: bazel-bin/gazelle INFO: Streaming build results to: https://btx.cloud.google.com/invocations/726c9fd6-f566-4c30-95ef-c4781ad155de gazelle: WARNING: failed to parse "hello/get_deps.py". The resulting BUILD target may be incorrect. 
gazelle: ERROR: failed to generate target "//hello:get_deps" of kind "py_library": a target of kind "pyle_py_binary" with the same name already exists. Use the '# gazelle:python_library_naming_convention' directive to change the naming convention. ``` --------- Co-authored-by: Richard Levasseur Co-authored-by: Richard Levasseur --- CHANGELOG.md | 3 ++- gazelle/python/file_parser.go | 35 ++++++++++++++++++++++++++++++++--- 2 files changed, 34 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 65a7d992cb..dd1c2ff67e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -60,7 +60,8 @@ Unreleased changes template. {#v0-0-0-added} ### Added -* Nothing added. +* (gazelle): Parser failures will now be logged to the terminal. Additional + details can be logged by setting `GAZELLE_VERBOSE=1`. {#v0-0-0-removed} ### Removed diff --git a/gazelle/python/file_parser.go b/gazelle/python/file_parser.go index a2b22c2b8f..9101621639 100644 --- a/gazelle/python/file_parser.go +++ b/gazelle/python/file_parser.go @@ -17,6 +17,7 @@ package python import ( "context" "fmt" + "log" "os" "path/filepath" "strings" @@ -55,7 +56,10 @@ func NewFileParser() *FileParser { return &FileParser{} } -func ParseCode(code []byte) (*sitter.Node, error) { +// ParseCode instantiates a new tree-sitter Parser and parses the python code, returning +// the tree-sitter RootNode. +// It prints a warning if parsing fails. +func ParseCode(code []byte, path string) (*sitter.Node, error) { parser := sitter.NewParser() parser.SetLanguage(python.GetLanguage()) @@ -64,9 +68,27 @@ func ParseCode(code []byte) (*sitter.Node, error) { return nil, err } - return tree.RootNode(), nil + root := tree.RootNode() + if root.HasError() { + log.Printf("WARNING: failed to parse %q. 
The resulting BUILD target may be incorrect.", path) + + verbose, envExists := os.LookupEnv("GAZELLE_VERBOSE") + if envExists && verbose == "1" { + for i := 0; i < int(root.ChildCount()); i++ { + child := root.Child(i) + if child.IsError() { + log.Printf("Parse error at %+v:\n%+v", child.StartPoint(), child.Content(code)) + log.Printf("The above was parsed as: %v", child.String()) + } + } + } + } + + return root, nil } +// parseMain returns true if the python file has an `if __name__ == "__main__":` block, +// which is a common idiom for python scripts/binaries. func (p *FileParser) parseMain(ctx context.Context, node *sitter.Node) bool { for i := 0; i < int(node.ChildCount()); i++ { if err := ctx.Err(); err != nil { @@ -94,6 +116,8 @@ func (p *FileParser) parseMain(ctx context.Context, node *sitter.Node) bool { return false } +// parseImportStatement parses a node for an import statement, returning a `module` and a boolean +// representing if the parse was OK or not. func parseImportStatement(node *sitter.Node, code []byte) (module, bool) { switch node.Type() { case sitterNodeTypeDottedName: @@ -112,6 +136,9 @@ func parseImportStatement(node *sitter.Node, code []byte) (module, bool) { return module{}, false } +// parseImportStatements parses a node for import statements, returning true if the node is +// an import statement. It updates FileParser.output.Modules with the `module` that the +// import represents. func (p *FileParser) parseImportStatements(node *sitter.Node) bool { if node.Type() == sitterNodeTypeImportStatement { for j := 1; j < int(node.ChildCount()); j++ { @@ -146,6 +173,8 @@ func (p *FileParser) parseImportStatements(node *sitter.Node) bool { return true } +// parseComments parses a node for comments, returning true if the node is a comment. +// It updates FileParser.output.Comments with the parsed comment. 
func (p *FileParser) parseComments(node *sitter.Node) bool { if node.Type() == sitterNodeTypeComment { p.output.Comments = append(p.output.Comments, comment(node.Content(p.code))) @@ -180,7 +209,7 @@ func (p *FileParser) parse(ctx context.Context, node *sitter.Node) { } func (p *FileParser) Parse(ctx context.Context) (*ParserOutput, error) { - rootNode, err := ParseCode(p.code) + rootNode, err := ParseCode(p.code, p.relFilepath) if err != nil { return nil, err }