From cf4ab7cba16b25f42d9d6b2464e22eb57df0fa8c Mon Sep 17 00:00:00 2001 From: Shaygan Hooshyari Date: Thu, 16 Jan 2025 07:08:15 +0100 Subject: [PATCH] Parse triple quoted string annotations as if parenthesized (#15387) ## Summary Resolves #9467 Parse quoted annotations as if the string content is inside parentheses. With this logic `y` and `z` in this example are equivalent: ```python y: """ int | str """ z: """( int | str ) """ ``` Also, this rule only applies to triple quotes ([link](https://github.com/python/typing-council/issues/9#issuecomment-1890808610)). This PR is based on the [comments](https://github.com/astral-sh/ruff/issues/9467#issuecomment-2579180991) on the issue. I made one extra change: since we don't want any indentation tokens, I am setting `State::Other` as the initial state of the lexer. Remaining work: - [x] Add a test case for red-knot. - [x] Add more tests. ## Test Plan Added a test which previously failed because the quoted annotation contained indentation. Added an mdtest for red-knot. Updated previous test. 
Co-authored-by: Dhruv Manilawala Co-authored-by: Micha Reiser --- .../resources/mdtest/annotations/string.md | 37 ++++++++++ .../src/types/string_annotation.rs | 20 ++--- .../resources/test/fixtures/pyflakes/F722.py | 31 ++++++++ .../test/fixtures/pyupgrade/UP037_2.pyi | 3 - ..._rules__pyflakes__tests__F722_F722.py.snap | 64 ++++++++++++++++ ..._rules__pyupgrade__tests__UP037_2.pyi.snap | 73 ++++++++++++++++++- crates/ruff_python_parser/src/lexer.rs | 16 +++- crates/ruff_python_parser/src/lib.rs | 68 ++++++++++++++++- crates/ruff_python_parser/src/parser/mod.rs | 4 +- crates/ruff_python_parser/src/typing.rs | 10 +-- 10 files changed, 295 insertions(+), 31 deletions(-) diff --git a/crates/red_knot_python_semantic/resources/mdtest/annotations/string.md b/crates/red_knot_python_semantic/resources/mdtest/annotations/string.md index af9f76a7557d3..7718bd9686311 100644 --- a/crates/red_knot_python_semantic/resources/mdtest/annotations/string.md +++ b/crates/red_knot_python_semantic/resources/mdtest/annotations/string.md @@ -173,3 +173,40 @@ p: "call()" r: "[1, 2]" s: "(1, 2)" ``` + +## Multi line annotation + +Quoted type annotations should be parsed as if surrounded by parentheses. 
+ +```py +def valid( + a1: """( + int | + str + ) + """, + a2: """ + int | + str + """, +): + reveal_type(a1) # revealed: int | str + reveal_type(a2) # revealed: int | str + +def invalid( + # error: [invalid-syntax-in-forward-annotation] + a1: """ + int | +str) +""", + # error: [invalid-syntax-in-forward-annotation] + a2: """ + int) | +str +""", + # error: [invalid-syntax-in-forward-annotation] + a3: """ + (int)) """, +): + pass +``` diff --git a/crates/red_knot_python_semantic/src/types/string_annotation.rs b/crates/red_knot_python_semantic/src/types/string_annotation.rs index 52096a731154e..a3c4dbd956d28 100644 --- a/crates/red_knot_python_semantic/src/types/string_annotation.rs +++ b/crates/red_knot_python_semantic/src/types/string_annotation.rs @@ -1,7 +1,7 @@ use ruff_db::source::source_text; use ruff_python_ast::str::raw_contents; -use ruff_python_ast::{self as ast, ModExpression, StringFlags}; -use ruff_python_parser::{parse_expression_range, Parsed}; +use ruff_python_ast::{self as ast, ModExpression}; +use ruff_python_parser::Parsed; use ruff_text_size::Ranged; use crate::declare_lint; @@ -153,19 +153,9 @@ pub(crate) fn parse_string_annotation( } else if raw_contents(node_text) .is_some_and(|raw_contents| raw_contents == string_literal.as_str()) { - let range_excluding_quotes = string_literal - .range() - .add_start(string_literal.flags.opener_len()) - .sub_end(string_literal.flags.closer_len()); - - // TODO: Support multiline strings like: - // ```py - // x: """ - // int - // | float - // """ = 1 - // ``` - match parse_expression_range(source.as_str(), range_excluding_quotes) { + let parsed = + ruff_python_parser::parse_string_annotation(source.as_str(), string_literal); + match parsed { Ok(parsed) => return Some(parsed), Err(parse_error) => context.report_lint( &INVALID_SYNTAX_IN_FORWARD_ANNOTATION, diff --git a/crates/ruff_linter/resources/test/fixtures/pyflakes/F722.py b/crates/ruff_linter/resources/test/fixtures/pyflakes/F722.py index 
35231d60af0fa..e77fb5c402f15 100644 --- a/crates/ruff_linter/resources/test/fixtures/pyflakes/F722.py +++ b/crates/ruff_linter/resources/test/fixtures/pyflakes/F722.py @@ -11,3 +11,34 @@ def g() -> "///": X: """List[int]"""'☃' = [] + +# Type annotations with triple quotes can contain newlines and indentation +# https://github.com/python/typing-council/issues/9 +y: """ + + int | + str +""" + +z: """( + + int | + str +) +""" + +invalid1: """ + int | +str) +""" + +invalid2: """ + int) | +str +""" +invalid3: """ + ((int) +""" +invalid4: """ + (int +""" diff --git a/crates/ruff_linter/resources/test/fixtures/pyupgrade/UP037_2.pyi b/crates/ruff_linter/resources/test/fixtures/pyupgrade/UP037_2.pyi index 8456637581f11..c7071fb7528ac 100644 --- a/crates/ruff_linter/resources/test/fixtures/pyupgrade/UP037_2.pyi +++ b/crates/ruff_linter/resources/test/fixtures/pyupgrade/UP037_2.pyi @@ -33,9 +33,6 @@ a: '''\\ list[int]''' = [42] -# TODO: These are valid too. String annotations are assumed to be enclosed in parentheses. -# https://github.com/astral-sh/ruff/issues/9467 - def f(a: ''' list[int] ''' = []): ... 
diff --git a/crates/ruff_linter/src/rules/pyflakes/snapshots/ruff_linter__rules__pyflakes__tests__F722_F722.py.snap b/crates/ruff_linter/src/rules/pyflakes/snapshots/ruff_linter__rules__pyflakes__tests__F722_F722.py.snap index 3a41c4a06203c..05de73ad5ca72 100644 --- a/crates/ruff_linter/src/rules/pyflakes/snapshots/ruff_linter__rules__pyflakes__tests__F722_F722.py.snap +++ b/crates/ruff_linter/src/rules/pyflakes/snapshots/ruff_linter__rules__pyflakes__tests__F722_F722.py.snap @@ -13,4 +13,68 @@ F722.py:13:4: F722 Syntax error in forward annotation: `List[int]☃` | 13 | X: """List[int]"""'☃' = [] | ^^^^^^^^^^^^^^^^^^ F722 +14 | +15 | # Type annotations with triple quotes can contain newlines and indentation + | + +F722.py:30:11: F722 Syntax error in forward annotation: ` + int | +str) +` + | +28 | """ +29 | +30 | invalid1: """ + | ___________^ +31 | | int | +32 | | str) +33 | | """ + | |___^ F722 +34 | +35 | invalid2: """ + | + +F722.py:35:11: F722 Syntax error in forward annotation: ` + int) | +str +` + | +33 | """ +34 | +35 | invalid2: """ + | ___________^ +36 | | int) | +37 | | str +38 | | """ + | |___^ F722 +39 | invalid3: """ +40 | ((int) + | + +F722.py:39:11: F722 Syntax error in forward annotation: ` + ((int) +` + | +37 | str +38 | """ +39 | invalid3: """ + | ___________^ +40 | | ((int) +41 | | """ + | |___^ F722 +42 | invalid4: """ +43 | (int + | + +F722.py:42:11: F722 Syntax error in forward annotation: ` + (int +` + | +40 | ((int) +41 | """ +42 | invalid4: """ + | ___________^ +43 | | (int +44 | | """ + | |___^ F722 | diff --git a/crates/ruff_linter/src/rules/pyupgrade/snapshots/ruff_linter__rules__pyupgrade__tests__UP037_2.pyi.snap b/crates/ruff_linter/src/rules/pyupgrade/snapshots/ruff_linter__rules__pyupgrade__tests__UP037_2.pyi.snap index bffd9bbf7d236..dd66f98b5180f 100644 --- a/crates/ruff_linter/src/rules/pyupgrade/snapshots/ruff_linter__rules__pyupgrade__tests__UP037_2.pyi.snap +++ 
b/crates/ruff_linter/src/rules/pyupgrade/snapshots/ruff_linter__rules__pyupgrade__tests__UP037_2.pyi.snap @@ -158,4 +158,75 @@ UP037_2.pyi:32:4: UP037 [*] Remove quotes from type annotation 33 |+list[int]) = [42] 34 34 | 35 35 | -36 36 | # TODO: These are valid too. String annotations are assumed to be enclosed in parentheses. +36 36 | def f(a: ''' + +UP037_2.pyi:36:10: UP037 [*] Remove quotes from type annotation + | +36 | def f(a: ''' + | __________^ +37 | | list[int] +38 | | ''' = []): ... + | |_______^ UP037 + | + = help: Remove quotes + +ℹ Safe fix +33 33 | list[int]''' = [42] +34 34 | +35 35 | +36 |-def f(a: ''' + 36 |+def f(a: +37 37 | list[int] +38 |- ''' = []): ... + 38 |+ = []): ... +39 39 | +40 40 | +41 41 | def f(a: Foo[''' + +UP037_2.pyi:41:14: UP037 [*] Remove quotes from type annotation + | +41 | def f(a: Foo[''' + | ______________^ +42 | | Bar +43 | | [ +44 | | Multi | +45 | | Line +46 | | ] # Comment''']): ... + | |___________________^ UP037 + | + = help: Remove quotes + +ℹ Safe fix +38 38 | ''' = []): ... +39 39 | +40 40 | +41 |-def f(a: Foo[''' + 41 |+def f(a: Foo[( +42 42 | Bar +43 43 | [ +44 44 | Multi | +45 45 | Line +46 |- ] # Comment''']): ... + 46 |+ ] # Comment + 47 |+)]): ... +47 48 | +48 49 | +49 50 | a: '''list + +UP037_2.pyi:49:4: UP037 [*] Remove quotes from type annotation + | +49 | a: '''list + | ____^ +50 | | [int]''' = [42] + | |________^ UP037 + | + = help: Remove quotes + +ℹ Safe fix +46 46 | ] # Comment''']): ... 
+47 47 | +48 48 | +49 |-a: '''list +50 |-[int]''' = [42] + 49 |+a: (list + 50 |+[int]) = [42] diff --git a/crates/ruff_python_parser/src/lexer.rs b/crates/ruff_python_parser/src/lexer.rs index 4bc4bb43194ac..0bd8472daf8d2 100644 --- a/crates/ruff_python_parser/src/lexer.rs +++ b/crates/ruff_python_parser/src/lexer.rs @@ -84,15 +84,21 @@ impl<'src> Lexer<'src> { "Lexer only supports files with a size up to 4GB" ); + let (state, nesting) = if mode == Mode::ParenthesizedExpression { + (State::Other, 1) + } else { + (State::AfterNewline, 0) + }; + let mut lexer = Lexer { source, cursor: Cursor::new(source), - state: State::AfterNewline, + state, current_kind: TokenKind::EndOfFile, current_range: TextRange::empty(start_offset), current_value: TokenValue::None, current_flags: TokenFlags::empty(), - nesting: 0, + nesting, indentations: Indentations::default(), pending_indentation: None, mode, @@ -1309,7 +1315,11 @@ impl<'src> Lexer<'src> { fn consume_end(&mut self) -> TokenKind { // We reached end of file. // First of all, we need all nestings to be finished. - if self.nesting > 0 { + // For Mode::ParenthesizedExpression we start with nesting level 1. + // So we check if we end with that level. + let init_nesting = u32::from(self.mode == Mode::ParenthesizedExpression); + + if self.nesting > init_nesting { // Reset the nesting to avoid going into infinite loop. 
self.nesting = 0; return self.push_error(LexicalError::new(LexicalErrorType::Eof, self.token_range())); diff --git a/crates/ruff_python_parser/src/lib.rs b/crates/ruff_python_parser/src/lib.rs index df11678118000..3571804bad0af 100644 --- a/crates/ruff_python_parser/src/lib.rs +++ b/crates/ruff_python_parser/src/lib.rs @@ -72,7 +72,9 @@ pub use crate::token::{Token, TokenKind}; use crate::parser::Parser; -use ruff_python_ast::{Expr, Mod, ModExpression, ModModule, PySourceType, Suite}; +use ruff_python_ast::{ + Expr, Mod, ModExpression, ModModule, PySourceType, StringFlags, StringLiteral, Suite, +}; use ruff_python_trivia::CommentRanges; use ruff_text_size::{Ranged, TextRange, TextSize}; @@ -166,6 +168,65 @@ pub fn parse_expression_range( .into_result() } +/// Parses a Python expression as if it is parenthesized. +/// +/// It behaves similarly to [`parse_expression_range`] but allows what would be valid within parenthesis +/// +/// # Example +/// +/// Parsing an expression that would be valid within parenthesis: +/// +/// ``` +/// use ruff_python_parser::parse_parenthesized_expression_range; +/// # use ruff_text_size::{TextRange, TextSize}; +/// +/// let parsed = parse_parenthesized_expression_range("'''\n int | str'''", TextRange::new(TextSize::new(3), TextSize::new(14))); +/// assert!(parsed.is_ok()); +pub fn parse_parenthesized_expression_range( + source: &str, + range: TextRange, +) -> Result, ParseError> { + let source = &source[..range.end().to_usize()]; + let parsed = + Parser::new_starts_at(source, Mode::ParenthesizedExpression, range.start()).parse(); + parsed.try_into_expression().unwrap().into_result() +} + +/// Parses a Python expression from a string annotation. 
+/// +/// # Example +/// +/// Parsing a string annotation: +/// +/// ``` +/// use ruff_python_parser::parse_string_annotation; +/// use ruff_python_ast::{StringLiteral, StringLiteralFlags}; +/// use ruff_text_size::{TextRange, TextSize}; +/// +/// let string = StringLiteral { +/// value: "'''\n int | str'''".to_string().into_boxed_str(), +/// flags: StringLiteralFlags::default(), +/// range: TextRange::new(TextSize::new(0), TextSize::new(16)), +/// }; +/// let parsed = parse_string_annotation("'''\n int | str'''", &string); +/// assert!(!parsed.is_ok()); +/// ``` +pub fn parse_string_annotation( + source: &str, + string: &StringLiteral, +) -> Result, ParseError> { + let range = string + .range() + .add_start(string.flags.opener_len()) + .sub_end(string.flags.closer_len()); + let source = &source[..range.end().to_usize()]; + if string.flags.is_triple_quoted() { + parse_parenthesized_expression_range(source, range) + } else { + parse_expression_range(source, range) + } +} + /// Parse the given Python source code using the specified [`Mode`]. /// /// This function is the most general function to parse Python code. Based on the [`Mode`] supplied, @@ -582,6 +643,11 @@ pub enum Mode { /// The code consists of a single expression. Expression, + /// The code consists of a single expression and is parsed as if it is parenthesized. The parentheses themselves aren't required. + /// This allows for having valid multiline expression without the need of parentheses + /// and is specifically useful for parsing string annotations. + ParenthesizedExpression, + /// The code consists of a sequence of statements which can include the /// escape commands that are part of IPython syntax. 
/// diff --git a/crates/ruff_python_parser/src/parser/mod.rs b/crates/ruff_python_parser/src/parser/mod.rs index 08c85f7a07148..d4528c8c3c4a0 100644 --- a/crates/ruff_python_parser/src/parser/mod.rs +++ b/crates/ruff_python_parser/src/parser/mod.rs @@ -74,7 +74,9 @@ impl<'src> Parser<'src> { /// Consumes the [`Parser`] and returns the parsed [`Parsed`]. pub(crate) fn parse(mut self) -> Parsed { let syntax = match self.mode { - Mode::Expression => Mod::Expression(self.parse_single_expression()), + Mode::Expression | Mode::ParenthesizedExpression => { + Mod::Expression(self.parse_single_expression()) + } Mode::Module | Mode::Ipython => Mod::Module(self.parse_module()), }; diff --git a/crates/ruff_python_parser/src/typing.rs b/crates/ruff_python_parser/src/typing.rs index 76e25ac880bd2..ffc7dce741714 100644 --- a/crates/ruff_python_parser/src/typing.rs +++ b/crates/ruff_python_parser/src/typing.rs @@ -2,10 +2,10 @@ use ruff_python_ast::relocate::relocate_expr; use ruff_python_ast::str::raw_contents; -use ruff_python_ast::{Expr, ExprStringLiteral, ModExpression, StringFlags, StringLiteral}; +use ruff_python_ast::{Expr, ExprStringLiteral, ModExpression, StringLiteral}; use ruff_text_size::Ranged; -use crate::{parse_expression, parse_expression_range, ParseError, Parsed}; +use crate::{parse_expression, parse_string_annotation, ParseError, Parsed}; type AnnotationParseResult = Result; @@ -81,12 +81,8 @@ fn parse_simple_type_annotation( string_literal: &StringLiteral, source: &str, ) -> AnnotationParseResult { - let range_excluding_quotes = string_literal - .range() - .add_start(string_literal.flags.opener_len()) - .sub_end(string_literal.flags.closer_len()); Ok(ParsedAnnotation { - parsed: parse_expression_range(source, range_excluding_quotes)?, + parsed: parse_string_annotation(source, string_literal)?, kind: AnnotationKind::Simple, }) }