From ae3d816bca8c52639f7250bc883708359a6ef17c Mon Sep 17 00:00:00 2001 From: dylwil3 Date: Fri, 22 Nov 2024 11:04:22 -0600 Subject: [PATCH 1/3] safe autofix when no backslashes or u-prefix present --- .../src/rules/ruff/rules/unraw_re_pattern.rs | 28 +++++++++++++++++-- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/crates/ruff_linter/src/rules/ruff/rules/unraw_re_pattern.rs b/crates/ruff_linter/src/rules/ruff/rules/unraw_re_pattern.rs index 518bf8aafe8ec..3c08eeb7938d7 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/unraw_re_pattern.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/unraw_re_pattern.rs @@ -1,13 +1,16 @@ use std::fmt::{Display, Formatter}; use std::str::FromStr; -use ruff_diagnostics::{Diagnostic, Violation}; +use ruff_diagnostics::{Diagnostic, Edit, Fix, FixAvailability, Violation}; use ruff_macros::{derive_message_formats, violation}; use ruff_python_ast::{ BytesLiteral, Expr, ExprBytesLiteral, ExprCall, ExprStringLiteral, StringLiteral, }; use ruff_python_semantic::{Modules, SemanticModel}; +use memchr::memchr; +use ruff_text_size::Ranged; + use crate::checkers::ast::Checker; /// ## What it does @@ -41,6 +44,7 @@ pub struct UnrawRePattern { } impl Violation for UnrawRePattern { + const FIX_AVAILABILITY: FixAvailability = FixAvailability::Sometimes; #[derive_message_formats] fn message(&self) -> String { let Self { module, func, kind } = &self; @@ -158,8 +162,26 @@ fn check_string(checker: &mut Checker, literal: &StringLiteral, module: RegexMod let kind = PatternKind::String; let func = func.to_string(); let range = literal.range; - let diagnostic = Diagnostic::new(UnrawRePattern { module, func, kind }, range); - + let mut diagnostic = Diagnostic::new(UnrawRePattern { module, func, kind }, range); + + if + // The (no-op) `u` prefix is a syntax error when combined with `r` + !literal.flags.prefix().is_unicode() + && memchr( + b'\\', + // We are looking for backslash characters + // in the raw source code here, because `\n` + // gets converted to a single character already + // at the lexing stage. + checker.locator().slice(literal.range()).as_bytes(), + ) + .is_none() + { + diagnostic.set_fix(Fix::safe_edit(Edit::insertion( + "r".to_string(), + literal.range().start(), + ))); + } checker.diagnostics.push(diagnostic); } From fdbba787fbed9c6f1e5e1c89521956246652573c Mon Sep 17 00:00:00 2001 From: dylwil3 Date: Fri, 22 Nov 2024 12:11:35 -0600 Subject: [PATCH 2/3] update snapshots --- ...uff__tests__preview__RUF039_RUF039.py.snap | 85 ++++++- ...sts__preview__RUF039_RUF039_concat.py.snap | 217 ++++++++++++++++-- 2 files changed, 275 insertions(+), 27 deletions(-) diff --git a/crates/ruff_linter/src/rules/ruff/snapshots/ruff_linter__rules__ruff__tests__preview__RUF039_RUF039.py.snap b/crates/ruff_linter/src/rules/ruff/snapshots/ruff_linter__rules__ruff__tests__preview__RUF039_RUF039.py.snap index 0d3b5d4863b88..347cf257d1747 100644 --- a/crates/ruff_linter/src/rules/ruff/snapshots/ruff_linter__rules__ruff__tests__preview__RUF039_RUF039.py.snap +++ b/crates/ruff_linter/src/rules/ruff/snapshots/ruff_linter__rules__ruff__tests__preview__RUF039_RUF039.py.snap @@ -1,8 +1,7 @@ --- source: crates/ruff_linter/src/rules/ruff/mod.rs -snapshot_kind: text --- -RUF039.py:5:12: RUF039 First argument to `re.compile()` is not raw string +RUF039.py:5:12: RUF039 [*] First argument to `re.compile()` is not raw string | 4 | # Errors 5 | re.compile('single free-spacing', flags=re.X) @@ -12,6 +11,16 @@ RUF039.py:5:12: RUF039 First argument to `re.compile()` is not raw string | = help: Replace with raw string +ℹ Safe fix +2 2 | import regex +3 3 | +4 4 | # Errors +5 |-re.compile('single free-spacing', flags=re.X) + 5 |+re.compile(r'single free-spacing', flags=re.X) +6 6 | re.findall('si\ngle') +7 7 | re.finditer("dou\ble") +8 8 | re.fullmatch('''t\riple single''') + RUF039.py:6:12: RUF039 First argument to `re.findall()` is not raw string | 4 | # Errors @@ -56,7 +65,7 @@ RUF039.py:9:10: RUF039 First argument to `re.match()` is not raw string | = help: Replace with raw string -RUF039.py:10:11: RUF039 First argument to `re.search()` is not raw string +RUF039.py:10:11: RUF039 [*] First argument to `re.search()` is not raw string | 8 | re.fullmatch('''t\riple single''') 9 | re.match("""\triple double""") @@ -67,7 +76,17 @@ RUF039.py:10:11: RUF039 First argument to `re.search()` is not raw string | = help: Replace with raw string -RUF039.py:11:10: RUF039 First argument to `re.split()` is not raw string +ℹ Safe fix +7 7 | re.finditer("dou\ble") +8 8 | re.fullmatch('''t\riple single''') +9 9 | re.match("""\triple double""") +10 |-re.search('two', 'args') + 10 |+re.search(r'two', 'args') +11 11 | re.split("raw", r'second') +12 12 | re.sub(u'''nicode''', u"f(?i)rst") +13 13 | re.subn(b"""ytes are""", f"\u006e") + +RUF039.py:11:10: RUF039 [*] First argument to `re.split()` is not raw string | 9 | re.match("""\triple double""") 10 | re.search('two', 'args') @@ -78,6 +97,16 @@ RUF039.py:11:10: RUF039 First argument to `re.split()` is not raw string | = help: Replace with raw string +ℹ Safe fix +8 8 | re.fullmatch('''t\riple single''') +9 9 | re.match("""\triple double""") +10 10 | re.search('two', 'args') +11 |-re.split("raw", r'second') + 11 |+re.split(r"raw", r'second') +12 12 | re.sub(u'''nicode''', u"f(?i)rst") +13 13 | re.subn(b"""ytes are""", f"\u006e") +14 14 | + RUF039.py:12:8: RUF039 First argument to `re.sub()` is not raw string | 10 | re.search('two', 'args') @@ -99,7 +128,7 @@ RUF039.py:13:9: RUF039 First argument to `re.subn()` is not raw bytes literal | = help: Replace with raw bytes literal -RUF039.py:15:15: RUF039 First argument to `regex.compile()` is not raw string +RUF039.py:15:15: RUF039 [*] First argument to `regex.compile()` is not raw string | 13 | re.subn(b"""ytes are""", f"\u006e") 14 | @@ -110,6 +139,16 @@ RUF039.py:15:15: RUF039 First argument to `regex.compile()` is not raw string | = help: Replace with raw string +ℹ Safe fix +12 12 | re.sub(u'''nicode''', u"f(?i)rst") +13 13 | re.subn(b"""ytes are""", f"\u006e") +14 14 | +15 |-regex.compile('single free-spacing', flags=regex.X) + 15 |+regex.compile(r'single free-spacing', flags=regex.X) +16 16 | regex.findall('si\ngle') +17 17 | regex.finditer("dou\ble") +18 18 | regex.fullmatch('''t\riple single''') + RUF039.py:16:15: RUF039 First argument to `regex.findall()` is not raw string | 15 | regex.compile('single free-spacing', flags=regex.X) @@ -153,7 +192,7 @@ RUF039.py:19:13: RUF039 First argument to `regex.match()` is not raw string | = help: Replace with raw string -RUF039.py:20:14: RUF039 First argument to `regex.search()` is not raw string +RUF039.py:20:14: RUF039 [*] First argument to `regex.search()` is not raw string | 18 | regex.fullmatch('''t\riple single''') 19 | regex.match("""\triple double""") @@ -164,7 +203,17 @@ RUF039.py:20:14: RUF039 First argument to `regex.search()` is not raw string | = help: Replace with raw string -RUF039.py:21:13: RUF039 First argument to `regex.split()` is not raw string +ℹ Safe fix +17 17 | regex.finditer("dou\ble") +18 18 | regex.fullmatch('''t\riple single''') +19 19 | regex.match("""\triple double""") +20 |-regex.search('two', 'args') + 20 |+regex.search(r'two', 'args') +21 21 | regex.split("raw", r'second') +22 22 | regex.sub(u'''nicode''', u"f(?i)rst") +23 23 | regex.subn(b"""ytes are""", f"\u006e") + +RUF039.py:21:13: RUF039 [*] First argument to `regex.split()` is not raw string | 19 | regex.match("""\triple double""") 20 | regex.search('two', 'args') @@ -175,6 +224,16 @@ RUF039.py:21:13: RUF039 First argument to `regex.split()` is not raw string | = help: Replace with raw string +ℹ Safe fix +18 18 | regex.fullmatch('''t\riple single''') +19 19 | regex.match("""\triple double""") +20 20 | regex.search('two', 'args') +21 |-regex.split("raw", r'second') + 21 |+regex.split(r"raw", r'second') +22 22 | regex.sub(u'''nicode''', u"f(?i)rst") +23 23 | regex.subn(b"""ytes are""", f"\u006e") +24 24 | + RUF039.py:22:11: RUF039 First argument to `regex.sub()` is not raw string | 20 | regex.search('two', 'args') @@ -196,7 +255,7 @@ RUF039.py:23:12: RUF039 First argument to `regex.subn()` is not raw bytes litera | = help: Replace with raw bytes literal -RUF039.py:25:16: RUF039 First argument to `regex.template()` is not raw string +RUF039.py:25:16: RUF039 [*] First argument to `regex.template()` is not raw string | 23 | regex.subn(b"""ytes are""", f"\u006e") 24 | @@ -209,3 +268,13 @@ RUF039.py:25:16: RUF039 First argument to `regex.template()` is not raw string | |___^ RUF039 | = help: Replace with raw string + +ℹ Safe fix +22 22 | regex.sub(u'''nicode''', u"f(?i)rst") +23 23 | regex.subn(b"""ytes are""", f"\u006e") +24 24 | +25 |-regex.template("""(?m) + 25 |+regex.template(r"""(?m) +26 26 | (?:ulti)? +27 27 | (?=(? Date: Fri, 22 Nov 2024 12:51:55 -0600 Subject: [PATCH 3/3] use contains not memchr --- .../src/rules/ruff/rules/unraw_re_pattern.rs | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/crates/ruff_linter/src/rules/ruff/rules/unraw_re_pattern.rs b/crates/ruff_linter/src/rules/ruff/rules/unraw_re_pattern.rs index 3c08eeb7938d7..40014e4fad4a3 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/unraw_re_pattern.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/unraw_re_pattern.rs @@ -8,7 +8,6 @@ use ruff_python_ast::{ }; use ruff_python_semantic::{Modules, SemanticModel}; -use memchr::memchr; use ruff_text_size::Ranged; use crate::checkers::ast::Checker; @@ -167,15 +166,11 @@ fn check_string(checker: &mut Checker, literal: &StringLiteral, module: RegexMod if // The (no-op) `u` prefix is a syntax error when combined with `r` !literal.flags.prefix().is_unicode() - && memchr( - b'\\', - // We are looking for backslash characters - // in the raw source code here, because `\n` - // gets converted to a single character already - // at the lexing stage. - checker.locator().slice(literal.range()).as_bytes(), - ) - .is_none() + // We are looking for backslash characters + // in the raw source code here, because `\n` + // gets converted to a single character already + // at the lexing stage. + &&!checker.locator().slice(literal.range()).contains('\\') { diagnostic.set_fix(Fix::safe_edit(Edit::insertion( "r".to_string(),