Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ruff] Ambiguous pattern passed to pytest.raises() (RUF043) #14966

Merged
merged 10 commits into from
Dec 18, 2024
87 changes: 87 additions & 0 deletions crates/ruff_linter/resources/test/fixtures/ruff/RUF043.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
import re
import pytest

def test_foo():

### Errors

with pytest.raises(FooAtTheEnd, match="foo."): ...
with pytest.raises(PackageExtraSpecifier, match="Install `foo[bar]` to enjoy all features"): ...
with pytest.raises(InnocentQuestion, match="Did you mean to use `Literal` instead?"): ...

with pytest.raises(StringConcatenation, match="Huh"
"?"): ...
with pytest.raises(ManuallyEscapedWindowsPathToDotFile, match="C:\\\\Users\\\\Foo\\\\.config"): ...

with pytest.raises(MiddleDot, match="foo.bar"): ...
with pytest.raises(EndDot, match="foobar."): ...
with pytest.raises(EndBackslash, match="foobar\\"): ...
with pytest.raises(EscapedFollowedByUnescaped, match="foo\\.*bar"): ...
with pytest.raises(UnescapedFollowedByEscaped, match="foo.\\*bar"): ...


## Metasequences

with pytest.raises(StartOfInput, match="foo\\Abar"): ...
with pytest.raises(WordBoundary, match="foo\\bbar"): ...
with pytest.raises(NonWordBoundary, match="foo\\Bbar"): ...
with pytest.raises(Digit, match="foo\\dbar"): ...
with pytest.raises(NonDigit, match="foo\\Dbar"): ...
with pytest.raises(Whitespace, match="foo\\sbar"): ...
with pytest.raises(NonWhitespace, match="foo\\Sbar"): ...
with pytest.raises(WordCharacter, match="foo\\wbar"): ...
with pytest.raises(NonWordCharacter, match="foo\\Wbar"): ...
with pytest.raises(EndOfInput, match="foo\\zbar"): ...

with pytest.raises(StartOfInput2, match="foobar\\A"): ...
with pytest.raises(WordBoundary2, match="foobar\\b"): ...
with pytest.raises(NonWordBoundary2, match="foobar\\B"): ...
with pytest.raises(Digit2, match="foobar\\d"): ...
with pytest.raises(NonDigit2, match="foobar\\D"): ...
with pytest.raises(Whitespace2, match="foobar\\s"): ...
with pytest.raises(NonWhitespace2, match="foobar\\S"): ...
with pytest.raises(WordCharacter2, match="foobar\\w"): ...
with pytest.raises(NonWordCharacter2, match="foobar\\W"): ...
with pytest.raises(EndOfInput2, match="foobar\\z"): ...


### Acceptable false positives

with pytest.raises(NameEscape, match="\\N{EN DASH}"): ...


### No errors

with pytest.raises(NoMatch): ...
with pytest.raises(NonLiteral, match=pattern): ...
with pytest.raises(FunctionCall, match=frobnicate("qux")): ...
with pytest.raises(ReEscaped, match=re.escape("foobar")): ...
with pytest.raises(RawString, match=r"fo()bar"): ...
with pytest.raises(RawStringPart, match=r"foo" '\bar'): ...
with pytest.raises(NoMetacharacters, match="foobar"): ...

with pytest.raises(ManuallyEscaped, match="some\\.fully\\.qualified\\.name"): ...
with pytest.raises(ManuallyEscapedWindowsPath, match="C:\\\\Users\\\\Foo\\\\file\\.py"): ...

with pytest.raises(MiddleEscapedDot, match="foo\\.bar"): ...
with pytest.raises(MiddleEscapedBackslash, match="foo\\\\bar"): ...
with pytest.raises(EndEscapedDot, match="foobar\\."): ...
with pytest.raises(EndEscapedBackslash, match="foobar\\\\"): ...


## Not-so-special metasequences

with pytest.raises(Alert, match="\\f"): ...
with pytest.raises(FormFeed, match="\\f"): ...
with pytest.raises(Newline, match="\\n"): ...
with pytest.raises(CarriageReturn, match="\\r"): ...
with pytest.raises(Tab, match="\\t"): ...
with pytest.raises(VerticalTab, match="\\v"): ...
with pytest.raises(HexEscape, match="\\xFF"): ...
with pytest.raises(_16BitUnicodeEscape, match="\\FFFF"): ...
with pytest.raises(_32BitUnicodeEscape, match="\\0010FFFF"): ...


## Work by accident

with pytest.raises(OctalEscape, match="\\042"): ...
3 changes: 3 additions & 0 deletions crates/ruff_linter/src/checkers/ast/analyze/expression.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1105,6 +1105,9 @@ pub(crate) fn expression(expr: &Expr, checker: &mut Checker) {
if checker.enabled(Rule::BatchedWithoutExplicitStrict) {
flake8_bugbear::rules::batched_without_explicit_strict(checker, call);
}
if checker.enabled(Rule::PytestRaisesAmbiguousPattern) {
ruff::rules::pytest_raises_ambiguous_pattern(checker, call);
}
}
Expr::Dict(dict) => {
if checker.any_enabled(&[
Expand Down
1 change: 1 addition & 0 deletions crates/ruff_linter/src/codes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -985,6 +985,7 @@ pub fn code_to_rule(linter: Linter, code: &str) -> Option<(RuleGroup, Rule)> {
(Ruff, "039") => (RuleGroup::Preview, rules::ruff::rules::UnrawRePattern),
(Ruff, "040") => (RuleGroup::Preview, rules::ruff::rules::InvalidAssertMessageLiteralArgument),
(Ruff, "041") => (RuleGroup::Preview, rules::ruff::rules::UnnecessaryNestedLiteral),
(Ruff, "043") => (RuleGroup::Preview, rules::ruff::rules::PytestRaisesAmbiguousPattern),
(Ruff, "046") => (RuleGroup::Preview, rules::ruff::rules::UnnecessaryCastToInt),
(Ruff, "048") => (RuleGroup::Preview, rules::ruff::rules::MapIntVersionParsing),
(Ruff, "051") => (RuleGroup::Preview, rules::ruff::rules::IfKeyInDictDel),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ impl Violation for PytestAssertInExcept {
/// ...
/// ```
///
/// References
/// ## References
/// - [`pytest` documentation: `pytest.fail`](https://docs.pytest.org/en/latest/reference/reference.html#pytest-fail)
#[derive(ViolationMetadata)]
pub(crate) struct PytestAssertAlwaysFalse;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,6 @@ impl AlwaysFixableViolation for PytestIncorrectMarkParenthesesStyle {
///
/// ## References
/// - [`pytest` documentation: `pytest.mark.usefixtures`](https://docs.pytest.org/en/latest/reference/reference.html#pytest-mark-usefixtures)

#[derive(ViolationMetadata)]
pub(crate) struct PytestUseFixturesWithoutParameters;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ impl Violation for PytestRaisesWithoutException {
}
}

fn is_pytest_raises(func: &Expr, semantic: &SemanticModel) -> bool {
pub(crate) fn is_pytest_raises(func: &Expr, semantic: &SemanticModel) -> bool {
semantic
.resolve_qualified_name(func)
.is_some_and(|qualified_name| matches!(qualified_name.segments(), ["pytest", "raises"]))
Expand Down
1 change: 1 addition & 0 deletions crates/ruff_linter/src/rules/ruff/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -415,6 +415,7 @@ mod tests {
#[test_case(Rule::UnnecessaryRegularExpression, Path::new("RUF055_0.py"))]
#[test_case(Rule::UnnecessaryRegularExpression, Path::new("RUF055_1.py"))]
#[test_case(Rule::UnnecessaryCastToInt, Path::new("RUF046.py"))]
#[test_case(Rule::PytestRaisesAmbiguousPattern, Path::new("RUF043.py"))]
fn preview_rules(rule_code: Rule, path: &Path) -> Result<()> {
let snapshot = format!(
"preview__{}_{}",
Expand Down
2 changes: 2 additions & 0 deletions crates/ruff_linter/src/rules/ruff/rules/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ pub(crate) use never_union::*;
pub(crate) use none_not_at_end_of_union::*;
pub(crate) use parenthesize_chained_operators::*;
pub(crate) use post_init_default::*;
pub(crate) use pytest_raises_ambiguous_pattern::*;
pub(crate) use quadratic_list_summation::*;
pub(crate) use redirected_noqa::*;
pub(crate) use redundant_bool_literal::*;
Expand Down Expand Up @@ -71,6 +72,7 @@ mod never_union;
mod none_not_at_end_of_union;
mod parenthesize_chained_operators;
mod post_init_default;
mod pytest_raises_ambiguous_pattern;
mod quadratic_list_summation;
mod redirected_noqa;
mod redundant_bool_literal;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
use crate::checkers::ast::Checker;
use crate::rules::flake8_pytest_style::rules::is_pytest_raises;
use itertools::Itertools;
use ruff_diagnostics::{Diagnostic, Violation};
use ruff_macros::{derive_message_formats, ViolationMetadata};
use ruff_python_ast::{ExprCall, Keyword, StringLiteralValue};

/// ## What it does
/// Checks for non-raw literal string arguments passed to the `match` parameter
/// of `pytest.raises()` where the string contains at least one unescaped
/// regex metacharacter.
///
/// ## Why is this bad?
/// The `match` argument is implicitly converted to a regex under the hood.
/// It should be made explicit whether the string is meant to be a regex or a "plain" pattern
/// by prefixing the string with the `r` suffix, escaping the metacharacter(s)
/// in the string using backslashes, or wrapping the entire string in a call to
/// `re.escape()`.
///
/// ## Example
///
/// ```python
/// import pytest
///
///
/// with pytest.raises(Exception, match="A full sentence."):
/// do_thing_that_raises()
/// ```
///
/// Use instead:
///
/// ```python
/// import pytest
///
///
/// with pytest.raises(Exception, match=r"A full sentence."):
/// do_thing_that_raises()
/// ```
///
/// Alternatively:
///
/// ```python
/// import pytest
/// import re
///
///
/// with pytest.raises(Exception, match=re.escape("A full sentence.")):
/// do_thing_that_raises()
/// ```
///
/// or:
///
/// ```python
/// import pytest
/// import re
///
///
/// with pytest.raises(Exception, "A full sentence\\."):
/// do_thing_that_raises()
/// ```
///
/// ## References
/// - [Python documentation: `re.escape`](https://docs.python.org/3/library/re.html#re.escape)
/// - [`pytest` documentation: `pytest.raises`](https://docs.pytest.org/en/latest/reference/reference.html#pytest-raises)
#[derive(ViolationMetadata)]
pub(crate) struct PytestRaisesAmbiguousPattern;

impl Violation for PytestRaisesAmbiguousPattern {
#[derive_message_formats]
fn message(&self) -> String {
"Pattern passed to `match=` contains metacharacters but is neither escaped nor raw"
.to_string()
}

fn fix_title(&self) -> Option<String> {
Some("Use a raw string or `re.escape()` to make the intention explicit".to_string())
}
}

/// RUF043
pub(crate) fn pytest_raises_ambiguous_pattern(checker: &mut Checker, call: &ExprCall) {
if !is_pytest_raises(&call.func, checker.semantic()) {
return;
}

let Some(Keyword { value, .. }) = call.arguments.find_keyword("match") else {
return;
};

let Some(string) = value.as_string_literal_expr() else {
return;
};

let any_part_is_raw = string.value.iter().any(|part| part.flags.prefix().is_raw());

if any_part_is_raw || !string_has_unescaped_metacharacters(&string.value) {
return;
}

let diagnostic = Diagnostic::new(PytestRaisesAmbiguousPattern, string.range);

checker.diagnostics.push(diagnostic);
}

fn string_has_unescaped_metacharacters(value: &StringLiteralValue) -> bool {
let mut escaped = false;
let mut last = '\x00';
AlexWaygood marked this conversation as resolved.
Show resolved Hide resolved

for (current, next) in value.chars().tuple_windows() {
last = next;

if escaped {
if escaped_char_is_regex_metasequence(current) {
return true;
}

escaped = false;
continue;
}

if current == '\\' {
escaped = true;
continue;
}

if char_is_regex_metacharacter(current) {
return true;
}
}

if escaped {
escaped_char_is_regex_metasequence(last)
} else {
char_is_regex_metacharacter(last)
}
MichaReiser marked this conversation as resolved.
Show resolved Hide resolved
}

/// Whether the sequence `\<c>` means anything special:
///
/// * `\A`: Start of input
/// * `\b`, `\B`: Word boundary and non-word-boundary
/// * `\d`, `\D`: Digit and non-digit
/// * `\s`, `\S`: Whitespace and non-whitespace
/// * `\w`, `\W`: Word and non-word character
/// * `\z`: End of input
///
/// `\u`, `\U`, `\N`, `\x`, `\a`, `\f`, `\n`, `\r`, `\t`, `\v`
/// are also valid in normal strings and thus do not count.
/// `\b` means backspace only in character sets,
/// while backreferences (e.g., `\1`) are not valid without groups,
/// both of which should be caught in [`string_has_unescaped_metacharacters`].
const fn escaped_char_is_regex_metasequence(c: char) -> bool {
matches!(c, 'A' | 'b' | 'B' | 'd' | 'D' | 's' | 'S' | 'w' | 'W' | 'z')
}

const fn char_is_regex_metacharacter(c: char) -> bool {
matches!(
c,
'.' | '^' | '$' | '*' | '+' | '?' | '{' | '[' | '\\' | '|' | '('
)
}
Loading
Loading