From 3027674b76ef2a70e7f299a292d234daac9e3940 Mon Sep 17 00:00:00 2001 From: Chaojie Date: Fri, 24 Nov 2023 00:25:37 +0800 Subject: [PATCH 1/2] [flake8-bandit] Implement tarfile-unsafe-members (S202) --- .../test/fixtures/flake8_bandit/S202.py | 47 +++++++++++++++++ .../src/checkers/ast/analyze/expression.rs | 3 ++ crates/ruff_linter/src/codes.rs | 1 + .../src/rules/flake8_bandit/mod.rs | 1 + .../src/rules/flake8_bandit/rules/mod.rs | 2 + .../rules/tarfile_unsafe_members.rs | 52 +++++++++++++++++++ ...s__flake8_bandit__tests__S202_S202.py.snap | 40 ++++++++++++++ ruff.schema.json | 1 + 8 files changed, 147 insertions(+) create mode 100644 crates/ruff_linter/resources/test/fixtures/flake8_bandit/S202.py create mode 100644 crates/ruff_linter/src/rules/flake8_bandit/rules/tarfile_unsafe_members.rs create mode 100644 crates/ruff_linter/src/rules/flake8_bandit/snapshots/ruff_linter__rules__flake8_bandit__tests__S202_S202.py.snap diff --git a/crates/ruff_linter/resources/test/fixtures/flake8_bandit/S202.py b/crates/ruff_linter/resources/test/fixtures/flake8_bandit/S202.py new file mode 100644 index 00000000000000..2af3eb54464e0e --- /dev/null +++ b/crates/ruff_linter/resources/test/fixtures/flake8_bandit/S202.py @@ -0,0 +1,47 @@ +import sys +import tarfile +import tempfile + + +def unsafe_archive_handler(filename): + tar = tarfile.open(filename) + tar.extractall(path=tempfile.mkdtemp()) + tar.close() + + +def managed_members_archive_handler(filename): + tar = tarfile.open(filename) + tar.extractall(path=tempfile.mkdtemp(), members=members_filter(tar)) + tar.close() + + +def list_members_archive_handler(filename): + tar = tarfile.open(filename) + tar.extractall(path=tempfile.mkdtemp(), members=[]) + tar.close() + + +def provided_members_archive_handler(filename): + tar = tarfile.open(filename) + tarfile.extractall(path=tempfile.mkdtemp(), members=tar) + tar.close() + + +def members_filter(tarfile): + result = [] + for member in tarfile.getmembers(): + if '../' in member.name: + print('Member name container directory traversal sequence') + continue + elif (member.issym() or member.islnk()) and ('../' in member.linkname): + print('Symlink to external resource') + continue + result.append(member) + return result + + +if __name__ == "__main__": + if len(sys.argv) > 1: + filename = sys.argv[1] + unsafe_archive_handler(filename) + managed_members_archive_handler(filename) diff --git a/crates/ruff_linter/src/checkers/ast/analyze/expression.rs b/crates/ruff_linter/src/checkers/ast/analyze/expression.rs index 313218cca2dedb..f418fbf1212d5e 100644 --- a/crates/ruff_linter/src/checkers/ast/analyze/expression.rs +++ b/crates/ruff_linter/src/checkers/ast/analyze/expression.rs @@ -615,6 +615,9 @@ pub(crate) fn expression(expr: &Expr, checker: &mut Checker) { if checker.enabled(Rule::DjangoRawSql) { flake8_bandit::rules::django_raw_sql(checker, call); } + if checker.enabled(Rule::TarfileUnsafeMembers) { + flake8_bandit::rules::tarfile_unsafe_members(checker, call); + } if checker.enabled(Rule::UnnecessaryGeneratorList) { flake8_comprehensions::rules::unnecessary_generator_list( checker, expr, func, args, keywords, diff --git a/crates/ruff_linter/src/codes.rs b/crates/ruff_linter/src/codes.rs index 759ccc54ac49e5..a23c771c7e8bd9 100644 --- a/crates/ruff_linter/src/codes.rs +++ b/crates/ruff_linter/src/codes.rs @@ -595,6 +595,7 @@ pub fn code_to_rule(linter: Linter, code: &str) -> Option<(RuleGroup, Rule)> { (Flake8Bandit, "112") => (RuleGroup::Stable, rules::flake8_bandit::rules::TryExceptContinue), (Flake8Bandit, "113") => (RuleGroup::Stable, rules::flake8_bandit::rules::RequestWithoutTimeout), (Flake8Bandit, "201") => (RuleGroup::Preview, rules::flake8_bandit::rules::FlaskDebugTrue), + (Flake8Bandit, "202") => (RuleGroup::Stable, rules::flake8_bandit::rules::TarfileUnsafeMembers), (Flake8Bandit, "301") => (RuleGroup::Stable, rules::flake8_bandit::rules::SuspiciousPickleUsage), (Flake8Bandit, "302") => (RuleGroup::Stable, rules::flake8_bandit::rules::SuspiciousMarshalUsage), (Flake8Bandit, "303") => (RuleGroup::Stable, rules::flake8_bandit::rules::SuspiciousInsecureHashUsage), diff --git a/crates/ruff_linter/src/rules/flake8_bandit/mod.rs b/crates/ruff_linter/src/rules/flake8_bandit/mod.rs index feb2dcb5076515..ce041669f1782b 100644 --- a/crates/ruff_linter/src/rules/flake8_bandit/mod.rs +++ b/crates/ruff_linter/src/rules/flake8_bandit/mod.rs @@ -51,6 +51,7 @@ mod tests { #[test_case(Rule::UnsafeYAMLLoad, Path::new("S506.py"))] #[test_case(Rule::WeakCryptographicKey, Path::new("S505.py"))] #[test_case(Rule::DjangoRawSql, Path::new("S611.py"))] + #[test_case(Rule::TarfileUnsafeMembers, Path::new("S202.py"))] fn rules(rule_code: Rule, path: &Path) -> Result<()> { let snapshot = format!("{}_{}", rule_code.noqa_code(), path.to_string_lossy()); let diagnostics = test_path( diff --git a/crates/ruff_linter/src/rules/flake8_bandit/rules/mod.rs b/crates/ruff_linter/src/rules/flake8_bandit/rules/mod.rs index b7a655366876f5..ee1de347d61527 100644 --- a/crates/ruff_linter/src/rules/flake8_bandit/rules/mod.rs +++ b/crates/ruff_linter/src/rules/flake8_bandit/rules/mod.rs @@ -21,6 +21,7 @@ pub(crate) use snmp_insecure_version::*; pub(crate) use snmp_weak_cryptography::*; pub(crate) use ssh_no_host_key_verification::*; pub(crate) use suspicious_function_call::*; +pub(crate) use tarfile_unsafe_members::*; pub(crate) use try_except_continue::*; pub(crate) use try_except_pass::*; pub(crate) use unsafe_yaml_load::*; @@ -49,6 +50,7 @@ mod snmp_insecure_version; mod snmp_weak_cryptography; mod ssh_no_host_key_verification; mod suspicious_function_call; +mod tarfile_unsafe_members; mod try_except_continue; mod try_except_pass; mod unsafe_yaml_load; diff --git a/crates/ruff_linter/src/rules/flake8_bandit/rules/tarfile_unsafe_members.rs b/crates/ruff_linter/src/rules/flake8_bandit/rules/tarfile_unsafe_members.rs new file mode 100644 index 00000000000000..a896b41dd9c0cf --- /dev/null +++ b/crates/ruff_linter/src/rules/flake8_bandit/rules/tarfile_unsafe_members.rs @@ -0,0 +1,52 @@ +use crate::checkers::ast::Checker; +use ruff_diagnostics::Diagnostic; +use ruff_diagnostics::Violation; +use ruff_macros::{derive_message_formats, violation}; +use ruff_python_ast::{self as ast}; +use ruff_text_size::Ranged; + +/// ## What it does +/// Checks for uses of ``tarfile.extractall()`. +/// +/// ## Why is this bad? +/// Use ``tarfile.extractall(members=function_name)`` and define a function +/// that will inspect each member. Discard files that contain a directory +/// traversal sequences such as ``../`` or ``\..`` along with all special filetypes +/// unless you explicitly need them. +/// +/// ## Example +/// ```python +/// import tarfile +/// import tempfile +/// +/// tar = tarfile.open(filename) +/// tar.extractall(path=tempfile.mkdtemp()) +/// tar.close() +/// ``` +/// +/// ## References +/// - [Common Weakness Enumeration: CWE-22](https://cwe.mitre.org/data/definitions/22.html) +/// - [Python Documentation: tarfile](https://docs.python.org/3/library/tarfile.html#tarfile.TarFile.extractall) +#[violation] +pub struct TarfileUnsafeMembers; + +impl Violation for TarfileUnsafeMembers { + #[derive_message_formats] + fn message(&self) -> String { + format!("Uses of `tarfile.extractall()`") + } +} + +/// S202 +pub(crate) fn tarfile_unsafe_members(checker: &mut Checker, call: &ast::ExprCall) { + if checker.semantic().seen(&["tarfile"]) + && call + .func + .as_attribute_expr() + .is_some_and(|attr| attr.attr.as_str() == "extractall") + { + checker + .diagnostics + .push(Diagnostic::new(TarfileUnsafeMembers, call.func.range())); + } +} diff --git a/crates/ruff_linter/src/rules/flake8_bandit/snapshots/ruff_linter__rules__flake8_bandit__tests__S202_S202.py.snap b/crates/ruff_linter/src/rules/flake8_bandit/snapshots/ruff_linter__rules__flake8_bandit__tests__S202_S202.py.snap new file mode 100644 index 00000000000000..0c8f06b74aa5b3 --- /dev/null +++ b/crates/ruff_linter/src/rules/flake8_bandit/snapshots/ruff_linter__rules__flake8_bandit__tests__S202_S202.py.snap @@ -0,0 +1,40 @@ +--- +source: crates/ruff_linter/src/rules/flake8_bandit/mod.rs +--- +S202.py:8:5: S202 Uses of `tarfile.extractall()` + | +6 | def unsafe_archive_handler(filename): +7 | tar = tarfile.open(filename) +8 | tar.extractall(path=tempfile.mkdtemp()) + | ^^^^^^^^^^^^^^ S202 +9 | tar.close() + | + +S202.py:14:5: S202 Uses of `tarfile.extractall()` + | +12 | def managed_members_archive_handler(filename): +13 | tar = tarfile.open(filename) +14 | tar.extractall(path=tempfile.mkdtemp(), members=members_filter(tar)) + | ^^^^^^^^^^^^^^ S202 +15 | tar.close() + | + +S202.py:20:5: S202 Uses of `tarfile.extractall()` + | +18 | def list_members_archive_handler(filename): +19 | tar = tarfile.open(filename) +20 | tar.extractall(path=tempfile.mkdtemp(), members=[]) + | ^^^^^^^^^^^^^^ S202 +21 | tar.close() + | + +S202.py:26:5: S202 Uses of `tarfile.extractall()` + | +24 | def provided_members_archive_handler(filename): +25 | tar = tarfile.open(filename) +26 | tarfile.extractall(path=tempfile.mkdtemp(), members=tar) + | ^^^^^^^^^^^^^^^^^^ S202 +27 | tar.close() + | + + diff --git a/ruff.schema.json b/ruff.schema.json index c4acb767cd5859..8ae7c3fe37fb2c 100644 --- a/ruff.schema.json +++ b/ruff.schema.json @@ -3368,6 +3368,7 @@ "S2", "S20", "S201", + "S202", "S3", "S30", "S301", From ea24f53da3b64468be8f17a56def6383db875d69 Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Fri, 24 Nov 2023 17:36:03 +0000 Subject: [PATCH 2/2] Rewrite docs --- .../test/fixtures/flake8_bandit/S202.py | 18 +++++++ crates/ruff_linter/src/codes.rs | 2 +- .../rules/tarfile_unsafe_members.rs | 51 ++++++++++++++----- ...s__flake8_bandit__tests__S202_S202.py.snap | 9 ++++ 4 files changed, 65 insertions(+), 15 deletions(-) diff --git a/crates/ruff_linter/resources/test/fixtures/flake8_bandit/S202.py b/crates/ruff_linter/resources/test/fixtures/flake8_bandit/S202.py index 2af3eb54464e0e..bba1deda4ab36e 100644 --- a/crates/ruff_linter/resources/test/fixtures/flake8_bandit/S202.py +++ b/crates/ruff_linter/resources/test/fixtures/flake8_bandit/S202.py @@ -27,6 +27,24 @@ def provided_members_archive_handler(filename): tar.close() +def filter_data(filename): + tar = tarfile.open(filename) + tarfile.extractall(path=tempfile.mkdtemp(), filter="data") + tar.close() + + +def filter_fully_trusted(filename): + tar = tarfile.open(filename) + tarfile.extractall(path=tempfile.mkdtemp(), filter="fully_trusted") + tar.close() + + +def filter_tar(filename): + tar = tarfile.open(filename) + tarfile.extractall(path=tempfile.mkdtemp(), filter="tar") + tar.close() + + def members_filter(tarfile): result = [] for member in tarfile.getmembers(): diff --git a/crates/ruff_linter/src/codes.rs b/crates/ruff_linter/src/codes.rs index a23c771c7e8bd9..1e04278e0f15b8 100644 --- a/crates/ruff_linter/src/codes.rs +++ b/crates/ruff_linter/src/codes.rs @@ -595,7 +595,7 @@ pub fn code_to_rule(linter: Linter, code: &str) -> Option<(RuleGroup, Rule)> { (Flake8Bandit, "112") => (RuleGroup::Stable, rules::flake8_bandit::rules::TryExceptContinue), (Flake8Bandit, "113") => (RuleGroup::Stable, rules::flake8_bandit::rules::RequestWithoutTimeout), (Flake8Bandit, "201") => (RuleGroup::Preview, rules::flake8_bandit::rules::FlaskDebugTrue), - (Flake8Bandit, "202") => (RuleGroup::Stable, rules::flake8_bandit::rules::TarfileUnsafeMembers), + (Flake8Bandit, "202") => (RuleGroup::Preview, rules::flake8_bandit::rules::TarfileUnsafeMembers), (Flake8Bandit, "301") => (RuleGroup::Stable, rules::flake8_bandit::rules::SuspiciousPickleUsage), (Flake8Bandit, "302") => (RuleGroup::Stable, rules::flake8_bandit::rules::SuspiciousMarshalUsage), (Flake8Bandit, "303") => (RuleGroup::Stable, rules::flake8_bandit::rules::SuspiciousInsecureHashUsage), diff --git a/crates/ruff_linter/src/rules/flake8_bandit/rules/tarfile_unsafe_members.rs b/crates/ruff_linter/src/rules/flake8_bandit/rules/tarfile_unsafe_members.rs index a896b41dd9c0cf..1fd35e3c7ad35d 100644 --- a/crates/ruff_linter/src/rules/flake8_bandit/rules/tarfile_unsafe_members.rs +++ b/crates/ruff_linter/src/rules/flake8_bandit/rules/tarfile_unsafe_members.rs @@ -6,13 +6,19 @@ use ruff_python_ast::{self as ast}; use ruff_text_size::Ranged; /// ## What it does -/// Checks for uses of ``tarfile.extractall()`. +/// Checks for uses of `tarfile.extractall`. /// /// ## Why is this bad? -/// Use ``tarfile.extractall(members=function_name)`` and define a function -/// that will inspect each member. Discard files that contain a directory -/// traversal sequences such as ``../`` or ``\..`` along with all special filetypes -/// unless you explicitly need them. +/// +/// Extracting archives from untrusted sources without prior inspection is +/// a security risk, as maliciously crafted archives may contain files that +/// will be written outside of the target directory. For example, the archive +/// could include files with absolute paths (e.g., `/etc/passwd`), or relative +/// paths with parent directory references (e.g., `../etc/passwd`). +/// +/// On Python 3.12 and later, use `filter='data'` to prevent the most dangerous +/// security issues (see: [PEP 706]). On earlier versions, set the `members` +/// argument to a trusted subset of the archive's members. /// /// ## Example /// ```python @@ -26,7 +32,10 @@ use ruff_text_size::Ranged; /// /// ## References /// - [Common Weakness Enumeration: CWE-22](https://cwe.mitre.org/data/definitions/22.html) -/// - [Python Documentation: tarfile](https://docs.python.org/3/library/tarfile.html#tarfile.TarFile.extractall) +/// - [Python Documentation: `TarFile.extractall`](https://docs.python.org/3/library/tarfile.html#tarfile.TarFile.extractall) +/// - [Python Documentation: Extraction filters](https://docs.python.org/3/library/tarfile.html#tarfile-extraction-filter) +/// +/// [PEP 706]: https://peps.python.org/pep-0706/#backporting-forward-compatibility #[violation] pub struct TarfileUnsafeMembers; @@ -39,14 +48,28 @@ impl Violation for TarfileUnsafeMembers { /// S202 pub(crate) fn tarfile_unsafe_members(checker: &mut Checker, call: &ast::ExprCall) { - if checker.semantic().seen(&["tarfile"]) - && call - .func - .as_attribute_expr() - .is_some_and(|attr| attr.attr.as_str() == "extractall") + if !call + .func + .as_attribute_expr() + .is_some_and(|attr| attr.attr.as_str() == "extractall") { - checker - .diagnostics - .push(Diagnostic::new(TarfileUnsafeMembers, call.func.range())); + return; } + + if call + .arguments + .find_keyword("filter") + .and_then(|keyword| keyword.value.as_string_literal_expr()) + .is_some_and(|value| matches!(value.value.as_str(), "data" | "tar")) + { + return; + } + + if !checker.semantic().seen(&["tarfile"]) { + return; + } + + checker + .diagnostics + .push(Diagnostic::new(TarfileUnsafeMembers, call.func.range())); } diff --git a/crates/ruff_linter/src/rules/flake8_bandit/snapshots/ruff_linter__rules__flake8_bandit__tests__S202_S202.py.snap b/crates/ruff_linter/src/rules/flake8_bandit/snapshots/ruff_linter__rules__flake8_bandit__tests__S202_S202.py.snap index 0c8f06b74aa5b3..fb951d05d1d445 100644 --- a/crates/ruff_linter/src/rules/flake8_bandit/snapshots/ruff_linter__rules__flake8_bandit__tests__S202_S202.py.snap +++ b/crates/ruff_linter/src/rules/flake8_bandit/snapshots/ruff_linter__rules__flake8_bandit__tests__S202_S202.py.snap @@ -37,4 +37,13 @@ S202.py:26:5: S202 Uses of `tarfile.extractall()` 27 | tar.close() | +S202.py:38:5: S202 Uses of `tarfile.extractall()` + | +36 | def filter_fully_trusted(filename): +37 | tar = tarfile.open(filename) +38 | tarfile.extractall(path=tempfile.mkdtemp(), filter="fully_trusted") + | ^^^^^^^^^^^^^^^^^^ S202 +39 | tar.close() + | +