From f25e6f18a01c5a2cf271ede0fc6652205e6d51b2 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Tue, 10 Sep 2024 12:11:54 -0400 Subject: [PATCH] Dynamic Instrumentation Redactor component This component determines whether a variable, attribute or hash element should be redacted based on the identifier name/key name and the type of the value. Unit tests are included. The Redactor component technically depends on DI settings but they are mocked out in the unit tests. --- .rubocop.yml | 2 + lib/datadog/di/redactor.rb | 185 +++++++++++++++++++++++++++++++ sig/datadog/di/redactor.rbs | 27 +++++ spec/datadog/di/redactor_spec.rb | 155 ++++++++++++++++++++++++++ 4 files changed, 369 insertions(+) create mode 100644 lib/datadog/di/redactor.rb create mode 100644 sig/datadog/di/redactor.rbs create mode 100644 spec/datadog/di/redactor_spec.rb diff --git a/.rubocop.yml b/.rubocop.yml index 0cde8f175ae..fe48739255b 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -24,6 +24,8 @@ AllCops: - 'spec/**/**/interesting_backtrace_helper.rb' # This file needs quite a few bizarre code patterns by design - 'vendor/bundle/**/*' - 'spec/datadog/tracing/contrib/grpc/support/gen/**/*.rb' # Skip protoc autogenerated code + - lib/datadog/di/**/* + - spec/datadog/di/**/* NewCops: disable # Don't allow new cops to be enabled implicitly. SuggestExtensions: false # Stop pushing suggestions constantly. diff --git a/lib/datadog/di/redactor.rb b/lib/datadog/di/redactor.rb new file mode 100644 index 00000000000..0c3bdb52f66 --- /dev/null +++ b/lib/datadog/di/redactor.rb @@ -0,0 +1,185 @@ +# frozen_string_literal: true + +module Datadog + module DI + # Provides logic to identify sensitive information in snapshots captured + # by dynamic instrumentation. + # + # Redaction can be performed based on identifier or attribute name, + # or class name of said identifier or attribute. Redaction does not take + # into account variable values. 
+ # + # There is a built-in list of identifier names which will be subject to + # redaction. Additional names can be provided by the user via the + # settings.dynamic_instrumentation.redacted_identifiers setting or + # the DD_DYNAMIC_INSTRUMENTATION_REDACTED_IDENTIFIERS environment + # variable. Currently no class names are subject to redaction by default; + # class names can be provided via the + # settings.dynamic_instrumentation.redacted_type_names setting or + # DD_DYNAMIC_INSTRUMENTATION_REDACTED_TYPES environment variable. + # + # Redacted identifiers must match exactly to an attribute name, a key + # in a hash or a variable name. Redacted types can either be matched + # exactly or, if the name is suffixed with an asterisk (*), any class + # whose name starts with the specified prefix will be subject to redaction. + # + # When specifying class (type) names to be redacted, user must specify + # fully-qualified names. For example, if `Token` or `Token*` are + # specified to be redacted, instances of ::Token will be redacted + # but instances of ::Foo::Token will not be. To redact the latter, + # specify `Foo::Token` or `::Foo::Token` as redacted types. + # + # This class does not perform redaction itself (i.e., value replacement + # with a placeholder). This replacement is performed by Serializer. + # + # @api private + class Redactor + def initialize(settings) + @settings = settings + end + + attr_reader :settings + + def redact_identifier?(name) + redacted_identifiers.include?(normalize(name)) + end + + def redact_type?(value) + # Classes can be nameless, do not attempt to redact in that case. + if (cls_name = value.class.name) + redacted_type_names_regexp.match?(cls_name) + else + false + end + end + + private + + def redacted_identifiers + @redacted_identifiers ||= begin + names = DEFAULT_REDACTED_IDENTIFIERS + settings.dynamic_instrumentation.redacted_identifiers + names.map! 
do |name| + normalize(name) + end + Set.new(names) + end + end + + def redacted_type_names_regexp + @redacted_type_names_regexp ||= begin + names = settings.dynamic_instrumentation.redacted_type_names + names = names.map do |name| + if name.start_with?("::") + # :: prefix is redundant, all names are expected to be + # fully-qualified. + name = name[2...name.length] + end + if name.end_with?("*") + name = name[0..-2] + suffix = ".*" + else + suffix = "" + end + Regexp.escape(name) + suffix + end.join("|") + Regexp.new("\\A(?:#{names})\\z") + end + end + + # Copied from dd-trace-py + DEFAULT_REDACTED_IDENTIFIERS = [ + "2fa", + "accesstoken", + "aiohttpsession", + "apikey", + "apisecret", + "apisignature", + "appkey", + "applicationkey", + "auth", + "authorization", + "authtoken", + "ccnumber", + "certificatepin", + "cipher", + "clientid", + "clientsecret", + "connectionstring", + "connectsid", + "cookie", + "credentials", + "creditcard", + "csrf", + "csrftoken", + "cvv", + "databaseurl", + "dburl", + "encryptionkey", + "encryptionkeyid", + "env", + "geolocation", + "gpgkey", + "ipaddress", + "jti", + "jwt", + "licensekey", + "masterkey", + "mysqlpwd", + "nonce", + "oauth", + "oauthtoken", + "otp", + "passhash", + "passwd", + "password", + "passwordb", + "pemfile", + "pgpkey", + "phpsessid", + "pin", + "pincode", + "pkcs8", + "privatekey", + "publickey", + "pwd", + "recaptchakey", + "refreshtoken", + "routingnumber", + "salt", + "secret", + "secretkey", + "secrettoken", + "securityanswer", + "securitycode", + "securityquestion", + "serviceaccountcredentials", + "session", + "sessionid", + "sessionkey", + "setcookie", + "signature", + "signaturekey", + "sshkey", + "ssn", + "symfony", + "token", + "transactionid", + "twiliotoken", + "usersession", + "voterid", + "xapikey", + "xauthtoken", + "xcsrftoken", + "xforwardedfor", + "xrealip", + "xsrf", + "xsrftoken", + ] + + # Input can be a string or a symbol. 
+ def normalize(str) + str.to_s.strip.downcase.gsub(/[-_$@]/, "") + end + end + end +end diff --git a/sig/datadog/di/redactor.rbs b/sig/datadog/di/redactor.rbs new file mode 100644 index 00000000000..b568e4d139f --- /dev/null +++ b/sig/datadog/di/redactor.rbs @@ -0,0 +1,27 @@ +module Datadog + module DI + class Redactor + @settings: untyped + + @redacted_identifiers: untyped + + @redacted_type_names_regexp: Regexp + + def initialize: (untyped settings) -> void + + attr_reader settings: untyped + + def redact_identifier?: (String name) -> (true | false) + + def redact_type?: (untyped value) -> (true | false) + + private + + def redacted_identifiers: () -> untyped + + def redacted_type_names_regexp: () -> untyped + DEFAULT_REDACTED_IDENTIFIERS: ::Array["2fa" | "accesstoken" | "aiohttpsession" | "apikey" | "apisecret" | "apisignature" | "appkey" | "applicationkey" | "auth" | "authorization" | "authtoken" | "ccnumber" | "certificatepin" | "cipher" | "clientid" | "clientsecret" | "connectionstring" | "connectsid" | "cookie" | "credentials" | "creditcard" | "csrf" | "csrftoken" | "cvv" | "databaseurl" | "dburl" | "encryptionkey" | "encryptionkeyid" | "env" | "geolocation" | "gpgkey" | "ipaddress" | "jti" | "jwt" | "licensekey" | "masterkey" | "mysqlpwd" | "nonce" | "oauth" | "oauthtoken" | "otp" | "passhash" | "passwd" | "password" | "passwordb" | "pemfile" | "pgpkey" | "phpsessid" | "pin" | "pincode" | "pkcs8" | "privatekey" | "publickey" | "pwd" | "recaptchakey" | "refreshtoken" | "routingnumber" | "salt" | "secret" | "secretkey" | "secrettoken" | "securityanswer" | "securitycode" | "securityquestion" | "serviceaccountcredentials" | "session" | "sessionid" | "sessionkey" | "setcookie" | "signature" | "signaturekey" | "sshkey" | "ssn" | "symfony" | "token" | "transactionid" | "twiliotoken" | "usersession" | "voterid" | "xapikey" | "xauthtoken" | "xcsrftoken" | "xforwardedfor" | "xrealip" | "xsrf" | "xsrftoken"] + def normalize: (untyped str) -> untyped + end + end +end 
diff --git a/spec/datadog/di/redactor_spec.rb b/spec/datadog/di/redactor_spec.rb new file mode 100644 index 00000000000..7e225c99764 --- /dev/null +++ b/spec/datadog/di/redactor_spec.rb @@ -0,0 +1,155 @@ +require "datadog/di/redactor" + +class DIRedactorSpecSensitiveType; end + +class DIRedactorSpecWildCard; end + +class DIRedactorSpecWildCardClass; end + +class DIRedactorSpecWildCa; end + +class DIRedactorSpecPrefixWildCard; end + +class DIRedactorSpecDoubleColon; end + +module DIRedactorSpec + class SensitiveType; end + + class NotSensitiveType; end + + class WildCardSensitiveType; end + + class ExactMatch; end + + class DoubleColonNested; end + + class DoubleColonWildCardType; end +end + +RSpec.describe Datadog::DI::Redactor do + let(:settings) do + double("settings").tap do |settings| + allow(settings).to receive(:dynamic_instrumentation).and_return(di_settings) + end + end + + let(:di_settings) do + double("di settings").tap do |settings| + allow(settings).to receive(:enabled).and_return(true) + allow(settings).to receive(:propagate_all_exceptions).and_return(false) + allow(settings).to receive(:redacted_identifiers).and_return([]) + end + end + + let(:redactor) do + Datadog::DI::Redactor.new(settings) + end + + describe "#redact_identifier?" 
do + def self.define_cases(cases) + cases.each do |(label, identifier_, redact_)| + identifier, redact = identifier_, redact_ + + context label do + let(:identifier) { identifier } + + it do + expect(redactor.redact_identifier?(identifier)).to be redact + end + end + end + end + + cases = [ + ["lowercase", "password", true], + ["uppercase", "PASSWORD", true], + ["with removed punctiation", "pass_word", true], + ["with non-removed punctuation", "pass/word", false], + ] + + define_cases(cases) + + context "when user-defined redacted identifiers exist" do + before do + expect(di_settings).to receive(:redacted_identifiers).and_return(%w[foo quux]) + end + + cases = [ + ["exact user-defined identifier", "foo", true], + ["prefix of user-defined identifier", "f", false], + ["suffix of user-defined identifier", "oo", false], + ["user-defined identifier with extra punctuation", "f-o-o", true], + ] + + define_cases(cases) + end + end + + describe "#redact_type?" do + let(:redacted_type_names) { + %w[ + DIRedactorSpecSensitiveType + DIRedactorSpecWildCard* + DIRedactorSpec::ExactMatch + DIRedactorSpec::WildCard* + SensitiveType + SensitiveType* + ::DIRedactorSpecDoubleColon + ::DIRedactorSpec::DoubleColonNested + ::DIRedactorSpec::DoubleColonWildCard* + ] + } + + def self.define_cases(cases) + cases.each do |(label, value_, redact_)| + value, redact = value_, redact_ + + context label do + let(:value) { value } + + it do + expect(redactor.redact_type?(value)).to be redact + end + end + end + end + + context "redacted type list is checked" do + before do + expect(di_settings).to receive(:redacted_type_names).and_return(redacted_type_names) + end + + cases = [ + ["redacted", DIRedactorSpecSensitiveType.new, true], + ["not redacted", /123/, false], + ["primitive type", nil, false], + ["wild card type whose name is the same as prefix", DIRedactorSpecWildCard.new, true], + ["wild card type", DIRedactorSpecWildCardClass.new, true], + ["wild card does not match from beginning", 
DIRedactorSpecPrefixWildCard.new, false], + ["partial wild card prefix match", DIRedactorSpecWildCa.new, false], + ["class object", String, false], + ["anonymous class object", Class.new, false], + ["namespaced class - exact match", DIRedactorSpec::ExactMatch.new, true], + ["namespaced class - wildcard - matched", DIRedactorSpec::WildCardSensitiveType.new, true], + ["namespaced class - tail component match only", DIRedactorSpec::SensitiveType.new, false], + ["double-colon top-level specification", DIRedactorSpecDoubleColon.new, true], + ["double-colon nested specification", DIRedactorSpec::DoubleColonNested.new, true], + ["double-colon nested wildcard", DIRedactorSpec::DoubleColonWildCardType.new, true], + ] + + define_cases(cases) + end + + context "redacted type list is not checked" do + before do + expect(di_settings).not_to receive(:redacted_type_names) + end + + cases = [ + ["instance of anonymous class", Class.new.new, false], + ] + + define_cases(cases) + end + end +end