Skip to content

Commit

Permalink
Merge pull request #2311 from DataDog/fix-invalid-url-exception
Browse files Browse the repository at this point in the history
Handle URLs with invalid characters
  • Loading branch information
lloeki authored Oct 17, 2022
2 parents a06ca0e + a9a7aff commit a30d495
Show file tree
Hide file tree
Showing 3 changed files with 75 additions and 7 deletions.
1 change: 1 addition & 0 deletions LICENSE-3rdparty.csv
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
Component,Origin,License,Copyright
lib/datadog/core/vendor/multipart-post,https://github.com/socketry/multipart-post,MIT,"Copyright (c) 2007-2013 Nick Sieger."
lib/datadog/tracing/contrib/active_record/vendor,https://github.com/rails/rails/,MIT,"Copyright (c) 2005-2018 David Heinemeier Hansson"
lib/datadog/tracing/contrib/utils/quantization/http.rb,https://github.com/ruby/uri,BSD-2-Clause,"Copyright (C) 1993-2013 Yukihiro Matsumoto. All rights reserved."
ext/ddtrace_profiling_native_extension/private_vm_api_access,https://github.com/ruby/ruby,BSD-2-Clause,"Copyright (C) 1993-2013 Yukihiro Matsumoto. All rights reserved."
msgpack,https://rubygems.org/gems/msgpack,Apache-2.0,"Copyright (c) 2008-2015 Sadayuki Furuhashi"
debase-ruby_core_source,https://rubygems.org/gems/debase-ruby_core_source,MIT for gem and BSD-2-Clause for Ruby sources,"Copyright (c) 2012 Gabriel Horner. Files from Ruby sources are Copyright (C) 1993-2013 Yukihiro Matsumoto. All rights reserved."
20 changes: 14 additions & 6 deletions lib/datadog/tracing/contrib/utils/quantization/http.rb
Original file line number Diff line number Diff line change
Expand Up @@ -14,20 +14,28 @@ module HTTP

PLACEHOLDER = '?'.freeze

# Taken from Ruby https://github.com/ruby/uri/blob/ffbab83de6d8748c9454414e02db5317609166eb/lib/uri/rfc3986_parser.rb
# but adjusted to parse only the <scheme>://<host>:<port> components
# and stop there, since we don't care about the path, query string,
# and fragment components.
#
# The named group `URI` (capture 1) holds the scheme, the `://` separator and
# the authority (optional userinfo, host, optional port). The character that
# ends the authority is matched but not captured. Per RFC 3986 section 3.2 the
# authority ends at the next `/`, `?` or `#`, or at the end of the string, so
# URLs such as `http://example.com?x=1` (query without a path) still yield a base.
RFC3986_URL_BASE = /\A(?<URI>(?<scheme>[A-Za-z][+\-.0-9A-Za-z]*):(?<hier-part>\/\/(?<authority>(?:(?<userinfo>(?:%\h\h|[!$&-.0-;=A-Z_a-z~])*)@)?(?<host>(?<IP-literal>\[(?:(?<IPv6address>(?:\h{1,4}:){6}(?<ls32>\h{1,4}:\h{1,4}|(?<IPv4address>(?<dec-octet>[1-9]\d|1\d{2}|2[0-4]\d|25[0-5]|\d)\.\g<dec-octet>\.\g<dec-octet>\.\g<dec-octet>))|::(?:\h{1,4}:){5}\g<ls32>|\h{1,4}?::(?:\h{1,4}:){4}\g<ls32>|(?:(?:\h{1,4}:)?\h{1,4})?::(?:\h{1,4}:){3}\g<ls32>|(?:(?:\h{1,4}:){,2}\h{1,4})?::(?:\h{1,4}:){2}\g<ls32>|(?:(?:\h{1,4}:){,3}\h{1,4})?::\h{1,4}:\g<ls32>|(?:(?:\h{1,4}:){,4}\h{1,4})?::\g<ls32>|(?:(?:\h{1,4}:){,5}\h{1,4})?::\h{1,4}|(?:(?:\h{1,4}:){,6}\h{1,4})?::)|(?<IPvFuture>v\h+\.[!$&-.0-;=A-Z_a-z~]+))\])|\g<IPv4address>|(?<reg-name>(?:%\h\h|[!$&-.0-9;=A-Z_a-z~])*))(?::(?<port>\d*))?)))(?:[\/?#]|\z)/.freeze # rubocop:disable Style/RegexpLiteral, Layout/LineLength

module_function

# Non-raising wrapper around +url!+.
#
# Delegates to +url!+ and returns its result. If +url!+ raises any
# StandardError, a fallback string is returned instead of propagating the error.
#
# @param url [String] the URL to quantize
# @param options [Hash] forwarded to +url!+; the fallback path reads
#   +:placeholder+ (defaults to PLACEHOLDER) and +:base+
# @return the result of +url!+, or on failure either the placeholder alone
#   (when +options[:base] == :exclude+) or "<base_url>/<placeholder>"
def url(url, options = {})
  url!(url, options)
rescue StandardError
  placeholder = options[:placeholder] || PLACEHOLDER

  # Keep whatever base (scheme://host:port) can still be recovered from the
  # URL unless the caller explicitly asked for the base to be excluded.
  options[:base] == :exclude ? placeholder : "#{base_url(url)}/#{placeholder}"
end

# Extracts the "<scheme>://<authority>" prefix of +url+.
#
# Matches against RFC3986_URL_BASE instead of calling URI.parse, so a URL
# whose path, query string or fragment contains characters that URI.parse
# rejects still yields its base.
#
# @param url [String] the URL to extract the base from
# @param options [Hash] not read by this method
# @return [String] the base URL, or an empty string when +url+ does not
#   start with a recognizable base
def base_url(url, options = {})
  match = RFC3986_URL_BASE.match(url)

  match ? match[:URI] : ''
end

def url!(url, options = {})
Expand Down
61 changes: 60 additions & 1 deletion spec/datadog/tracing/contrib/utils/quantization/http_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,26 @@
# URLs do not permit unencoded non-ASCII characters in the URL.
let(:url) { 'http://example.com/path?繋がってて' }

it { is_expected.to eq(described_class::PLACEHOLDER) }
it { is_expected.to eq(format('http://example.com/%s', described_class::PLACEHOLDER)) }

context 'and base: :exclude' do
let(:options) { { base: :exclude } }

it { is_expected.to eq(described_class::PLACEHOLDER) }
end
end

context 'with unencoded ASCII characters' do
# URLs do not permit all ASCII characters to be unencoded in the URL.
let(:url) { 'http://example.com/|' }

it { is_expected.to eq(format('http://example.com/%s', described_class::PLACEHOLDER)) }

context 'and base: :exclude' do
let(:options) { { base: :exclude } }

it { is_expected.to eq(described_class::PLACEHOLDER) }
end
end

context 'with internal obfuscation and the default replacement' do
Expand All @@ -95,6 +114,46 @@
end
end

# Examples for .base_url: it returns only the "<scheme>://<host>" prefix of a
# URL and returns an empty string when no such prefix can be found, even when
# the rest of the URL contains characters a URL may not carry unencoded.
describe '#base_url' do
subject(:result) { described_class.base_url(url, options) }

let(:options) { {} }

context 'given a URL' do
# Default URL for this group; the nested contexts below override it.
let(:url) { 'http://example.com/path?category_id=1&sort_by=asc#featured' }

context 'default behavior' do
# Path, query string and fragment are all dropped.
it { is_expected.to eq('http://example.com') }
end

context 'with Unicode characters' do
# URLs do not permit unencoded non-ASCII characters; the base is still extracted.
let(:url) { 'http://example.com/path?繋がってて' }

it { is_expected.to eq('http://example.com') }
end

context 'with unencoded ASCII characters' do
# Not every ASCII character may appear unencoded in a URL (here: `|`);
# the base is still extracted.
let(:url) { 'http://example.com/|' }

it { is_expected.to eq('http://example.com') }
end

context 'without a base' do
# A path-only URL has no scheme or host, so there is no base to return.
let(:url) { '/foo' }

it { is_expected.to eq('') }
end

context 'that is entirely invalid' do
# A lone NUL byte has no recognizable base; an empty string is expected.
let(:url) { "\x00" }

it { is_expected.to eq('') }
end
end
end

describe '#query' do
subject(:result) { described_class.query(query, options) }

Expand Down

0 comments on commit a30d495

Please sign in to comment.