From a141ce029ee8e38ac8a3239b5a6ebe2e6fddcbd5 Mon Sep 17 00:00:00 2001 From: David Elner Date: Mon, 26 Mar 2018 16:31:58 -0400 Subject: [PATCH 1/2] Added: Datadog::Quantization::HTTP module for quantizing HTTP resources. --- lib/ddtrace.rb | 1 + lib/ddtrace/quantization/http.rb | 86 ++++++++++ spec/ddtrace/quantization/http_spec.rb | 227 +++++++++++++++++++++++++ 3 files changed, 314 insertions(+) create mode 100644 lib/ddtrace/quantization/http.rb create mode 100644 spec/ddtrace/quantization/http_spec.rb diff --git a/lib/ddtrace.rb b/lib/ddtrace.rb index 664eab25d8d..eb213184e2a 100644 --- a/lib/ddtrace.rb +++ b/lib/ddtrace.rb @@ -4,6 +4,7 @@ require 'ddtrace/pin' require 'ddtrace/tracer' require 'ddtrace/error' +require 'ddtrace/quantization/http' require 'ddtrace/pipeline' require 'ddtrace/configuration' require 'ddtrace/patcher' diff --git a/lib/ddtrace/quantization/http.rb b/lib/ddtrace/quantization/http.rb new file mode 100644 index 00000000000..48ba93a5542 --- /dev/null +++ b/lib/ddtrace/quantization/http.rb @@ -0,0 +1,86 @@ +require 'uri' +require 'set' + +module Datadog + module Quantization + # Quantization for HTTP resources + module HTTP + PLACEHOLDER = '?'.freeze + + module_function + + def url(url, options = {}) + url!(url, options) + rescue StandardError + options[:placeholder] || PLACEHOLDER + end + + def url!(url, options = {}) + options ||= {} + + URI.parse(url).tap do |uri| + # Format the query string + if uri.query + query = query(uri.query, options[:query]) + uri.query = (!query.nil? && query.empty? ? 
nil : query) + end + + # Remove any URI fragments + uri.fragment = nil unless options[:fragment] == :show + end.to_s + end + + def query(query, options = {}) + query!(query, options) + rescue StandardError + options[:placeholder] || PLACEHOLDER + end + + def query!(query, options = {}) + options ||= {} + options[:show] = options[:show] || [] + options[:exclude] = options[:exclude] || [] + + # Short circuit if query string is meant to exclude everything + # or if the query string is meant to include everything + return '' if options[:exclude] == :all + return query if options[:show] == :all + + collect_query(query, uniq: true) do |key, value| + if options[:exclude].include?(key) + [nil, nil] + else + value = options[:show].include?(key) ? value : nil + [key, value] + end + end + end + + # Iterate over each key value pair, yielding to the block given. + # Accepts :uniq option, which keeps uniq copies of keys without values. + # e.g. Reduces "foo&bar=bar&bar=bar&foo" to "foo&bar=bar&bar=bar" + def collect_query(query, options = {}) + return query unless block_given? + uniq = options[:uniq].nil? ? 
false : options[:uniq] + keys = Set.new + + delims = query.scan(/(^|&|;)/).flatten + query.split(/[&;]/).collect.with_index do |pairs, i| + key, value = pairs.split('=', 2) + key, value = yield(key, value, delims[i]) + if uniq && keys.include?(key) + '' + elsif key && value + "#{delims[i]}#{key}=#{value}" + elsif key + "#{delims[i]}#{key}".tap { keys << key } + else + '' + end + end.join.sub(/^[&;]/, '') + end + + private_class_method :collect_query + end + end +end diff --git a/spec/ddtrace/quantization/http_spec.rb b/spec/ddtrace/quantization/http_spec.rb new file mode 100644 index 00000000000..24db2172c23 --- /dev/null +++ b/spec/ddtrace/quantization/http_spec.rb @@ -0,0 +1,227 @@ +require 'spec_helper' + +require 'ddtrace/quantization/http' + +RSpec.describe Datadog::Quantization::HTTP do + describe '#url' do + subject(:result) { described_class.url(url, options) } + let(:options) { {} } + + context 'given a URL' do + let(:url) { 'http://example.com/path?category_id=1&sort_by=asc#featured' } + + context 'default behavior' do + it { is_expected.to eq('http://example.com/path?category_id&sort_by') } + end + + context 'default behavior for an array' do + let(:url) { 'http://example.com/path?categories[]=1&categories[]=2' } + it { is_expected.to eq('http://example.com/path?categories[]') } + end + + context 'with query: show: value' do + let(:options) { { query: { show: ['category_id'] } } } + it { is_expected.to eq('http://example.com/path?category_id=1&sort_by') } + end + + context 'with query: show: :all' do + let(:options) { { query: { show: :all } } } + it { is_expected.to eq('http://example.com/path?category_id=1&sort_by=asc') } + end + + context 'with query: exclude: value' do + let(:options) { { query: { exclude: ['sort_by'] } } } + it { is_expected.to eq('http://example.com/path?category_id') } + end + + context 'with query: exclude: :all' do + let(:options) { { query: { exclude: :all } } } + it { is_expected.to eq('http://example.com/path') } + end + + 
context 'with show: :all' do + let(:options) { { fragment: :show } } + it { is_expected.to eq('http://example.com/path?category_id&sort_by#featured') } + end + + context 'with Unicode characters' do + # URLs do not permit unencoded non-ASCII characters in the URL. + let(:url) { 'http://example.com/path?繋がってて' } + it { is_expected.to eq(described_class::PLACEHOLDER) } + end + end + end + + describe '#query' do + subject(:result) { described_class.query(query, options) } + + context 'given a query' do + context 'and no options' do + let(:options) { {} } + + context 'with a single parameter' do + let(:query) { 'foo=foo' } + it { is_expected.to eq('foo') } + + context 'with an invalid byte sequence' do + # \255 is off-limits https://en.wikipedia.org/wiki/UTF-8#Codepage_layout + # There isn't a graceful way to handle this without stripping interesting + # characters out either; so just raise an error and default to the placeholder. + let(:query) { "foo\255=foo" } + it { is_expected.to eq('?') } + end + end + + context 'with multiple parameters' do + let(:query) { 'foo=foo&bar=bar' } + it { is_expected.to eq('foo&bar') } + end + + context 'with array-style parameters' do + let(:query) { 'foo[]=bar&foo[]=baz' } + it { is_expected.to eq('foo[]') } + end + + context 'with semi-colon style parameters' do + let(:query) { 'foo;bar' } + # Notice semicolons aren't preserved... no great way of handling this. + # Semicolons are illegal as of 2014... so this is an edge case. 
+ # See https://www.w3.org/TR/2014/REC-html5-20141028/forms.html#url-encoded-form-data + it { is_expected.to eq('foo;bar') } + end + + context 'with object-style parameters' do + let(:query) { 'user[id]=1&user[name]=Nathan' } + it { is_expected.to eq('user[id]&user[name]') } + + context 'that are complex' do + let(:query) { 'users[][id]=1&users[][name]=Nathan&users[][id]=2&users[][name]=Emma' } + it { is_expected.to eq('users[][id]&users[][name]') } + end + end + end + + context 'and a show: :all option' do + let(:query) { 'foo=foo&bar=bar' } + let(:options) { { show: :all } } + it { is_expected.to eq(query) } + end + + context 'and a show option' do + context 'with a single parameter' do + let(:query) { 'foo=foo' } + let(:key) { 'foo' } + let(:options) { { show: [key] } } + it { is_expected.to eq('foo=foo') } + + context 'that has a Unicode key' do + let(:query) { '繋=foo' } + let(:key) { '繋' } + it { is_expected.to eq('繋=foo') } + + context 'that is encoded' do + let(:query) { '%E7%B9%8B=foo' } + let(:key) { '%E7%B9%8B' } + it { is_expected.to eq('%E7%B9%8B=foo') } + end + end + + context 'that has a Unicode value' do + let(:query) { 'foo=繋' } + let(:key) { 'foo' } + it { is_expected.to eq('foo=繋') } + + context 'that is encoded' do + let(:query) { 'foo=%E7%B9%8B' } + it { is_expected.to eq('foo=%E7%B9%8B') } + end + end + + context 'that has a Unicode key and value' do + let(:query) { '繋=繋' } + let(:key) { '繋' } + it { is_expected.to eq('繋=繋') } + + context 'that is encoded' do + let(:query) { '%E7%B9%8B=%E7%B9%8B' } + let(:key) { '%E7%B9%8B' } + it { is_expected.to eq('%E7%B9%8B=%E7%B9%8B') } + end + end + end + + context 'with multiple parameters' do + let(:query) { 'foo=foo&bar=bar' } + let(:options) { { show: ['foo'] } } + it { is_expected.to eq('foo=foo&bar') } + end + + context 'with array-style parameters' do + let(:query) { 'foo[]=bar&foo[]=baz' } + let(:options) { { show: ['foo[]'] } } + it { is_expected.to eq('foo[]=bar&foo[]=baz') } + + context 'that 
contains encoded braces' do + let(:query) { 'foo[]=%5Bbar%5D&foo[]=%5Bbaz%5D' } + it { is_expected.to eq('foo[]=%5Bbar%5D&foo[]=%5Bbaz%5D') } + + context 'that exactly matches the key' do + let(:query) { 'foo[]=foo%5B%5D&foo[]=foo%5B%5D' } + it { is_expected.to eq('foo[]=foo%5B%5D&foo[]=foo%5B%5D') } + end + end + end + + context 'with object-style parameters' do + let(:query) { 'user[id]=1&user[name]=Nathan' } + let(:options) { { show: ['user[id]'] } } + it { is_expected.to eq('user[id]=1&user[name]') } + + context 'that are complex' do + let(:query) { 'users[][id]=1&users[][name]=Nathan&users[][id]=2&users[][name]=Emma' } + let(:options) { { show: ['users[][id]'] } } + it { is_expected.to eq('users[][id]=1&users[][name]&users[][id]=2') } + end + end + end + + context 'and an exclude: :all option' do + let(:query) { 'foo=foo&bar=bar' } + let(:options) { { exclude: :all } } + it { is_expected.to eq('') } + end + + context 'and an exclude option' do + context 'with a single parameter' do + let(:query) { 'foo=foo' } + let(:options) { { exclude: ['foo'] } } + it { is_expected.to eq('') } + end + + context 'with multiple parameters' do + let(:query) { 'foo=foo&bar=bar' } + let(:options) { { exclude: ['foo'] } } + it { is_expected.to eq('bar') } + end + + context 'with array-style parameters' do + let(:query) { 'foo[]=bar&foo[]=baz' } + let(:options) { { exclude: ['foo[]'] } } + it { is_expected.to eq('') } + end + + context 'with object-style parameters' do + let(:query) { 'user[id]=1&user[name]=Nathan' } + let(:options) { { exclude: ['user[name]'] } } + it { is_expected.to eq('user[id]') } + + context 'that are complex' do + let(:query) { 'users[][id]=1&users[][name]=Nathan&users[][id]=2&users[][name]=Emma' } + let(:options) { { exclude: ['users[][name]'] } } + it { is_expected.to eq('users[][id]') } + end + end + end + end + end +end From 3204d8a28d94b4267bfb52cdd393302c5c2d0af5 Mon Sep 17 00:00:00 2001 From: David Elner Date: Mon, 26 Mar 2018 16:54:09 -0400 Subject: 
[PATCH 2/2] Fixed: Quantizer spec UTF-8 string incompatibility with Ruby 1.9.3 --- spec/ddtrace/quantization/http_spec.rb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/spec/ddtrace/quantization/http_spec.rb b/spec/ddtrace/quantization/http_spec.rb index 24db2172c23..e6c3aa0e36c 100644 --- a/spec/ddtrace/quantization/http_spec.rb +++ b/spec/ddtrace/quantization/http_spec.rb @@ -1,3 +1,4 @@ +# encoding: utf-8 require 'spec_helper' require 'ddtrace/quantization/http' @@ -46,7 +47,7 @@ context 'with Unicode characters' do # URLs do not permit unencoded non-ASCII characters in the URL. - let(:url) { 'http://example.com/path?繋がってて' } + let(:url) { "http://example.com/path?繋がってて" } it { is_expected.to eq(described_class::PLACEHOLDER) } end end