Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add HTTP url quantizer #384

Merged
merged 2 commits into from
Mar 27, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions lib/ddtrace.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
require 'ddtrace/pin'
require 'ddtrace/tracer'
require 'ddtrace/error'
require 'ddtrace/quantization/http'
require 'ddtrace/pipeline'
require 'ddtrace/configuration'
require 'ddtrace/patcher'
Expand Down
86 changes: 86 additions & 0 deletions lib/ddtrace/quantization/http.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
require 'uri'
require 'set'

module Datadog
  module Quantization
    # Quantization for HTTP resources.
    #
    # Strips (or selectively keeps) query string values and URI fragments so
    # that URLs differing only in parameter values collapse to the same string.
    module HTTP
      # Returned by the non-bang methods when quantization raises an error
      # and no :placeholder option was supplied.
      PLACEHOLDER = '?'.freeze

      module_function

      # Quantizes a URL, never raising.
      #
      # @param url [String] the URL to quantize
      # @param options [Hash, nil] same options as {#url!}, plus :placeholder,
      #   the value to return if quantization fails
      # @return [String] the quantized URL, or the placeholder on any StandardError
      def url(url, options = {})
        url!(url, options)
      rescue StandardError
        # options may legitimately be nil (url! tolerates it), so guard the lookup.
        (options || {})[:placeholder] || PLACEHOLDER
      end

      # Quantizes a URL, raising if it cannot be parsed.
      #
      # @param url [String] the URL to quantize
      # @param options [Hash, nil] :query is passed to {#query} as its options;
      #   fragment: :show keeps the URI fragment (it is removed otherwise)
      # @return [String] the quantized URL
      # @raise [StandardError] whatever URI.parse or the URI setters raise
      def url!(url, options = {})
        options ||= {}

        URI.parse(url).tap do |uri|
          # Format the query string
          if uri.query
            quantized = query(uri.query, options[:query])
            # An empty result clears the query entirely so no dangling "?" remains.
            uri.query = (!quantized.nil? && quantized.empty? ? nil : quantized)
          end

          # Remove any URI fragments
          uri.fragment = nil unless options[:fragment] == :show
        end.to_s
      end

      # Quantizes a query string, never raising.
      #
      # @param query [String] the raw query string (without the leading "?")
      # @param options [Hash, nil] same options as {#query!}, plus :placeholder,
      #   the value to return if quantization fails
      # @return [String] the quantized query, or the placeholder on any StandardError
      def query(query, options = {})
        query!(query, options)
      rescue StandardError
        # options may legitimately be nil (query! tolerates it), so guard the lookup.
        (options || {})[:placeholder] || PLACEHOLDER
      end

      # Quantizes a query string, raising on failure.
      #
      # By default every key is kept and every value is dropped; repeated
      # value-less keys are collapsed into one.
      #
      # @param query [String] the raw query string (without the leading "?")
      # @param options [Hash, nil] :show is an Array of keys whose values are
      #   kept, or :all to return the query untouched; :exclude is an Array of
      #   keys to drop entirely, or :all to return an empty string
      # @return [String] the quantized query string
      def query!(query, options = {})
        options ||= {}
        # Read the options into locals rather than writing defaults back into
        # the hash: the hash belongs to the caller and may be frozen or shared.
        show = options[:show] || []
        exclude = options[:exclude] || []

        # Short circuit if query string is meant to exclude everything
        # or if the query string is meant to include everything
        return '' if exclude == :all
        return query if show == :all

        collect_query(query, uniq: true) do |key, value|
          if exclude.include?(key)
            [nil, nil]
          else
            [key, show.include?(key) ? value : nil]
          end
        end
      end

      # Iterate over each key value pair, yielding to the block given.
      # The block receives (key, value, delimiter) and returns the [key, value]
      # pair to emit; a nil key drops the pair, a nil value emits the key alone.
      # Accepts :uniq option, which keeps uniq copies of keys without values.
      # e.g. Reduces "foo&bar=bar&bar=bar&foo" to "foo&bar=bar&bar=bar"
      def collect_query(query, options = {})
        return query unless block_given?
        uniq = options[:uniq].nil? ? false : options[:uniq]
        keys = Set.new

        # Record the delimiter preceding each pair so "&" vs ";" is preserved.
        # \A (not ^) is used so that a newline inside the query cannot produce
        # a spurious empty delimiter and shift the list out of alignment.
        delims = query.scan(/(\A|&|;)/).flatten
        query.split(/[&;]/).collect.with_index do |pairs, i|
          key, value = pairs.split('=', 2)
          key, value = yield(key, value, delims[i])
          if uniq && keys.include?(key)
            ''
          elsif key && value
            "#{delims[i]}#{key}=#{value}"
          elsif key
            # Only value-less keys are tracked for de-duplication.
            "#{delims[i]}#{key}".tap { keys << key }
          else
            ''
          end
        end.join.sub(/\A[&;]/, '') # drop the leading delimiter left by removed pairs
      end

      private_class_method :collect_query
    end
  end
end
228 changes: 228 additions & 0 deletions spec/ddtrace/quantization/http_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,228 @@
# encoding: utf-8
require 'spec_helper'

require 'ddtrace/quantization/http'

# Specs for Datadog::Quantization::HTTP: the non-raising .url and .query
# module functions, covering default quantization plus the :show, :exclude,
# and :fragment options.
RSpec.describe Datadog::Quantization::HTTP do
  describe '#url' do
    subject(:result) { described_class.url(url, options) }
    let(:options) { {} }

    context 'given a URL' do
      let(:url) { 'http://example.com/path?category_id=1&sort_by=asc#featured' }

      context 'default behavior' do
        it { is_expected.to eq('http://example.com/path?category_id&sort_by') }
      end

      context 'default behavior for an array' do
        let(:url) { 'http://example.com/path?categories[]=1&categories[]=2' }
        it { is_expected.to eq('http://example.com/path?categories[]') }
      end

      context 'with query: show: value' do
        let(:options) { { query: { show: ['category_id'] } } }
        it { is_expected.to eq('http://example.com/path?category_id=1&sort_by') }
      end

      context 'with query: show: :all' do
        let(:options) { { query: { show: :all } } }
        it { is_expected.to eq('http://example.com/path?category_id=1&sort_by=asc') }
      end

      context 'with query: exclude: value' do
        let(:options) { { query: { exclude: ['sort_by'] } } }
        it { is_expected.to eq('http://example.com/path?category_id') }
      end

      context 'with query: exclude: :all' do
        let(:options) { { query: { exclude: :all } } }
        it { is_expected.to eq('http://example.com/path') }
      end

      # The option under test here is :fragment, not the query :show option.
      context 'with fragment: :show' do
        let(:options) { { fragment: :show } }
        it { is_expected.to eq('http://example.com/path?category_id&sort_by#featured') }
      end

      context 'with Unicode characters' do
        # URLs do not permit unencoded non-ASCII characters in the URL.
        let(:url) { "http://example.com/path?繋がってて" }
        it { is_expected.to eq(described_class::PLACEHOLDER) }
      end
    end
  end

  describe '#query' do
    subject(:result) { described_class.query(query, options) }

    context 'given a query' do
      context 'and no options' do
        let(:options) { {} }

        context 'with a single parameter' do
          let(:query) { 'foo=foo' }
          it { is_expected.to eq('foo') }

          context 'with an invalid byte sequence' do
            # \255 is off-limits https://en.wikipedia.org/wiki/UTF-8#Codepage_layout
            # There isn't a graceful way to handle this without stripping interesting
            # characters out either; so just raise an error and default to the placeholder.
            let(:query) { "foo\255=foo" }
            it { is_expected.to eq(described_class::PLACEHOLDER) }
          end
        end

        context 'with multiple parameters' do
          let(:query) { 'foo=foo&bar=bar' }
          it { is_expected.to eq('foo&bar') }
        end

        context 'with array-style parameters' do
          let(:query) { 'foo[]=bar&foo[]=baz' }
          it { is_expected.to eq('foo[]') }
        end

        context 'with semi-colon style parameters' do
          let(:query) { 'foo;bar' }
          # Semicolon delimiters are passed through unchanged rather than
          # normalized to "&". Semicolons are illegal as of 2014... so this is an edge case.
          # See https://www.w3.org/TR/2014/REC-html5-20141028/forms.html#url-encoded-form-data
          it { is_expected.to eq('foo;bar') }
        end

        context 'with object-style parameters' do
          let(:query) { 'user[id]=1&user[name]=Nathan' }
          it { is_expected.to eq('user[id]&user[name]') }

          context 'that are complex' do
            let(:query) { 'users[][id]=1&users[][name]=Nathan&users[][id]=2&users[][name]=Emma' }
            it { is_expected.to eq('users[][id]&users[][name]') }
          end
        end
      end

      context 'and a show: :all option' do
        let(:query) { 'foo=foo&bar=bar' }
        let(:options) { { show: :all } }
        it { is_expected.to eq(query) }
      end

      context 'and a show option' do
        context 'with a single parameter' do
          let(:query) { 'foo=foo' }
          let(:key) { 'foo' }
          let(:options) { { show: [key] } }
          it { is_expected.to eq('foo=foo') }

          context 'that has a Unicode key' do
            let(:query) { '繋=foo' }
            let(:key) { '繋' }
            it { is_expected.to eq('繋=foo') }

            context 'that is encoded' do
              let(:query) { '%E7%B9%8B=foo' }
              let(:key) { '%E7%B9%8B' }
              it { is_expected.to eq('%E7%B9%8B=foo') }
            end
          end

          context 'that has a Unicode value' do
            let(:query) { 'foo=繋' }
            let(:key) { 'foo' }
            it { is_expected.to eq('foo=繋') }

            context 'that is encoded' do
              let(:query) { 'foo=%E7%B9%8B' }
              it { is_expected.to eq('foo=%E7%B9%8B') }
            end
          end

          context 'that has a Unicode key and value' do
            let(:query) { '繋=繋' }
            let(:key) { '繋' }
            it { is_expected.to eq('繋=繋') }

            context 'that is encoded' do
              let(:query) { '%E7%B9%8B=%E7%B9%8B' }
              let(:key) { '%E7%B9%8B' }
              it { is_expected.to eq('%E7%B9%8B=%E7%B9%8B') }
            end
          end
        end

        context 'with multiple parameters' do
          let(:query) { 'foo=foo&bar=bar' }
          let(:options) { { show: ['foo'] } }
          it { is_expected.to eq('foo=foo&bar') }
        end

        context 'with array-style parameters' do
          let(:query) { 'foo[]=bar&foo[]=baz' }
          let(:options) { { show: ['foo[]'] } }
          it { is_expected.to eq('foo[]=bar&foo[]=baz') }

          context 'that contains encoded braces' do
            let(:query) { 'foo[]=%5Bbar%5D&foo[]=%5Bbaz%5D' }
            it { is_expected.to eq('foo[]=%5Bbar%5D&foo[]=%5Bbaz%5D') }

            context 'that exactly matches the key' do
              let(:query) { 'foo[]=foo%5B%5D&foo[]=foo%5B%5D' }
              it { is_expected.to eq('foo[]=foo%5B%5D&foo[]=foo%5B%5D') }
            end
          end
        end

        context 'with object-style parameters' do
          let(:query) { 'user[id]=1&user[name]=Nathan' }
          let(:options) { { show: ['user[id]'] } }
          it { is_expected.to eq('user[id]=1&user[name]') }

          context 'that are complex' do
            let(:query) { 'users[][id]=1&users[][name]=Nathan&users[][id]=2&users[][name]=Emma' }
            let(:options) { { show: ['users[][id]'] } }
            it { is_expected.to eq('users[][id]=1&users[][name]&users[][id]=2') }
          end
        end
      end

      context 'and an exclude: :all option' do
        let(:query) { 'foo=foo&bar=bar' }
        let(:options) { { exclude: :all } }
        it { is_expected.to eq('') }
      end

      context 'and an exclude option' do
        context 'with a single parameter' do
          let(:query) { 'foo=foo' }
          let(:options) { { exclude: ['foo'] } }
          it { is_expected.to eq('') }
        end

        context 'with multiple parameters' do
          let(:query) { 'foo=foo&bar=bar' }
          let(:options) { { exclude: ['foo'] } }
          it { is_expected.to eq('bar') }
        end

        context 'with array-style parameters' do
          let(:query) { 'foo[]=bar&foo[]=baz' }
          let(:options) { { exclude: ['foo[]'] } }
          it { is_expected.to eq('') }
        end

        context 'with object-style parameters' do
          let(:query) { 'user[id]=1&user[name]=Nathan' }
          let(:options) { { exclude: ['user[name]'] } }
          it { is_expected.to eq('user[id]') }

          context 'that are complex' do
            let(:query) { 'users[][id]=1&users[][name]=Nathan&users[][id]=2&users[][name]=Emma' }
            let(:options) { { exclude: ['users[][name]'] } }
            it { is_expected.to eq('users[][id]') }
          end
        end
      end
    end
  end
end