From 3a5f21e700817e8908ce1e45c7187cfb324f77ba Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev <156273877+p-datadog@users.noreply.github.com> Date: Mon, 11 Nov 2024 14:09:15 -0500 Subject: [PATCH 01/14] DEBUG-2334 custom ActiveRecord model serialization (#4088) Adds a custom serializer for ActiveRecord models Adds serializers for Time, Date and DateTime (needed for ActiveRecord timestamps) Motivation: AR models have many fields that are not interesting for DI, and serializing them would 1) consume a lot of resources 2) cause the serialized payload to exceed the limits of the backend 3) not serialize actual attributes due to depth limits (actual attributes are already 3 levels down from the AR model). The custom serializer currently only serializes the attributes, though additional fields can be added in the future such as the new_record? flag. --- Matrixfile | 5 +- Rakefile | 8 + lib/datadog/di/contrib/active_record.rb | 11 + lib/datadog/di/serializer.rb | 238 +++++++++++------- sig/datadog/di/contrib/active_record.rbs | 0 sig/datadog/di/serializer.rbs | 9 +- .../serializer_active_record_spec.rb | 113 +++++++++ spec/datadog/di/serializer_helper.rb | 61 +++++ spec/datadog/di/serializer_spec.rb | 101 +++++--- 9 files changed, 408 insertions(+), 138 deletions(-) create mode 100644 lib/datadog/di/contrib/active_record.rb create mode 100644 sig/datadog/di/contrib/active_record.rbs create mode 100644 spec/datadog/di/contrib/active_record/serializer_active_record_spec.rb create mode 100644 spec/datadog/di/serializer_helper.rb diff --git a/Matrixfile b/Matrixfile index 56a71886cf7..fe3c0548b57 100644 --- a/Matrixfile +++ b/Matrixfile @@ -285,7 +285,10 @@ 'graphql-2.1' => '❌ 2.5 / ❌ 2.6 / ✅ 2.7 / ✅ 3.0 / ✅ 3.1 / ✅ 3.2 / ✅ 3.3 / ✅ 3.4 / ✅ jruby', 'graphql-2.0' => '✅ 2.5 / ✅ 2.6 / ✅ 2.7 / ✅ 3.0 / ✅ 3.1 / ✅ 3.2 / ✅ 3.3 / ✅ 3.4 / ✅ jruby', 'graphql-1.13' => '❌ 2.5 / ✅ 2.6 / ✅ 2.7 / ✅ 3.0 / ✅ 3.1 / ✅ 3.2 / ✅ 3.3 / ✅ 3.4 / ✅ jruby', - } + }, + 'di:active_record' => { + 'rails61-mysql2' => '❌ 2.5 / ✅ 2.6 / ✅ 2.7 / ✅ 3.0 / ✅ 3.1 / ✅ 3.2 / ✅ 3.3 / ✅ 3.4 / ❌ jruby', + }, }.each_with_object({}) do |(tasks, spec_metadata), hash| # Explode arrays of task names into individual tasks # e.g. ['rails', 'railsdisableenv'] => {'...'} becomes 'rails7' => {'...'}, 'railsdisableenv7' => {'...'} diff --git a/Rakefile b/Rakefile index aa67152a835..08d21a2cfdc 100644 --- a/Rakefile +++ b/Rakefile @@ -314,6 +314,14 @@ namespace :spec do task appsec: [:'appsec:all'] + namespace :di do + desc '' # "Explicitly hiding from `rake -T`" + RSpec::Core::RakeTask.new(:active_record) do |t, args| + t.pattern = 'spec/datadog/di/contrib/active_record/**/*_spec.rb' + t.rspec_opts = args.to_a.join(' ') + end + end + namespace :profiling do task all: [:main, :ractors] diff --git a/lib/datadog/di/contrib/active_record.rb b/lib/datadog/di/contrib/active_record.rb new file mode 100644 index 00000000000..d929cde49ed --- /dev/null +++ b/lib/datadog/di/contrib/active_record.rb @@ -0,0 +1,11 @@ +# frozen_string_literal: true + +Datadog::DI::Serializer.register(condition: lambda { |value| ActiveRecord::Base === value }) do |serializer, value, name:, depth:| # steep:ignore + # steep thinks all of the arguments are nil here + # steep:ignore:start + value_to_serialize = { + attributes: value.attributes, + } + serializer.serialize_value(value_to_serialize, depth: depth ? depth - 1 : nil, type: value.class) + # steep:ignore:end +end diff --git a/lib/datadog/di/serializer.rb b/lib/datadog/di/serializer.rb index 2fefa0d91eb..511061e5dfc 100644 --- a/lib/datadog/di/serializer.rb +++ b/lib/datadog/di/serializer.rb @@ -38,13 +38,42 @@ module DI # # @api private class Serializer - def initialize(settings, redactor) + # Third-party library integration / custom serializers. + # + # Dynamic instrumentation has limited payload sizes, and for efficiency + # reasons it is not desirable to transmit data to Datadog that will + # never contain useful information. Additionally, due to depth limits, + # desired data may not even be included in payloads when serialized + # with the default, naive serializer. Therefore, custom objects like + # ActiveRecord model instances may need custom serializers. + # + # CUSTOMER NOTE: The API for defining custom serializers is not yet + # finalized. Please create an issue at + # https://github.com/datadog/dd-trace-rb/issues describing the + # object(s) you wish to serialize so that we can ensure your use case + # will be supported as the library evolves. + # + # Note that the current implementation does not permit defining a + # serializer for a particular class, which is the simplest use case. + # This is because the library itself does not need this functionality + # yet, and it won't help for ActiveRecord models (that derive from + # a common base class but are all of different classes) or for Mongoid + # models (that do not have a common base class at all but include a + # standard Mongoid module). + @@flat_registry = [] + def self.register(condition: nil, &block) + @@flat_registry << {condition: condition, proc: block} + end + + def initialize(settings, redactor, telemetry: nil) @settings = settings @redactor = redactor + @telemetry = telemetry end attr_reader :settings attr_reader :redactor + attr_reader :telemetry # Serializes positional and keyword arguments to a method, # as obtained by a method probe. @@ -86,116 +115,135 @@ def serialize_vars(vars) # (integers, strings, arrays, hashes). # # Respects string length, collection size and traversal depth limits. - def serialize_value(value, name: nil, depth: settings.dynamic_instrumentation.max_capture_depth) - if redactor.redact_type?(value) - return {type: class_name(value.class), notCapturedReason: "redactedType"} - end + def serialize_value(value, name: nil, depth: settings.dynamic_instrumentation.max_capture_depth, type: nil) + cls = type || value.class + begin + if redactor.redact_type?(value) + return {type: class_name(cls), notCapturedReason: "redactedType"} + end - if name && redactor.redact_identifier?(name) - return {type: class_name(value.class), notCapturedReason: "redactedIdent"} - end + if name && redactor.redact_identifier?(name) + return {type: class_name(cls), notCapturedReason: "redactedIdent"} + end - serialized = {type: class_name(value.class)} - case value - when NilClass - serialized.update(isNull: true) - when Integer, Float, TrueClass, FalseClass - serialized.update(value: value.to_s) - when String, Symbol - need_dup = false - value = if String === value - # This is the only place where we duplicate the value, currently. - # All other values are immutable primitives (e.g. numbers). - # However, do not duplicate if the string is frozen, or if - # it is later truncated. - need_dup = !value.frozen? - value - else - value.to_s + @@flat_registry.each do |entry| + if (condition = entry[:condition]) && condition.call(value) + serializer_proc = entry.fetch(:proc) + return serializer_proc.call(self, value, name: nil, depth: depth) + end end - max = settings.dynamic_instrumentation.max_capture_string_length - if value.length > max - serialized.update(truncated: true, size: value.length) - value = value[0...max] + + serialized = {type: class_name(cls)} + case value + when NilClass + serialized.update(isNull: true) + when Integer, Float, TrueClass, FalseClass + serialized.update(value: value.to_s) + when Time + # This path also handles DateTime values although they do not need + # to be explicitly added to the case statement. + serialized.update(value: value.iso8601) + when Date + serialized.update(value: value.to_s) + when String, Symbol need_dup = false - end - value = value.dup if need_dup - serialized.update(value: value) - when Array - if depth < 0 - serialized.update(notCapturedReason: "depth") - else - max = settings.dynamic_instrumentation.max_capture_collection_size - if max != 0 && value.length > max - serialized.update(notCapturedReason: "collectionSize", size: value.length) - # same steep failure with array slices. - # https://github.com/soutaro/steep/issues/1219 - value = value[0...max] || [] + value = if String === value + # This is the only place where we duplicate the value, currently. + # All other values are immutable primitives (e.g. numbers). + # However, do not duplicate if the string is frozen, or if + # it is later truncated. + need_dup = !value.frozen? + value + else + value.to_s end - entries = value.map do |elt| - serialize_value(elt, depth: depth - 1) + max = settings.dynamic_instrumentation.max_capture_string_length + if value.length > max + serialized.update(truncated: true, size: value.length) + value = value[0...max] + need_dup = false end - serialized.update(elements: entries) - end - when Hash - if depth < 0 - serialized.update(notCapturedReason: "depth") - else - max = settings.dynamic_instrumentation.max_capture_collection_size - cur = 0 - entries = [] - value.each do |k, v| - if max != 0 && cur >= max + value = value.dup if need_dup + serialized.update(value: value) + when Array + if depth < 0 + serialized.update(notCapturedReason: "depth") + else + max = settings.dynamic_instrumentation.max_capture_collection_size + if max != 0 && value.length > max serialized.update(notCapturedReason: "collectionSize", size: value.length) - break + # same steep failure with array slices. + # https://github.com/soutaro/steep/issues/1219 + value = value[0...max] || [] + end + entries = value.map do |elt| + serialize_value(elt, depth: depth - 1) end - cur += 1 - entries << [serialize_value(k, depth: depth - 1), serialize_value(v, name: k, depth: depth - 1)] + serialized.update(elements: entries) + end + when Hash + if depth < 0 + serialized.update(notCapturedReason: "depth") + else + max = settings.dynamic_instrumentation.max_capture_collection_size + cur = 0 + entries = [] + value.each do |k, v| + if max != 0 && cur >= max + serialized.update(notCapturedReason: "collectionSize", size: value.length) + break + end + cur += 1 + entries << [serialize_value(k, depth: depth - 1), serialize_value(v, name: k, depth: depth - 1)] + end + serialized.update(entries: entries) end - serialized.update(entries: entries) - end - else - if depth < 0 - serialized.update(notCapturedReason: "depth") else - fields = {} - max = settings.dynamic_instrumentation.max_capture_attribute_count - cur = 0 + if depth < 0 + serialized.update(notCapturedReason: "depth") + else + fields = {} + max = settings.dynamic_instrumentation.max_capture_attribute_count + cur = 0 - # MRI and JRuby 9.4.5+ preserve instance variable definition - # order when calling #instance_variables. Previous JRuby versions - # did not preserve order and returned the variables in arbitrary - # order. - # - # The arbitrary order is problematic because 1) when there are - # fewer instance variables than capture limit, the order in which - # the variables are shown in UI will change from one capture to - # the next and generally will be arbitrary to the user, and - # 2) when there are more instance variables than capture limit, - # *which* variables are captured will also change meaning user - # looking at the UI may have "new" instance variables appear and - # existing ones disappear as they are looking at multiple captures. - # - # For consistency, we should have some kind of stable order of - # instance variables on all supported Ruby runtimes, so that the UI - # stays consistent. Given that initial implementation of Ruby DI - # does not support JRuby, we don't handle JRuby's lack of ordering - # of #instance_variables here, but if JRuby is supported in the - # future this may need to be addressed. - ivars = value.instance_variables + # MRI and JRuby 9.4.5+ preserve instance variable definition + # order when calling #instance_variables. Previous JRuby versions + # did not preserve order and returned the variables in arbitrary + # order. + # + # The arbitrary order is problematic because 1) when there are + # fewer instance variables than capture limit, the order in which + # the variables are shown in UI will change from one capture to + # the next and generally will be arbitrary to the user, and + # 2) when there are more instance variables than capture limit, + # *which* variables are captured will also change meaning user + # looking at the UI may have "new" instance variables appear and + # existing ones disappear as they are looking at multiple captures. + # + # For consistency, we should have some kind of stable order of + # instance variables on all supported Ruby runtimes, so that the UI + # stays consistent. Given that initial implementation of Ruby DI + # does not support JRuby, we don't handle JRuby's lack of ordering + # of #instance_variables here, but if JRuby is supported in the + # future this may need to be addressed. + ivars = value.instance_variables - ivars.each do |ivar| - if cur >= max - serialized.update(notCapturedReason: "fieldCount", fields: fields) - break + ivars.each do |ivar| + if cur >= max + serialized.update(notCapturedReason: "fieldCount", fields: fields) + break + end + cur += 1 + fields[ivar] = serialize_value(value.instance_variable_get(ivar), name: ivar, depth: depth - 1) end - cur += 1 - fields[ivar] = serialize_value(value.instance_variable_get(ivar), name: ivar, depth: depth - 1) + serialized.update(fields: fields) end - serialized.update(fields: fields) end + serialized + rescue => exc + telemetry&.report(exc, description: "Error serializing") + {type: class_name(cls), notSerializedReason: exc.to_s} end - serialized end private diff --git a/sig/datadog/di/contrib/active_record.rbs b/sig/datadog/di/contrib/active_record.rbs new file mode 100644 index 00000000000..e69de29bb2d diff --git a/sig/datadog/di/serializer.rbs b/sig/datadog/di/serializer.rbs index eb428098bc1..acaacece168 100644 --- a/sig/datadog/di/serializer.rbs +++ b/sig/datadog/di/serializer.rbs @@ -4,16 +4,23 @@ module Datadog @settings: untyped @redactor: untyped + + @telemetry: Core::Telemetry::Component - def initialize: (untyped settings, untyped redactor) -> void + def initialize: (untyped settings, untyped redactor, ?telemetry: Core::Telemetry::Component) -> void attr_reader settings: Datadog::Core::Configuration::Settings attr_reader redactor: Datadog::DI::Redactor + attr_reader telemetry: Core::Telemetry::Component + def serialize_args: (untyped args, untyped kwargs) -> untyped def serialize_vars: (untyped vars) -> untyped def serialize_value: (untyped value, ?name: String, ?depth: Integer) -> untyped + + def self.register: (?condition: Proc) { + (serializer: Serializer, value: untyped, name: Symbol, depth: Integer) -> untyped } -> void private def class_name: (untyped cls) -> untyped diff --git a/spec/datadog/di/contrib/active_record/serializer_active_record_spec.rb b/spec/datadog/di/contrib/active_record/serializer_active_record_spec.rb new file mode 100644 index 00000000000..df6cf17f317 --- /dev/null +++ b/spec/datadog/di/contrib/active_record/serializer_active_record_spec.rb @@ -0,0 +1,113 @@ +require 'datadog/tracing/contrib/rails/rails_helper' +require "datadog/di/spec_helper" +require "datadog/di/serializer" +require_relative '../../serializer_helper' +require 'active_record' +require "datadog/di/contrib/active_record" + +class SerializerRailsSpecTestEmptyModel < ActiveRecord::Base +end + +class SerializerRailsSpecTestBasicModel < ActiveRecord::Base +end + +RSpec.describe Datadog::DI::Serializer do + di_test + + extend SerializerHelper + + before(:all) do + @original_config = begin + if defined?(::ActiveRecord::Base.connection_db_config) + ::ActiveRecord::Base.connection_db_config + else + ::ActiveRecord::Base.connection_config + end + rescue ActiveRecord::ConnectionNotEstablished + end + + ActiveRecord::Base.establish_connection(mysql_connection_string) + + ActiveRecord::Schema.define(version: 20161003090450) do + create_table 'serializer_rails_spec_test_empty_models', force: :cascade do |t| + end + + create_table 'serializer_rails_spec_test_basic_models', force: :cascade do |t| + t.string 'title' + t.datetime 'created_at', null: false + t.datetime 'updated_at', null: false + end + end + end + + def mysql + { + database: ENV.fetch('TEST_MYSQL_DB', 'mysql'), + host: ENV.fetch('TEST_MYSQL_HOST', '127.0.0.1'), + password: ENV.fetch('TEST_MYSQL_ROOT_PASSWORD', 'root'), + port: ENV.fetch('TEST_MYSQL_PORT', '3306') + } + end + + def mysql_connection_string + "mysql2://root:#{mysql[:password]}@#{mysql[:host]}:#{mysql[:port]}/#{mysql[:database]}" + end + + after(:all) do + ::ActiveRecord::Base.establish_connection(@original_config) if @original_config + end + + let(:redactor) do + Datadog::DI::Redactor.new(settings) + end + + default_settings + + let(:serializer) do + described_class.new(settings, redactor) + end + + describe "#serialize_value" do + let(:serialized) do + serializer.serialize_value(value, **options) + end + + cases = [ + {name: "AR model with no attributes", + input: -> { SerializerRailsSpecTestEmptyModel.new }, + expected: {type: "SerializerRailsSpecTestEmptyModel", entries: [[ + {type: 'Symbol', value: 'attributes'}, + {type: 'Hash', entries: [[{type: 'String', value: 'id'}, {type: 'NilClass', isNull: true}]]}, + ]]}}, + {name: "AR model with empty attributes", + input: -> { SerializerRailsSpecTestBasicModel.new }, + expected: {type: "SerializerRailsSpecTestBasicModel", entries: [[ + {type: 'Symbol', value: 'attributes'}, + {type: 'Hash', entries: [ + [{type: 'String', value: 'id'}, {type: 'NilClass', isNull: true}], + [{type: 'String', value: 'title'}, {type: 'NilClass', isNull: true}], + [{type: 'String', value: 'created_at'}, {type: 'NilClass', isNull: true}], + [{type: 'String', value: 'updated_at'}, {type: 'NilClass', isNull: true}], + ]}, + ]]}}, + {name: "AR model with filled out attributes", + input: -> { + SerializerRailsSpecTestBasicModel.new( + title: 'Hello, world!', created_at: Time.utc(2020, 1, 2), updated_at: Time.utc(2020, 1, 3) + ) + }, + expected: {type: "SerializerRailsSpecTestBasicModel", entries: [[ + {type: 'Symbol', value: 'attributes'}, + {type: 'Hash', entries: [ + [{type: 'String', value: 'id'}, {type: 'NilClass', isNull: true}], + [{type: 'String', value: 'title'}, {type: 'String', value: 'Hello, world!'}], + # TODO serialize Time, Date, DateTime types + [{type: 'String', value: 'created_at'}, {type: 'Time', value: '2020-01-02T00:00:00Z'}], + [{type: 'String', value: 'updated_at'}, {type: 'Time', value: '2020-01-03T00:00:00Z'}], + ]}, + ]]}}, + ] + + define_serialize_value_cases(cases) + end +end diff --git a/spec/datadog/di/serializer_helper.rb b/spec/datadog/di/serializer_helper.rb new file mode 100644 index 00000000000..0559193e268 --- /dev/null +++ b/spec/datadog/di/serializer_helper.rb @@ -0,0 +1,61 @@ +# rubocop:disable Lint/AssignmentInCondition + +module SerializerHelper + def define_serialize_value_cases(cases) + cases.each do |c| + value = c.fetch(:input) + var_name = c[:var_name] + + context c.fetch(:name) do + let(:value) do + if Proc === value + value.call + else + value + end + end + + let(:options) do + {name: var_name} + end + + if expected_matches = c[:expected_matches] + it "serialization matches expectation" do + expect(serialized).to match(expected_matches) + end + else + expected = c.fetch(:expected) + it "serializes exactly as specified" do + expect(serialized).to eq(expected) + end + end + end + end + end + + def default_settings + let(:settings) do + double("settings").tap do |settings| + allow(settings).to receive(:dynamic_instrumentation).and_return(di_settings) + end + end + + let(:di_settings) do + double("di settings").tap do |settings| + allow(settings).to receive(:enabled).and_return(true) + allow(settings).to receive(:propagate_all_exceptions).and_return(false) + allow(settings).to receive(:redacted_identifiers).and_return([]) + allow(settings).to receive(:redacted_type_names).and_return(%w[ + DISerializerSpecSensitiveType DISerializerSpecWildCard* + ]) + allow(settings).to receive(:max_capture_collection_size).and_return(10) + allow(settings).to receive(:max_capture_attribute_count).and_return(10) + # Reduce max capture depth to 2 from default of 3 + allow(settings).to receive(:max_capture_depth).and_return(2) + allow(settings).to receive(:max_capture_string_length).and_return(100) + end + end + end +end + +# rubocop:enable Lint/AssignmentInCondition diff --git a/spec/datadog/di/serializer_spec.rb b/spec/datadog/di/serializer_spec.rb index 2e870d9cad3..d7bce780fe0 100644 --- a/spec/datadog/di/serializer_spec.rb +++ b/spec/datadog/di/serializer_spec.rb @@ -1,5 +1,6 @@ require "datadog/di/spec_helper" require "datadog/di/serializer" +require_relative 'serializer_helper' class DISerializerSpecSensitiveType end @@ -29,30 +30,14 @@ def initialize # Should have no instance variables class DISerializerSpecTestClass; end +class DISerializerCustomExceptionTestClass; end + RSpec.describe Datadog::DI::Serializer do di_test - let(:settings) do - double("settings").tap do |settings| - allow(settings).to receive(:dynamic_instrumentation).and_return(di_settings) - end - end + extend SerializerHelper - let(:di_settings) do - double("di settings").tap do |settings| - allow(settings).to receive(:enabled).and_return(true) - allow(settings).to receive(:propagate_all_exceptions).and_return(false) - allow(settings).to receive(:redacted_identifiers).and_return([]) - allow(settings).to receive(:redacted_type_names).and_return(%w[ - DISerializerSpecSensitiveType DISerializerSpecWildCard* - ]) - allow(settings).to receive(:max_capture_collection_size).and_return(10) - allow(settings).to receive(:max_capture_attribute_count).and_return(10) - # Reduce max capture depth to 2 from default of 3 - allow(settings).to receive(:max_capture_depth).and_return(2) - allow(settings).to receive(:max_capture_string_length).and_return(100) - end - end + default_settings let(:redactor) do Datadog::DI::Redactor.new(settings) @@ -67,26 +52,6 @@ class DISerializerSpecTestClass; end serializer.serialize_value(value, **options) end - def self.define_cases(cases) - cases.each do |c| - value = c.fetch(:input) - expected = c.fetch(:expected) - var_name = c[:var_name] - - context c.fetch(:name) do - let(:value) { value } - - let(:options) do - {name: var_name} - end - - it "serializes as expected" do - expect(serialized).to eq(expected) - end - end - end - end - cases = [ {name: "nil value", input: nil, expected: {type: "NilClass", isNull: true}}, {name: "true value", input: true, expected: {type: "TrueClass", value: "true"}}, @@ -100,9 +65,19 @@ def self.define_cases(cases) expected: {type: "String", notCapturedReason: "redactedIdent"}}, {name: "variable name given and is not a redacted identifier", input: "123", var_name: "normal", expected: {type: "String", value: "123"}}, + # We can assert exact value when the time zone is UTC, + # since we don't know the local time zone ahead of time. + {name: 'Time value in UTC', input: Time.utc(2020, 1, 2, 3, 4, 5), + expected: {type: 'Time', value: '2020-01-02T03:04:05Z'}}, + {name: 'Time value in local time zone', input: Time.local(2020, 1, 2, 3, 4, 5), + expected_matches: {type: 'Time', value: %r{\A2020-01-02T03:04:05[-+]\d\d:\d\d\z}}}, + {name: 'Date value', input: Date.new(2020, 1, 2), + expected: {type: 'Date', value: '2020-01-02'}}, + {name: 'DateTime value', input: DateTime.new(2020, 1, 2, 3, 4, 5), + expected: {type: 'DateTime', value: '2020-01-02T03:04:05+00:00'}}, ] - define_cases(cases) + define_serialize_value_cases(cases) end describe "#serialize_vars" do @@ -395,4 +370,48 @@ def self.define_cases(cases) end end end + + describe '.register' do + context 'with condition' do + before do + described_class.register(condition: lambda { |value| String === value && value =~ /serializer spec hello/ }) do |serializer, value, name:, depth:| + serializer.serialize_value('replacement value') + end + end + + let(:expected) do + {type: 'String', value: 'replacement value'} + end + + it 'invokes custom serializer' do + serialized = serializer.serialize_value('serializer spec hello world') + expect(serialized).to eq(expected) + end + end + end + + context 'when serialization raises an exception' do + before do + # Register a custom serializer that will raise an exception + Datadog::DI::Serializer.register(condition: lambda { |value| DISerializerCustomExceptionTestClass === value }) do |*args| + raise "Test exception" + end + end + + describe "#serialize_value" do + let(:serialized) do + serializer.serialize_value(value, **options) + end + + cases = [ + {name: "serializes other values", input: {a: DISerializerCustomExceptionTestClass.new, b: 1}, + expected: {type: "Hash", entries: [ + [{type: 'Symbol', value: 'a'}, {type: 'DISerializerCustomExceptionTestClass', notSerializedReason: 'Test exception'}], + [{type: 'Symbol', value: 'b'}, {type: 'Integer', value: '1'}], + ]}}, + ] + + define_serialize_value_cases(cases) + end + end end From 817de953548c1ace3903f10adf8192df01fe6647 Mon Sep 17 00:00:00 2001 From: Tony Hsu Date: Mon, 11 Nov 2024 16:34:42 +0100 Subject: [PATCH 02/14] Update circelci --- .circleci/config.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.circleci/config.yml b/.circleci/config.yml index 189a9709d90..78fd03f0172 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -164,6 +164,12 @@ orbs: <<: *test_job_default steps: - checkout + - run: + name: Know the environment + command: | + ruby -v + gem -v + bundle -v - restore_cache: keys: - '{{ .Environment.CIRCLE_CACHE_VERSION }}-bundled-repo-<>-{{ .Environment.CIRCLE_SHA1 }}' From 6ff55136d2d55050e3c4b6e1af5afd095cbc0439 Mon Sep 17 00:00:00 2001 From: Tony Hsu Date: Mon, 11 Nov 2024 16:58:25 +0100 Subject: [PATCH 03/14] Update binary_version --- .circleci/images/primary/binary_version | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.circleci/images/primary/binary_version b/.circleci/images/primary/binary_version index 408b885ce54..bf7aeeb3e2e 100644 --- a/.circleci/images/primary/binary_version +++ b/.circleci/images/primary/binary_version @@ -1 +1 @@ -461 +462 From ed331b3d8d6f83b764c004102a19a42d90689086 Mon Sep 17 00:00:00 2001 From: Tony Hsu Date: Mon, 11 Nov 2024 17:29:02 +0100 Subject: [PATCH 04/14] Check missing deps --- Rakefile | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/Rakefile b/Rakefile index 08d21a2cfdc..b951e3ab2c2 100644 --- a/Rakefile +++ b/Rakefile @@ -56,13 +56,13 @@ namespace :test do end candidates.each do |appraisal_group, _| - command = if appraisal_group.empty? - "bundle exec rake #{spec_task}" - else - gemfile = File.join(File.dirname(__FILE__), 'gemfiles', "#{ruby_runtime}-#{appraisal_group}.gemfile".tr('-', '_')) - "env BUNDLE_GEMFILE=#{gemfile} bundle exec rake #{spec_task}" - end - + env = if appraisal_group.empty? + {} + else + gemfile = File.join(File.dirname(__FILE__), 'gemfiles', "#{ruby_runtime}-#{appraisal_group}.gemfile".tr('-', '_')) + { 'BUNDLE_GEMFILE' => gemfile } + end + command = "bundle check || bundle install && bundle exec rake #{spec_task}" command += "'[#{spec_arguments}]'" if spec_arguments total_executors = ENV.key?('CIRCLE_NODE_TOTAL') ? ENV['CIRCLE_NODE_TOTAL'].to_i : nil @@ -71,9 +71,9 @@ namespace :test do if total_executors && current_executor && total_executors > 1 @execution_count ||= 0 @execution_count += 1 - sh(command) if @execution_count % total_executors == current_executor + Bundler.with_unbundled_env { sh(env, command) } if @execution_count % total_executors == current_executor else - sh(command) + Bundler.with_unbundled_env { sh(env, command) } end end end From 2b8526618f4e412af8354ce22a66398527aee94e Mon Sep 17 00:00:00 2001 From: Tony Hsu Date: Mon, 11 Nov 2024 23:25:37 +0100 Subject: [PATCH 05/14] Lint --- Rakefile | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Rakefile b/Rakefile index b951e3ab2c2..a0a0a2fbd69 100644 --- a/Rakefile +++ b/Rakefile @@ -22,6 +22,7 @@ Dir.glob('tasks/*.rake').each { |r| import r } TEST_METADATA = eval(File.read('Matrixfile')).freeze # rubocop:disable Security/Eval +# rubocop:disable Metrics/BlockLength namespace :test do desc 'Run all tests' task all: TEST_METADATA.map { |k, _| "test:#{k}" } @@ -57,11 +58,11 @@ namespace :test do candidates.each do |appraisal_group, _| env = if appraisal_group.empty? - {} - else - gemfile = File.join(File.dirname(__FILE__), 'gemfiles', "#{ruby_runtime}-#{appraisal_group}.gemfile".tr('-', '_')) - { 'BUNDLE_GEMFILE' => gemfile } - end + {} + else + gemfile = File.join(File.dirname(__FILE__), 'gemfiles', "#{ruby_runtime}-#{appraisal_group}.gemfile".tr('-', '_')) + { 'BUNDLE_GEMFILE' => gemfile } + end command = "bundle check || bundle install && bundle exec rake #{spec_task}" command += "'[#{spec_arguments}]'" if spec_arguments @@ -81,7 +82,6 @@ namespace :test do end desc 'Run RSpec' -# rubocop:disable Metrics/BlockLength namespace :spec do # REMINDER: If adding a new task here, make sure also add it to the `Matrixfile` task all: [:main, :benchmark, From 2745620004edc66fb8278399e2c7af4b13285639 Mon Sep 17 00:00:00 2001 From: Tony Hsu Date: Wed, 6 Nov 2024 02:54:15 +0100 Subject: [PATCH 06/14] Implement list and install --- tasks/dependency.rake | 110 +++++++++++++++++++++++++++--------------- 1 file changed, 70 insertions(+), 40 deletions(-) diff --git a/tasks/dependency.rake b/tasks/dependency.rake index 4af85eb1e9f..44145a408e8 100644 --- a/tasks/dependency.rake +++ b/tasks/dependency.rake @@ -2,48 +2,78 @@ require 'open3' require_relative 'appraisal_conversion' -task :dep => :dependency -task :dependency => %w[dependency:lock] namespace :dependency do - # rubocop:disable Style/MultilineBlockChain - Dir.glob(AppraisalConversion.gemfile_pattern).each do |gemfile| - # desc "Lock the dependencies for #{gemfile}" - task gemfile do - Bundler.with_unbundled_env do - command = +'bundle lock' - command << ' --add-platform x86_64-linux aarch64-linux' unless RUBY_PLATFORM == 'java' - output, = Open3.capture2e({ 'BUNDLE_GEMFILE' => gemfile.to_s }, command) - - puts output + gemfiles = Dir.glob(AppraisalConversion.gemfile_pattern) + + # Replacement for `bundle exec appraisal list` + desc "List dependencies for #{AppraisalConversion.runtime_identifier}" + task :list do + puts "Ahoy! Here is a list of gemfiles you are looking for:\n\n" + + puts "========================================\n" + puts gemfiles + puts "========================================\n" + + puts "You can do a bunch of cool stuff by assign it to the BUNDLE_GEMFILE environment variable, like:\n" + puts "`BUNDLE_GEMFILE=#{gemfiles.sample} bundle install`\n\n" + end + + namespace :lock do + gemfiles.each do |gemfile| + # desc "Lock dependencies for #{gemfile}" + task gemfile do + Bundler.with_unbundled_env do + command = +'bundle lock' + command << ' --add-platform x86_64-linux aarch64-linux' unless RUBY_PLATFORM == 'java' + output, = Open3.capture2e({ 'BUNDLE_GEMFILE' => gemfile.to_s }, command) + + puts output + end end end - end.tap do |gemfiles| - desc "Lock the dependencies for #{AppraisalConversion.runtime_identifier}" - # WHY can't we use `multitask :lock => gemfiles` here? - # - # Running bundler in parallel has various race conditions - # - # Race condition with the file system, particularly worse with JRuby. - # For instance, `Errno::ENOENT: No such file or directory - bundle` is raised with JRuby 9.2 - - # Even with CRuby, `simplcov` declaration with `github` in Gemfile causes - # race condition for the local gem cache with the following error: - - # ``` - # [/usr/local/bundle/bundler/gems/simplecov-3bb6b7ee58bf/simplecov.gemspec] isn't a Gem::Specification (NilClass instead). - # ``` - - # and - - # ``` - # fatal: Unable to create '/usr/local/bundle/bundler/gems/simplecov-3bb6b7ee58bf/.git/index.lock': File exists. - # Another git process seems to be running in this repository, e.g. - # an editor opened by 'git commit'. Please make sure all processes - # are terminated then try again. If it still fails, a git process - # may have crashed in this repository earlier: - # remove the file manually to continue. - # ``` - task :lock => gemfiles end - # rubocop:enable Style/MultilineBlockChain + + # WHY can't we use `multitask` here? + # + # Running bundler in parallel has various race conditions + # + # Race condition with the file system, particularly worse with JRuby. + # For instance, `Errno::ENOENT: No such file or directory - bundle` is raised with JRuby 9.2 + + # Even with CRuby, `simplcov` declaration with `github` in Gemfile causes + # race condition for the local gem cache with the following error: + + # ``` + # [/usr/local/bundle/bundler/gems/simplecov-3bb6b7ee58bf/simplecov.gemspec] isn't a Gem::Specification (NilClass instead). + # ``` + + # and + + # ``` + # fatal: Unable to create '/usr/local/bundle/bundler/gems/simplecov-3bb6b7ee58bf/.git/index.lock': File exists. + # Another git process seems to be running in this repository, e.g. + # an editor opened by 'git commit'. Please make sure all processes + # are terminated then try again. If it still fails, a git process + # may have crashed in this repository earlier: + # remove the file manually to continue. + # ``` + desc "Lock dependencies for #{AppraisalConversion.runtime_identifier}" + task :lock => gemfiles.map { |gemfile| "lock:#{gemfile}" } + + namespace :install do + gemfiles.each do |gemfile| + # desc "Install dependencies for #{gemfile}" + task gemfile => "lock:#{gemfile}" do + Bundler.with_unbundled_env do + output, = Open3.capture2e({ 'BUNDLE_GEMFILE' => gemfile.to_s }, "bundle check || bundle install") + + puts output + end + end + end + end + + # Replacement for `bundle exec appraisal install` + desc "Install dependencies for #{AppraisalConversion.runtime_identifier}" + task :install => gemfiles.map { |gemfile| "install:#{gemfile}" } end From 3b9fc0a906384265ab625303a3f099a106c14a1b Mon Sep 17 00:00:00 2001 From: Tony Hsu Date: Wed, 6 Nov 2024 17:07:20 +0100 Subject: [PATCH 07/14] Dynamic glob files --- tasks/dependency.rake | 73 +++++++++++++++---------------------------- 1 file changed, 25 insertions(+), 48 deletions(-) diff --git a/tasks/dependency.rake b/tasks/dependency.rake index 44145a408e8..ca2049e4f22 100644 --- a/tasks/dependency.rake +++ b/tasks/dependency.rake @@ -3,11 +3,13 @@ require 'open3' require_relative 'appraisal_conversion' namespace :dependency do - gemfiles = Dir.glob(AppraisalConversion.gemfile_pattern) - # Replacement for `bundle exec appraisal list` desc "List dependencies for #{AppraisalConversion.runtime_identifier}" - task :list do + task :list do |t, args| + pattern = args.extras.any? ? args.extras : AppraisalConversion.gemfile_pattern + + gemfiles = Dir.glob(pattern, base: AppraisalConversion.root_path) + puts "Ahoy! Here is a list of gemfiles you are looking for:\n\n" puts "========================================\n" @@ -18,62 +20,37 @@ namespace :dependency do puts "`BUNDLE_GEMFILE=#{gemfiles.sample} bundle install`\n\n" end - namespace :lock do + # Replacement for `bundle exec appraisal bundle lock` + desc "Lock dependencies for #{AppraisalConversion.runtime_identifier}" + task :lock do |t, args| + pattern = args.extras.any? ? args.extras : AppraisalConversion.gemfile_pattern + + gemfiles = Dir.glob(pattern) + gemfiles.each do |gemfile| - # desc "Lock dependencies for #{gemfile}" - task gemfile do - Bundler.with_unbundled_env do - command = +'bundle lock' - command << ' --add-platform x86_64-linux aarch64-linux' unless RUBY_PLATFORM == 'java' - output, = Open3.capture2e({ 'BUNDLE_GEMFILE' => gemfile.to_s }, command) + Bundler.with_unbundled_env do + command = +'bundle lock' + command << ' --add-platform x86_64-linux aarch64-linux' unless RUBY_PLATFORM == 'java' + output, = Open3.capture2e({ 'BUNDLE_GEMFILE' => gemfile.to_s }, command) - puts output - end + puts output end end end - # WHY can't we use `multitask` here? - # - # Running bundler in parallel has various race conditions - # - # Race condition with the file system, particularly worse with JRuby. - # For instance, `Errno::ENOENT: No such file or directory - bundle` is raised with JRuby 9.2 - - # Even with CRuby, `simplcov` declaration with `github` in Gemfile causes - # race condition for the local gem cache with the following error: - - # ``` - # [/usr/local/bundle/bundler/gems/simplecov-3bb6b7ee58bf/simplecov.gemspec] isn't a Gem::Specification (NilClass instead). - # ``` - - # and + # Replacement for `bundle exec appraisal install` + desc "Install dependencies for #{AppraisalConversion.runtime_identifier}" + task :install => :lock do |t, args| + pattern = args.extras.any? ? args.extras : AppraisalConversion.gemfile_pattern - # ``` - # fatal: Unable to create '/usr/local/bundle/bundler/gems/simplecov-3bb6b7ee58bf/.git/index.lock': File exists. - # Another git process seems to be running in this repository, e.g. - # an editor opened by 'git commit'. Please make sure all processes - # are terminated then try again. If it still fails, a git process - # may have crashed in this repository earlier: - # remove the file manually to continue. - # ``` - desc "Lock dependencies for #{AppraisalConversion.runtime_identifier}" - task :lock => gemfiles.map { |gemfile| "lock:#{gemfile}" } + gemfiles = Dir.glob(pattern) - namespace :install do gemfiles.each do |gemfile| - # desc "Install dependencies for #{gemfile}" - task gemfile => "lock:#{gemfile}" do - Bundler.with_unbundled_env do - output, = Open3.capture2e({ 'BUNDLE_GEMFILE' => gemfile.to_s }, "bundle check || bundle install") + Bundler.with_unbundled_env do + output, = Open3.capture2e({ 'BUNDLE_GEMFILE' => gemfile.to_s }, "bundle check || bundle install") - puts output - end + puts output end end end - - # Replacement for `bundle exec appraisal install` - desc "Install dependencies for #{AppraisalConversion.runtime_identifier}" - task :install => gemfiles.map { |gemfile| "install:#{gemfile}" } end From 2a4959bbd18f352633c2236af45bff66dc41b21f Mon Sep 17 00:00:00 2001 From: Tony Hsu Date: Mon, 11 Nov 2024 22:47:18 +0100 Subject: [PATCH 08/14] Update description --- tasks/dependency.rake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks/dependency.rake b/tasks/dependency.rake index ca2049e4f22..62a0c60d8c8 100644 --- a/tasks/dependency.rake +++ b/tasks/dependency.rake @@ -16,7 +16,7 @@ namespace :dependency do puts gemfiles puts "========================================\n" - puts "You can do a bunch of cool stuff by assign it to the BUNDLE_GEMFILE environment variable, like:\n" + puts "You can do a bunch of cool stuff by assigning a gemfile path to the BUNDLE_GEMFILE environment variable, like:\n" puts "`BUNDLE_GEMFILE=#{gemfiles.sample} bundle install`\n\n" end From 5914797a57560ab1979372222a0efef857533b18 Mon Sep 17 00:00:00 2001 From: Tony Hsu Date: Tue, 12 Nov 2024 01:30:59 +0100 Subject: [PATCH 09/14] Remove appraisal update from circleci --- .circleci/config.yml | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 78fd03f0172..0212f4514f9 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -87,11 +87,6 @@ step_appraisal_install: &step_appraisal_install bundle exec appraisal generate # Generate the appraisal files to match the lockfiles in the tree echo "All required gems were found in cache." fi -step_appraisal_update: &step_appraisal_update - run: - name: Update Appraisal gems - command: | # Remove all generated gemfiles and lockfiles, resolve, and install dependencies again - bundle exec appraisal update step_compute_bundle_checksum: &step_compute_bundle_checksum run: name: Compute bundle checksum @@ -181,17 +176,7 @@ orbs: - bundle-{{ .Environment.CIRCLE_CACHE_VERSION }}-{{ checksum ".circleci/images/primary/binary_version" }}-<>-{{ checksum "lib/datadog/version.rb" }} - *check_exact_bundle_cache_hit - *step_bundle_install - - when: - condition: - equal: [ << parameters.edge >>, true ] - steps: - - *step_appraisal_update # Run on latest version of all gems we integrate with - - when: - condition: - not: - equal: [ << parameters.edge >>, true ] - steps: - - *step_appraisal_install # Run on a stable set of gems we integrate with + - *step_appraisal_install # Run on a stable set of gems we integrate with - *save_bundle_checksum - save_cache: key: '{{ .Environment.CIRCLE_CACHE_VERSION }}-bundled-repo-<>-{{ .Environment.CIRCLE_SHA1 }}' From b561e6eb33a02285c6f5447a1415d465ad157d15 Mon Sep 17 00:00:00 2001 From: Tony Hsu Date: Tue, 12 Nov 2024 01:32:46 +0100 Subject: [PATCH 10/14] Ensure appraisal install --- .circleci/config.yml | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 0212f4514f9..2e4d199d223 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -81,12 +81,8 @@ step_appraisal_install: &step_appraisal_install run: name: Install Appraisal gems command: | - if [ "$CI_BUNDLE_CACHE_HIT" != 1 ]; then - bundle exec appraisal install - else - bundle exec appraisal generate # Generate the appraisal files to match the lockfiles in the tree - echo "All required gems were found in cache." - fi + bundle exec appraisal generate + bundle exec rake dependency:install step_compute_bundle_checksum: &step_compute_bundle_checksum run: name: Compute bundle checksum From 8bf7a1d1809ec27325d7578aa92e3f01b1e27347 Mon Sep 17 00:00:00 2001 From: Tony Hsu Date: Tue, 12 Nov 2024 01:53:26 +0100 Subject: [PATCH 11/14] Replace open3 with sh in dependency task --- tasks/dependency.rake | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/tasks/dependency.rake b/tasks/dependency.rake index 62a0c60d8c8..362baa67f28 100644 --- a/tasks/dependency.rake +++ b/tasks/dependency.rake @@ -1,5 +1,3 @@ -require 'open3' - require_relative 'appraisal_conversion' namespace :dependency do @@ -31,9 +29,7 @@ namespace :dependency do Bundler.with_unbundled_env do command = +'bundle lock' command << ' --add-platform x86_64-linux aarch64-linux' unless RUBY_PLATFORM == 'java' - output, = Open3.capture2e({ 'BUNDLE_GEMFILE' => gemfile.to_s }, command) - - puts output + sh({ 'BUNDLE_GEMFILE' => gemfile.to_s }, command) end end end @@ -47,9 +43,7 @@ namespace :dependency do gemfiles.each do |gemfile| Bundler.with_unbundled_env do - output, = Open3.capture2e({ 'BUNDLE_GEMFILE' => gemfile.to_s }, "bundle check || bundle install") - - puts output + sh({ 'BUNDLE_GEMFILE' => gemfile.to_s }, 'bundle check || bundle install') end end end From c240019c845b97becbd446f3ae64c9d5c205458f Mon Sep 17 00:00:00 2001 From: Tony Hsu Date: Tue, 12 Nov 2024 02:09:14 +0100 Subject: [PATCH 12/14] Restore cache --- .circleci/config.yml | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 2e4d199d223..3ff15451f1d 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -81,8 +81,13 @@ step_appraisal_install: &step_appraisal_install run: name: Install Appraisal gems command: | - bundle exec appraisal generate - bundle exec rake dependency:install + if [ "$CI_BUNDLE_CACHE_HIT" != 1 ]; then + bundle exec appraisal generate + bundle exec rake dependency:install + else + bundle exec appraisal generate + echo "All required gems were found in cache." + fi step_compute_bundle_checksum: &step_compute_bundle_checksum run: name: Compute bundle checksum From c923f7c16b4190e17f35e8377fafd601be30743c Mon Sep 17 00:00:00 2001 From: Tony Hsu Date: Tue, 12 Nov 2024 02:20:37 +0100 Subject: [PATCH 13/14] Add gemfiles/*.gemfile to cache checksum --- .circleci/config.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 3ff15451f1d..05029e272f6 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -65,7 +65,7 @@ save_bundle_checksum: &save_bundle_checksum command: | if [ "$CI_BUNDLE_CACHE_HIT" != 1 ]; then # Recompute gemfiles/*.lock checksum, as those files might have changed - cat Gemfile Gemfile.lock Appraisals gemfiles/*.gemfile.lock | md5sum > .circleci/bundle_checksum + cat Gemfile Gemfile.lock Appraisals gemfiles/*.gemfile gemfiles/*.gemfile.lock | md5sum > .circleci/bundle_checksum fi cp .circleci/bundle_checksum /usr/local/bundle/bundle_checksum step_bundle_install: &step_bundle_install @@ -96,7 +96,7 @@ step_compute_bundle_checksum: &step_compute_bundle_checksum # updating the gemset lock files produces extremely large commits. command: | bundle lock # Create Gemfile.lock - cat Gemfile Gemfile.lock Appraisals gemfiles/*.gemfile.lock | md5sum > .circleci/bundle_checksum + cat Gemfile Gemfile.lock Appraisals gemfiles/*.gemfile gemfiles/*.gemfile.lock | md5sum > .circleci/bundle_checksum step_run_all_tests: &step_run_all_tests run: name: Run tests From 48d180ee47c50a962c0a7ff3cbf6809dd1b28fa5 Mon Sep 17 00:00:00 2001 From: Tony Hsu Date: Tue, 12 Nov 2024 02:37:17 +0100 Subject: [PATCH 14/14] Move jq installation earlier --- .circleci/config.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 05029e272f6..8e08bd15f98 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -285,6 +285,10 @@ orbs: curl --silent --show-error --location --fail --retry 3 --output /usr/local/bin/dockerize $DOCKERIZE_URL chmod +x /usr/local/bin/dockerize dockerize --version + # Test agent uses `jq` to check results + - run: + name: Install jq + command: apt update && apt install jq -y # Wait for containers to start - docker-wait: port: 5432 @@ -307,10 +311,6 @@ orbs: host: "testagent" port: 9126 - *step_run_all_tests - - run: - # Test agent uses `jq` to check results - name: Install jq - command: apt update && apt install jq -y - *step_get_test_agent_trace_check_results - store_test_results: path: /tmp/rspec