From 3bc654266ad36e8eccc68a047100c74d369bbad9 Mon Sep 17 00:00:00 2001 From: bryn Date: Tue, 6 Aug 2024 16:20:53 +0100 Subject: [PATCH 1/3] Revert "Datadog sampling priority not set (#5703)" This reverts commit 901eb563a1c669542e99e066a70cbfa6612ee5d2. --- .../datadog_exporter/exporter/model/v05.rs | 8 +++- .../telemetry/tracing/datadog_exporter/mod.rs | 41 +++++++++++++++---- .../tests/integration/telemetry/datadog.rs | 14 ------- 3 files changed, 40 insertions(+), 23 deletions(-) diff --git a/apollo-router/src/plugins/telemetry/tracing/datadog_exporter/exporter/model/v05.rs b/apollo-router/src/plugins/telemetry/tracing/datadog_exporter/exporter/model/v05.rs index 8cd3f8e66f..fd1590966e 100644 --- a/apollo-router/src/plugins/telemetry/tracing/datadog_exporter/exporter/model/v05.rs +++ b/apollo-router/src/plugins/telemetry/tracing/datadog_exporter/exporter/model/v05.rs @@ -128,8 +128,12 @@ fn write_unified_tag<'a>( Ok(()) } -fn get_sampling_priority(_span: &SpanData) -> f64 { - 1.0 +fn get_sampling_priority(span: &SpanData) -> f64 { + if span.span_context.trace_state().priority_sampling_enabled() { + 1.0 + } else { + 0.0 + } } fn get_measuring(span: &SpanData) -> f64 { diff --git a/apollo-router/src/plugins/telemetry/tracing/datadog_exporter/mod.rs b/apollo-router/src/plugins/telemetry/tracing/datadog_exporter/mod.rs index d632eb5872..1c586d48c8 100644 --- a/apollo-router/src/plugins/telemetry/tracing/datadog_exporter/mod.rs +++ b/apollo-router/src/plugins/telemetry/tracing/datadog_exporter/mod.rs @@ -176,7 +176,7 @@ pub(crate) mod propagator { const DATADOG_SAMPLING_PRIORITY_HEADER: &str = "x-datadog-sampling-priority"; const TRACE_FLAG_DEFERRED: TraceFlags = TraceFlags::new(0x02); - pub(crate) const TRACE_STATE_PRIORITY_SAMPLING: &str = "psr"; + const TRACE_STATE_PRIORITY_SAMPLING: &str = "psr"; pub(crate) const TRACE_STATE_MEASURE: &str = "m"; pub(crate) const TRACE_STATE_TRUE_VALUE: &str = "1"; pub(crate) const TRACE_STATE_FALSE_VALUE: &str = "0"; @@ -243,6 +243,10 @@ pub(crate) mod propagator { fn with_measuring(&self, enabled: bool) -> TraceState; fn measuring_enabled(&self) -> bool; + + fn with_priority_sampling(&self, enabled: bool) -> TraceState; + + fn priority_sampling_enabled(&self) -> bool; } impl DatadogTraceState for TraceState { @@ -256,6 +260,20 @@ pub(crate) mod propagator { .map(trace_flag_to_boolean) .unwrap_or_default() } + + fn with_priority_sampling(&self, enabled: bool) -> TraceState { + self.insert( + TRACE_STATE_PRIORITY_SAMPLING, + boolean_to_trace_state_flag(enabled), + ) + .unwrap_or_else(|_err| self.clone()) + } + + fn priority_sampling_enabled(&self) -> bool { + self.get(TRACE_STATE_PRIORITY_SAMPLING) + .map(trace_flag_to_boolean) + .unwrap_or_default() + } } enum SamplingPriority { @@ -293,7 +311,16 @@ pub(crate) mod propagator { } fn create_trace_state_and_flags(trace_flags: TraceFlags) -> (TraceState, TraceFlags) { - (TraceState::default(), trace_flags) + if trace_flags & TRACE_FLAG_DEFERRED == TRACE_FLAG_DEFERRED { + (TraceState::default(), trace_flags) + } else { + ( + DatadogTraceStateBuilder::default() + .with_priority_sampling(trace_flags.is_sampled()) + .build(), + TraceFlags::SAMPLED, + ) + } } impl DatadogPropagator { @@ -373,7 +400,7 @@ pub(crate) mod propagator { } fn get_sampling_priority(span_context: &SpanContext) -> SamplingPriority { - if span_context.is_sampled() { + if span_context.trace_state().priority_sampling_enabled() { SamplingPriority::AutoKeep } else { SamplingPriority::AutoReject @@ -433,8 +460,8 @@ pub(crate) mod propagator { (vec![(DATADOG_TRACE_ID_HEADER, "garbage")], SpanContext::empty_context()), (vec![(DATADOG_TRACE_ID_HEADER, "1234"), (DATADOG_PARENT_ID_HEADER, "garbage")], SpanContext::new(TraceId::from_u128(1234), SpanId::INVALID, TRACE_FLAG_DEFERRED, true, TraceState::default())), (vec![(DATADOG_TRACE_ID_HEADER, "1234"), (DATADOG_PARENT_ID_HEADER, "12")], SpanContext::new(TraceId::from_u128(1234), SpanId::from_u64(12), TRACE_FLAG_DEFERRED, true, TraceState::default())), - (vec![(DATADOG_TRACE_ID_HEADER, "1234"), (DATADOG_PARENT_ID_HEADER, "12"), (DATADOG_SAMPLING_PRIORITY_HEADER, "0")], SpanContext::new(TraceId::from_u128(1234), SpanId::from_u64(12), TraceFlags::default(), true, TraceState::default())), - (vec![(DATADOG_TRACE_ID_HEADER, "1234"), (DATADOG_PARENT_ID_HEADER, "12"), (DATADOG_SAMPLING_PRIORITY_HEADER, "1")], SpanContext::new(TraceId::from_u128(1234), SpanId::from_u64(12), TraceFlags::SAMPLED, true, TraceState::default())), + (vec![(DATADOG_TRACE_ID_HEADER, "1234"), (DATADOG_PARENT_ID_HEADER, "12"), (DATADOG_SAMPLING_PRIORITY_HEADER, "0")], SpanContext::new(TraceId::from_u128(1234), SpanId::from_u64(12), TraceFlags::SAMPLED, true, DatadogTraceStateBuilder::default().with_priority_sampling(false).build())), + (vec![(DATADOG_TRACE_ID_HEADER, "1234"), (DATADOG_PARENT_ID_HEADER, "12"), (DATADOG_SAMPLING_PRIORITY_HEADER, "1")], SpanContext::new(TraceId::from_u128(1234), SpanId::from_u64(12), TraceFlags::SAMPLED, true, DatadogTraceStateBuilder::default().with_priority_sampling(true).build())), ] } @@ -446,8 +473,8 @@ pub(crate) mod propagator { (vec![], SpanContext::new(TraceId::from_hex("1234").unwrap(), SpanId::INVALID, TRACE_FLAG_DEFERRED, true, TraceState::default())), (vec![], SpanContext::new(TraceId::from_hex("1234").unwrap(), SpanId::INVALID, TraceFlags::SAMPLED, true, TraceState::default())), (vec![(DATADOG_TRACE_ID_HEADER, "1234"), (DATADOG_PARENT_ID_HEADER, "12")], SpanContext::new(TraceId::from_u128(1234), SpanId::from_u64(12), TRACE_FLAG_DEFERRED, true, TraceState::default())), - (vec![(DATADOG_TRACE_ID_HEADER, "1234"), (DATADOG_PARENT_ID_HEADER, "12"), (DATADOG_SAMPLING_PRIORITY_HEADER, "0")], SpanContext::new(TraceId::from_u128(1234), SpanId::from_u64(12), TraceFlags::default(), true, TraceState::default())), - (vec![(DATADOG_TRACE_ID_HEADER, "1234"), (DATADOG_PARENT_ID_HEADER, "12"), (DATADOG_SAMPLING_PRIORITY_HEADER, "1")], SpanContext::new(TraceId::from_u128(1234), SpanId::from_u64(12), TraceFlags::SAMPLED, true, TraceState::default())), + (vec![(DATADOG_TRACE_ID_HEADER, "1234"), (DATADOG_PARENT_ID_HEADER, "12"), (DATADOG_SAMPLING_PRIORITY_HEADER, "0")], SpanContext::new(TraceId::from_u128(1234), SpanId::from_u64(12), TraceFlags::SAMPLED, true, DatadogTraceStateBuilder::default().with_priority_sampling(false).build())), + (vec![(DATADOG_TRACE_ID_HEADER, "1234"), (DATADOG_PARENT_ID_HEADER, "12"), (DATADOG_SAMPLING_PRIORITY_HEADER, "1")], SpanContext::new(TraceId::from_u128(1234), SpanId::from_u64(12), TraceFlags::SAMPLED, true, DatadogTraceStateBuilder::default().with_priority_sampling(true).build())), ] } diff --git a/apollo-router/tests/integration/telemetry/datadog.rs b/apollo-router/tests/integration/telemetry/datadog.rs index 613242ca39..9a13512e79 100644 --- a/apollo-router/tests/integration/telemetry/datadog.rs +++ b/apollo-router/tests/integration/telemetry/datadog.rs @@ -427,7 +427,6 @@ impl TraceSpec { tracing::debug!("{}", serde_json::to_string_pretty(&trace)?); self.verify_trace_participants(&trace)?; self.verify_operation_name(&trace)?; - self.verify_priority_sampled(&trace)?; self.verify_version(&trace)?; self.verify_spans_present(&trace)?; self.validate_span_kinds(&trace)?; @@ -577,17 +576,4 @@ impl TraceSpec { } Ok(()) } - - fn verify_priority_sampled(&self, trace: &Value) -> Result<(), BoxError> { - let binding = trace.select_path("$.._sampling_priority_v1")?; - let sampling_priority = binding.first(); - assert_eq!( - sampling_priority - .expect("sampling priority expected") - .as_f64() - .expect("sampling priority must be a number"), - 1.0 - ); - Ok(()) - } } From 29ee373ed5046926a4647a94952fbd3315d17d38 Mon Sep 17 00:00:00 2001 From: bryn Date: Wed, 7 Aug 2024 11:22:29 +0100 Subject: [PATCH 2/3] Re-add lost test --- .../tests/integration/telemetry/datadog.rs | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/apollo-router/tests/integration/telemetry/datadog.rs b/apollo-router/tests/integration/telemetry/datadog.rs index 9a13512e79..613242ca39 100644 --- a/apollo-router/tests/integration/telemetry/datadog.rs +++ b/apollo-router/tests/integration/telemetry/datadog.rs @@ -427,6 +427,7 @@ impl TraceSpec { tracing::debug!("{}", serde_json::to_string_pretty(&trace)?); self.verify_trace_participants(&trace)?; self.verify_operation_name(&trace)?; + self.verify_priority_sampled(&trace)?; self.verify_version(&trace)?; self.verify_spans_present(&trace)?; self.validate_span_kinds(&trace)?; @@ -576,4 +577,17 @@ impl TraceSpec { } Ok(()) } + + fn verify_priority_sampled(&self, trace: &Value) -> Result<(), BoxError> { + let binding = trace.select_path("$.._sampling_priority_v1")?; + let sampling_priority = binding.first(); + assert_eq!( + sampling_priority + .expect("sampling priority expected") + .as_f64() + .expect("sampling priority must be a number"), + 1.0 + ); + Ok(()) + } } From ebfcff5a3f8beb301e13857e59a5507e25424c5f Mon Sep 17 00:00:00 2001 From: bryn Date: Wed, 7 Aug 2024 11:30:05 +0100 Subject: [PATCH 3/3] Changelog --- .changesets/fix_bryn_revert_5703.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .changesets/fix_bryn_revert_5703.md diff --git a/.changesets/fix_bryn_revert_5703.md b/.changesets/fix_bryn_revert_5703.md new file mode 100644 index 0000000000..56145fc3cd --- /dev/null +++ b/.changesets/fix_bryn_revert_5703.md @@ -0,0 +1,5 @@ +### Datadog underreported APM metrics ([PR #5780](https://github.com/apollographql/router/pull/5780)) + +This reverts [PR #5703](https://github.com/apollographql/router/pull/5703) which causes Datadog APM span metrics to be under-reported. + +By [@BrynCooke](https://github.com/BrynCooke) in https://github.com/apollographql/router/pull/5780