Skip to content

Commit

Permalink
Datadog exporter resource behavior and mapping (#5386)
Browse files Browse the repository at this point in the history
Signed-off-by: Benjamin Coenen <5719034+bnjjj@users.noreply.github.com>
Co-authored-by: bryn <bryn@apollographql.com>
Co-authored-by: Bryn Cooke <BrynCooke@gmail.com>
  • Loading branch information
3 people authored Jul 3, 2024
1 parent ceac7ad commit bc0f8a9
Show file tree
Hide file tree
Showing 37 changed files with 1,047 additions and 161 deletions.
49 changes: 49 additions & 0 deletions .changesets/breaking_bnjjj_fix_5282.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
### Datadog exporter resource behavior and mapping ([Issue #5282](https://github.com/apollographql/router/issues/5282))

Users of the Datadog trace exporter may have noticed that span and resource naming is not as expected.
Unlike other APMs, Datadog expects static span names, and then uses resource mapping to provide additional context.

The default behavior of the Datadog exporter has now been changed to support this and give a better user experience.

```yaml
telemetry:
  exporters:
    tracing:
      datadog:
        enabled: true
        # Enables resource mapping, previously disabled by default, but now enabled.
        enable_span_mapping: true
        # Enables fixed span names, defaults to true.
        fixed_span_names: true

  instrumentation:
    spans:
      mode: spec_compliant

```
The following default resource mappings are applied:
| OpenTelemetry Span Name | Span Attribute Used as Datadog Resource Name |
|-------------------------|-----------------------------|
| `request` | `http.route` |
| `router` | `http.route` |
| `supergraph` | `graphql.operation.name` |
| `query_planning` | `graphql.operation.name` |
| `subgraph` | `subgraph.name` |
| `subgraph_request` | `graphql.operation.name` |
| `http_request` | `http.route` |

You can override the default resource mappings by specifying the `resource_mapping` configuration:

```yaml
telemetry:
  exporters:
    tracing:
      datadog:
        enabled: true
        resource_mapping:
          # Use `my.span.attribute` as the resource name for the `router` span
          router: "my.span.attribute"
```
By [@bnjjj](https://github.com/bnjjj) and [@bryncooke](https://github.com/bryncooke) in https://github.com/apollographql/router/pull/5386
1 change: 1 addition & 0 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ executors:
- image: cimg/redis:7.2.4
- image: jaegertracing/all-in-one:1.54.0
- image: openzipkin/zipkin:2.23.2
- image: ghcr.io/datadog/dd-apm-test-agent/ddapm-test-agent:v1.17.0
resource_class: xlarge
environment:
CARGO_BUILD_JOBS: 4
Expand Down
6 changes: 2 additions & 4 deletions apollo-router/src/axum_factory/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,12 @@ use tower_http::trace::MakeSpan;
use tower_service::Service;
use tracing::Span;

use crate::plugins::telemetry::consts::OTEL_STATUS_CODE;
use crate::plugins::telemetry::consts::OTEL_STATUS_CODE_ERROR;
use crate::plugins::telemetry::SpanMode;
use crate::plugins::telemetry::OTEL_STATUS_CODE;
use crate::plugins::telemetry::OTEL_STATUS_CODE_ERROR;
use crate::uplink::license_enforcement::LicenseState;
use crate::uplink::license_enforcement::LICENSE_EXPIRED_SHORT_MESSAGE;

pub(crate) const REQUEST_SPAN_NAME: &str = "request";

#[derive(Clone, Default)]
pub(crate) struct PropagatingMakeSpan {
pub(crate) license: LicenseState,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1435,7 +1435,7 @@ expression: "&schema"
"description": "#/definitions/BatchProcessorConfig"
},
"enable_span_mapping": {
"default": false,
"default": true,
"description": "Enable datadog span mapping for span name and resource name.",
"type": "boolean"
},
Expand All @@ -1446,6 +1446,19 @@ expression: "&schema"
"endpoint": {
"$ref": "#/definitions/UriEndpoint",
"description": "#/definitions/UriEndpoint"
},
"fixed_span_names": {
"default": true,
"description": "Fixes the span names, this means that the APM view will show the original span names in the operation dropdown.",
"type": "boolean"
},
"resource_mapping": {
"additionalProperties": {
"type": "string"
},
"default": {},
"description": "Custom mapping to be used as the resource field in spans, defaults to: router -> http.route supergraph -> graphql.operation.name query_planning -> graphql.operation.name subgraph -> subgraph.name subgraph_request -> subgraph.name http_request -> http.route",
"type": "object"
}
},
"required": [
Expand Down Expand Up @@ -6906,6 +6919,13 @@ expression: "&schema"
"decimal"
],
"type": "string"
},
{
"description": "Datadog",
"enum": [
"datadog"
],
"type": "string"
}
]
},
Expand Down
4 changes: 2 additions & 2 deletions apollo-router/src/plugins/telemetry/apollo_otlp_exporter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,11 @@ use crate::plugins::telemetry::apollo::router_id;
use crate::plugins::telemetry::apollo_exporter::get_uname;
use crate::plugins::telemetry::apollo_exporter::ROUTER_REPORT_TYPE_TRACES;
use crate::plugins::telemetry::apollo_exporter::ROUTER_TRACING_PROTOCOL_OTLP;
use crate::plugins::telemetry::consts::SUBGRAPH_SPAN_NAME;
use crate::plugins::telemetry::consts::SUPERGRAPH_SPAN_NAME;
use crate::plugins::telemetry::tracing::apollo_telemetry::APOLLO_PRIVATE_OPERATION_SIGNATURE;
use crate::plugins::telemetry::tracing::BatchProcessorConfig;
use crate::plugins::telemetry::GLOBAL_TRACER_NAME;
use crate::plugins::telemetry::SUBGRAPH_SPAN_NAME;
use crate::plugins::telemetry::SUPERGRAPH_SPAN_NAME;

/// The Apollo Otlp exporter is a thin wrapper around the OTLP SpanExporter.
#[derive(Clone, Derivative)]
Expand Down
3 changes: 3 additions & 0 deletions apollo-router/src/plugins/telemetry/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,9 @@ pub(crate) enum TraceIdFormat {
///
/// (e.g. Trace ID 16 -> 16)
Decimal,

/// Datadog
Datadog,
}

/// Apollo usage report signature normalization algorithm
Expand Down
5 changes: 3 additions & 2 deletions apollo-router/src/plugins/telemetry/config_new/spans.rs
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@ mod test {
use crate::plugins::telemetry::config_new::DefaultForLevel;
use crate::plugins::telemetry::config_new::Selectors;
use crate::plugins::telemetry::otlp::TelemetryDataKind;
use crate::plugins::telemetry::OTEL_NAME;
use crate::services::router;
use crate::services::subgraph;
use crate::services::supergraph;
Expand Down Expand Up @@ -548,7 +549,7 @@ mod test {
},
);
spans.attributes.custom.insert(
"otel.name".to_string(),
OTEL_NAME.to_string(),
Conditional {
selector: RouterSelector::StaticField {
r#static: String::from("new_name").into(),
Expand All @@ -568,7 +569,7 @@ mod test {
.any(|key_val| key_val.key == opentelemetry::Key::from_static_str("test")));

assert!(values.iter().any(|key_val| key_val.key
== opentelemetry::Key::from_static_str("otel.name")
== opentelemetry::Key::from_static_str(OTEL_NAME)
&& key_val.value == opentelemetry::Value::String(String::from("new_name").into())));
}

Expand Down
31 changes: 31 additions & 0 deletions apollo-router/src/plugins/telemetry/consts.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
// Shared telemetry constants: reserved `otel.*` attribute keys, exception
// field keys, and the names of the router's built-in spans.

// Reserved `otel.*` keys. `update_otel_data` in `dynamic_attribute.rs` matches
// on the name, kind, status code and status message keys to set the
// corresponding fields of the span's OpenTelemetry data.
pub(crate) const OTEL_NAME: &str = "otel.name";
// NOTE(review): presumably carries the span's original name once the exported
// name has been overridden or fixed — confirm against the exporter code.
pub(crate) const OTEL_ORIGINAL_NAME: &str = "otel.original_name";
pub(crate) const OTEL_KIND: &str = "otel.kind";
pub(crate) const OTEL_STATUS_CODE: &str = "otel.status_code";
pub(crate) const OTEL_STATUS_MESSAGE: &str = "otel.status_message";
// The lint allowance suggests nothing references this key yet; it is kept
// alongside the other status keys.
#[allow(dead_code)]
pub(crate) const OTEL_STATUS_DESCRIPTION: &str = "otel.status_description";
// Values written under `OTEL_STATUS_CODE`.
pub(crate) const OTEL_STATUS_CODE_OK: &str = "OK";
pub(crate) const OTEL_STATUS_CODE_ERROR: &str = "ERROR";

// Keys for exception details.
// NOTE(review): usage is not visible from this file — confirm where they are recorded.
pub(crate) const FIELD_EXCEPTION_MESSAGE: &str = "exception.message";
pub(crate) const FIELD_EXCEPTION_STACKTRACE: &str = "exception.stacktrace";

// Names of the spans the router creates itself.
pub(crate) const SUPERGRAPH_SPAN_NAME: &str = "supergraph";
pub(crate) const SUBGRAPH_SPAN_NAME: &str = "subgraph";
pub(crate) const ROUTER_SPAN_NAME: &str = "router";
pub(crate) const EXECUTION_SPAN_NAME: &str = "execution";
pub(crate) const REQUEST_SPAN_NAME: &str = "request";
pub(crate) const QUERY_PLANNING_SPAN_NAME: &str = "query_planning";
pub(crate) const HTTP_REQUEST_SPAN_NAME: &str = "http_request";
pub(crate) const SUBGRAPH_REQUEST_SPAN_NAME: &str = "subgraph_request";

// Every built-in span name declared above, gathered in one array.
// The length is part of the type, so adding a span name means updating both
// the `8` and the element list.
pub(crate) const BUILT_IN_SPAN_NAMES: [&str; 8] = [
REQUEST_SPAN_NAME,
ROUTER_SPAN_NAME,
SUPERGRAPH_SPAN_NAME,
SUBGRAPH_SPAN_NAME,
SUBGRAPH_REQUEST_SPAN_NAME,
HTTP_REQUEST_SPAN_NAME,
QUERY_PLANNING_SPAN_NAME,
EXECUTION_SPAN_NAME,
];
18 changes: 10 additions & 8 deletions apollo-router/src/plugins/telemetry/dynamic_attribute.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,13 @@ use tracing_subscriber::registry::LookupSpan;
use tracing_subscriber::Layer;
use tracing_subscriber::Registry;

use super::consts::OTEL_KIND;
use super::consts::OTEL_NAME;
use super::consts::OTEL_STATUS_CODE;
use super::consts::OTEL_STATUS_MESSAGE;
use super::formatters::APOLLO_PRIVATE_PREFIX;
use super::otel::layer::str_to_span_kind;
use super::otel::layer::str_to_status;
use super::otel::layer::SPAN_KIND_FIELD;
use super::otel::layer::SPAN_NAME_FIELD;
use super::otel::layer::SPAN_STATUS_CODE_FIELD;
use super::otel::layer::SPAN_STATUS_MESSAGE_FIELD;
use super::otel::OtelData;
use super::reload::IsSampled;

Expand Down Expand Up @@ -197,10 +197,12 @@ impl SpanDynAttribute for ::tracing::Span {

fn update_otel_data(otel_data: &mut OtelData, key: &Key, value: &opentelemetry::Value) {
match key.as_str() {
SPAN_NAME_FIELD => otel_data.forced_span_name = Some(value.to_string()),
SPAN_KIND_FIELD => otel_data.builder.span_kind = str_to_span_kind(&value.as_str()),
SPAN_STATUS_CODE_FIELD => otel_data.forced_status = str_to_status(&value.as_str()).into(),
SPAN_STATUS_MESSAGE_FIELD => {
OTEL_NAME if otel_data.forced_span_name.is_none() => {
otel_data.forced_span_name = Some(value.to_string())
}
OTEL_KIND => otel_data.builder.span_kind = str_to_span_kind(&value.as_str()),
OTEL_STATUS_CODE => otel_data.forced_status = str_to_status(&value.as_str()).into(),
OTEL_STATUS_MESSAGE => {
otel_data.builder.status =
opentelemetry::trace::Status::error(value.as_str().to_string())
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@ use tracing_subscriber::layer::Context;
use tracing_subscriber::registry::LookupSpan;
use tracing_subscriber::Layer;

use crate::axum_factory::utils::REQUEST_SPAN_NAME;
use crate::plugins::telemetry::EXECUTION_SPAN_NAME;
use crate::plugins::telemetry::SUBGRAPH_SPAN_NAME;
use crate::plugins::telemetry::SUPERGRAPH_SPAN_NAME;
use crate::services::QUERY_PLANNING_SPAN_NAME;
use crate::plugins::telemetry::consts::EXECUTION_SPAN_NAME;
use crate::plugins::telemetry::consts::QUERY_PLANNING_SPAN_NAME;
use crate::plugins::telemetry::consts::REQUEST_SPAN_NAME;
use crate::plugins::telemetry::consts::SUBGRAPH_SPAN_NAME;
use crate::plugins::telemetry::consts::SUPERGRAPH_SPAN_NAME;

const SUBGRAPH_ATTRIBUTE_NAME: &str = "apollo.subgraph.name";

Expand Down
48 changes: 27 additions & 21 deletions apollo-router/src/plugins/telemetry/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ use opentelemetry::trace::TraceState;
use opentelemetry::trace::TracerProvider;
use opentelemetry::Key;
use opentelemetry::KeyValue;
use opentelemetry_api::trace::TraceId;
use opentelemetry_semantic_conventions::trace::HTTP_REQUEST_METHOD;
use parking_lot::Mutex;
use rand::Rng;
Expand Down Expand Up @@ -78,7 +79,6 @@ use self::tracing::apollo_telemetry::CLIENT_NAME_KEY;
use self::tracing::apollo_telemetry::CLIENT_VERSION_KEY;
use crate::apollo_studio_interop::ExtendedReferenceStats;
use crate::apollo_studio_interop::ReferencedEnums;
use crate::axum_factory::utils::REQUEST_SPAN_NAME;
use crate::context::CONTAINS_GRAPHQL_ERROR;
use crate::context::OPERATION_KIND;
use crate::context::OPERATION_NAME;
Expand All @@ -100,6 +100,15 @@ use crate::plugins::telemetry::config::TracingCommon;
use crate::plugins::telemetry::config_new::cost::add_cost_attributes;
use crate::plugins::telemetry::config_new::graphql::GraphQLInstruments;
use crate::plugins::telemetry::config_new::instruments::SupergraphInstruments;
use crate::plugins::telemetry::config_new::trace_id;
use crate::plugins::telemetry::config_new::DatadogId;
use crate::plugins::telemetry::consts::EXECUTION_SPAN_NAME;
use crate::plugins::telemetry::consts::OTEL_NAME;
use crate::plugins::telemetry::consts::OTEL_STATUS_CODE;
use crate::plugins::telemetry::consts::OTEL_STATUS_CODE_ERROR;
use crate::plugins::telemetry::consts::OTEL_STATUS_CODE_OK;
use crate::plugins::telemetry::consts::REQUEST_SPAN_NAME;
use crate::plugins::telemetry::consts::ROUTER_SPAN_NAME;
use crate::plugins::telemetry::dynamic_attribute::SpanDynAttribute;
use crate::plugins::telemetry::fmt_layer::create_fmt_layer;
use crate::plugins::telemetry::metrics::apollo::histogram::ListLengthHistogram;
Expand Down Expand Up @@ -134,7 +143,6 @@ use crate::services::SubgraphResponse;
use crate::services::SupergraphRequest;
use crate::services::SupergraphResponse;
use crate::spec::operation_limits::OperationLimits;
use crate::tracer::TraceId;
use crate::Context;
use crate::ListenAddr;

Expand All @@ -143,6 +151,7 @@ pub(crate) mod apollo_exporter;
pub(crate) mod apollo_otlp_exporter;
pub(crate) mod config;
pub(crate) mod config_new;
pub(crate) mod consts;
pub(crate) mod dynamic_attribute;
mod endpoint;
mod fmt_layer;
Expand All @@ -159,22 +168,12 @@ pub(crate) mod tracing;
pub(crate) mod utils;

// Tracing consts
pub(crate) const SUPERGRAPH_SPAN_NAME: &str = "supergraph";
pub(crate) const SUBGRAPH_SPAN_NAME: &str = "subgraph";
pub(crate) const ROUTER_SPAN_NAME: &str = "router";
pub(crate) const EXECUTION_SPAN_NAME: &str = "execution";
const CLIENT_NAME: &str = "apollo_telemetry::client_name";
const CLIENT_VERSION: &str = "apollo_telemetry::client_version";
const SUBGRAPH_FTV1: &str = "apollo_telemetry::subgraph_ftv1";
pub(crate) const STUDIO_EXCLUDE: &str = "apollo_telemetry::studio::exclude";
pub(crate) const LOGGING_DISPLAY_HEADERS: &str = "apollo_telemetry::logging::display_headers";
pub(crate) const LOGGING_DISPLAY_BODY: &str = "apollo_telemetry::logging::display_body";

pub(crate) const OTEL_STATUS_CODE: &str = "otel.status_code";
#[allow(dead_code)]
pub(crate) const OTEL_STATUS_DESCRIPTION: &str = "otel.status_description";
pub(crate) const OTEL_STATUS_CODE_OK: &str = "OK";
pub(crate) const OTEL_STATUS_CODE_ERROR: &str = "ERROR";
const GLOBAL_TRACER_NAME: &str = "apollo-router";
const DEFAULT_EXPOSE_TRACE_ID_HEADER: &str = "apollo-trace-id";
static DEFAULT_EXPOSE_TRACE_ID_HEADER_NAME: HeaderName =
Expand Down Expand Up @@ -337,11 +336,17 @@ impl Plugin for Telemetry {
span.record("graphql.operation.name", operation_name);
}
match (&operation_kind, &operation_name) {
(Ok(Some(kind)), Ok(Some(name))) => {
span.record("otel.name", format!("{kind} {name}"))
(Ok(Some(kind)), Ok(Some(name))) => span.set_span_dyn_attribute(
OTEL_NAME.into(),
format!("{kind} {name}").into(),
),
(Ok(Some(kind)), _) => {
span.set_span_dyn_attribute(OTEL_NAME.into(), kind.clone().into())
}
(Ok(Some(kind)), _) => span.record("otel.name", kind),
_ => span.record("otel.name", "GraphQL Operation"),
_ => span.set_span_dyn_attribute(
OTEL_NAME.into(),
"GraphQL Operation".into(),
),
};
}
}
Expand Down Expand Up @@ -556,17 +561,18 @@ impl Plugin for Telemetry {
});

// Append the trace ID with the right format, based on the config
let format_id = |trace: TraceId| {
let format_id = |trace_id: TraceId| {
let id = match config.exporters.tracing.response_trace_id.format {
TraceIdFormat::Hexadecimal => format!("{:032x}", trace.to_u128()),
TraceIdFormat::Decimal => format!("{}", trace.to_u128()),
TraceIdFormat::Hexadecimal => format!("{:032x}", trace_id),
TraceIdFormat::Decimal => format!("{}", u128::from_be_bytes(trace_id.to_bytes())),
TraceIdFormat::Datadog => trace_id.to_datadog()
};

HeaderValue::from_str(&id).ok()
};
if let (Some(header_name), Some(trace_id)) = (
expose_trace_id_header,
TraceId::current().and_then(format_id),
trace_id().and_then(format_id),
) {
resp.response.headers_mut().append(header_name, trace_id);
}
Expand Down Expand Up @@ -859,7 +865,7 @@ impl Telemetry {
if propagation.zipkin || tracing.zipkin.enabled {
propagators.push(Box::<opentelemetry_zipkin::Propagator>::default());
}
if propagation.datadog || tracing.datadog.enabled {
if propagation.datadog || tracing.datadog.enabled() {
propagators.push(Box::<opentelemetry_datadog::DatadogPropagator>::default());
}
if propagation.aws_xray {
Expand Down
Loading

0 comments on commit bc0f8a9

Please sign in to comment.