Skip to content

Commit 4dcc774

Browse files
authoredOct 7, 2024
otel: Add trace sampling
Adds code to rust to take a trace sampling ratio, and adds code on the Unit side implementing the settings/telemetry/sample_ratio configuration path that will be resolved into a number. Adds a validator that makes sure that the sample ratio is in the range [0, 1]. Signed-off-by: Gabor Javorszky <g.javorszky@f5.com>
1 parent ac1eb67 commit 4dcc774

File tree

4 files changed

+49
-16
lines changed

4 files changed

+49
-16
lines changed
 

‎src/nxt_conf_validation.c

+21
Original file line numberDiff line numberDiff line change
@@ -248,6 +248,9 @@ nxt_inline nxt_int_t nxt_otel_validate_endpoint(nxt_conf_validation_t *vldt,
248248
nxt_int_t nxt_otel_validate_batch_size(nxt_conf_validation_t *vldt,
249249
nxt_conf_value_t *value,
250250
void *data);
251+
nxt_int_t nxt_otel_validate_sample_ratio(nxt_conf_validation_t *vldt,
252+
nxt_conf_value_t *value,
253+
void *data);
251254
nxt_int_t nxt_otel_validate_protocol(nxt_conf_validation_t *vldt,
252255
nxt_conf_value_t *value,
253256
void *data);
@@ -336,6 +339,10 @@ static nxt_conf_vldt_object_t nxt_conf_vldt_otel_members[] = {
336339
.type = NXT_CONF_VLDT_STRING,
337340
.validator = nxt_otel_validate_protocol,
338341
.flags = NXT_CONF_VLDT_REQUIRED
342+
}, {
343+
.name = nxt_string("sampling_ratio"),
344+
.type = NXT_CONF_VLDT_NUMBER,
345+
.validator = nxt_otel_validate_sample_ratio,
339346
},
340347

341348
NXT_CONF_VLDT_END
@@ -1534,6 +1541,20 @@ nxt_otel_validate_batch_size(nxt_conf_validation_t *vldt,
15341541
return NXT_OK;
15351542
}
15361543

1544+
nxt_int_t
1545+
nxt_otel_validate_sample_ratio(nxt_conf_validation_t *vldt,
1546+
nxt_conf_value_t *value,
1547+
void *data)
1548+
{
1549+
double sample_ratio;
1550+
1551+
sample_ratio = nxt_conf_get_number(value);
1552+
if (sample_ratio < 0 || sample_ratio > 1) {
1553+
return NXT_ERROR;
1554+
}
1555+
1556+
return NXT_OK;
1557+
}
15371558

15381559
nxt_int_t
15391560
nxt_otel_validate_protocol(nxt_conf_validation_t *vldt,

‎src/nxt_otel.h

+1
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ extern void * nxt_otel_rs_get_or_create_trace(u_char *trace_id);
1919
extern void nxt_otel_rs_init(void (*log_callback)(u_char *log_string),
2020
const nxt_str_t *endpoint,
2121
const nxt_str_t *protocol,
22+
double sample_fraction,
2223
double batch_size);
2324
extern void nxt_otel_rs_copy_traceparent(u_char *buffer, void *span);
2425
extern void nxt_otel_rs_add_event_to_trace(void *trace,

‎src/nxt_router.c

+7-2
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#define NXT_SHARED_PORT_ID 0xFFFFu
2626
#if (NXT_HAVE_OTEL)
2727
#define NXT_OTEL_BATCH_DEFAULT 128
28+
#define NXT_OTEL_SAMPLING_DEFAULT 1
2829
#endif
2930

3031
typedef struct {
@@ -1645,9 +1646,9 @@ nxt_router_conf_create(nxt_task_t *task, nxt_router_temp_conf_t *tmcf,
16451646
nxt_conf_value_t *js_module;
16461647
#endif
16471648
#if (NXT_HAVE_OTEL)
1648-
nxt_conf_value_t *otel, *otel_endpoint, *otel_batching, *otel_proto;
1649+
nxt_conf_value_t *otel, *otel_endpoint, *otel_sampling, *otel_batching, *otel_proto;
16491650
nxt_str_t telemetry_endpoint, telemetry_proto;
1650-
double telemetry_batching;
1651+
double telemetry_sample_fraction, telemetry_batching;
16511652
#endif
16521653
nxt_conf_value_t *root, *conf, *http, *value, *websocket;
16531654
nxt_conf_value_t *applications, *application, *settings;
@@ -1688,6 +1689,7 @@ nxt_router_conf_create(nxt_task_t *task, nxt_router_temp_conf_t *tmcf,
16881689
static const nxt_str_t telemetry_path = nxt_string("/settings/telemetry");
16891690
static const nxt_str_t telemetry_endpoint_path = nxt_string("/settings/telemetry/endpoint");
16901691
static const nxt_str_t telemetry_batch_path = nxt_string("/settings/telemetry/batch_size");
1692+
static const nxt_str_t telemetry_sample_path = nxt_string("/settings/telemetry/sampling_ratio");
16911693
static const nxt_str_t telemetry_proto_path = nxt_string("/settings/telemetry/protocol");
16921694
#endif
16931695

@@ -2195,16 +2197,19 @@ nxt_router_conf_create(nxt_task_t *task, nxt_router_temp_conf_t *tmcf,
21952197
otel = nxt_conf_get_path(root, &telemetry_path);
21962198
otel_endpoint = nxt_conf_get_path(root, &telemetry_endpoint_path);
21972199
otel_batching = nxt_conf_get_path(root, &telemetry_batch_path);
2200+
otel_sampling = nxt_conf_get_path(root, &telemetry_sample_path);
21982201
otel_proto = nxt_conf_get_path(root, &telemetry_proto_path);
21992202

22002203
if (otel) {
22012204
nxt_conf_get_string(otel_endpoint, &telemetry_endpoint);
22022205
nxt_conf_get_string(otel_proto, &telemetry_proto);
22032206
telemetry_batching = otel_batching ? nxt_conf_get_number(otel_batching) : NXT_OTEL_BATCH_DEFAULT;
2207+
telemetry_sample_fraction = otel_sampling ? nxt_conf_get_number(otel_sampling) : NXT_OTEL_SAMPLING_DEFAULT;
22042208

22052209
nxt_otel_rs_init(&nxt_otel_log_callback,
22062210
&telemetry_endpoint,
22072211
&telemetry_proto,
2212+
telemetry_sample_fraction,
22082213
telemetry_batching);
22092214
} else {
22102215
nxt_otel_rs_uninit();

‎src/otel/src/lib.rs

+20-14
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,10 @@ use opentelemetry::trace::{
55
use opentelemetry::{global, KeyValue};
66
use opentelemetry_otlp::Protocol;
77
use opentelemetry_otlp::WithExportConfig;
8-
use opentelemetry_sdk::trace::{Config, BatchConfigBuilder};
8+
use opentelemetry_sdk::trace::{Config, BatchConfigBuilder, Sampler};
99
use opentelemetry_sdk::{runtime, Resource};
1010
use std::ffi::{CStr, CString};
11-
use std::ptr;
11+
use std::{ptr, time};
1212
use std::ptr::addr_of;
1313
use std::slice;
1414
use std::sync::{Arc, OnceLock};
@@ -17,7 +17,7 @@ use tokio::sync::mpsc::{Receiver, Sender};
1717

1818

1919
const TRACEPARENT_HEADER_LEN: u8 = 55;
20-
20+
const TIMEOUT: time::Duration = std::time::Duration::from_secs(10);
2121

2222
#[repr(C)]
2323
pub struct nxt_str_t {
@@ -64,6 +64,7 @@ unsafe fn nxt_otel_rs_init(
6464
log_callback: unsafe extern "C" fn(*mut i8),
6565
endpoint: *const nxt_str_t,
6666
protocol: *const nxt_str_t,
67+
sample_fraction: f64,
6768
batch_size: f64
6869
) {
6970
if endpoint.is_null() ||
@@ -110,6 +111,7 @@ unsafe fn nxt_otel_rs_init(
110111
ep,
111112
proto,
112113
batch_size,
114+
sample_fraction,
113115
rx
114116
));
115117
},
@@ -132,16 +134,21 @@ async unsafe fn nxt_otel_rs_runtime(
132134
endpoint: String,
133135
proto: Protocol,
134136
batch_size: f64,
137+
sample_fraction: f64,
135138
mut rx: Receiver<SpanMessage>
136139
) {
137140
let pipeline = opentelemetry_otlp::new_pipeline()
138141
.tracing()
139-
.with_trace_config(Config::default().with_resource(
140-
Resource::new(vec![KeyValue::new(
141-
opentelemetry_semantic_conventions::resource::SERVICE_NAME,
142-
"NGINX Unit",
143-
)]),
144-
))
142+
.with_trace_config(
143+
Config::default()
144+
.with_resource(
145+
Resource::new(vec![KeyValue::new(
146+
opentelemetry_semantic_conventions::resource::SERVICE_NAME,
147+
"NGINX Unit",
148+
)])
149+
)
150+
.with_sampler(Sampler::TraceIdRatioBased(sample_fraction))
151+
)
145152
.with_batch_config(
146153
BatchConfigBuilder::default()
147154
.with_max_export_batch_size(batch_size as _)
@@ -156,15 +163,15 @@ async unsafe fn nxt_otel_rs_runtime(
156163
.with_http_client(reqwest::Client::new()) // needed because rustls feature
157164
.with_endpoint(endpoint)
158165
.with_protocol(proto)
159-
.with_timeout(std::time::Duration::new(10, 0))
166+
.with_timeout(TIMEOUT)
160167
).install_batch(runtime::Tokio),
161168
Protocol::Grpc => pipeline
162169
.with_exporter(
163170
opentelemetry_otlp::new_exporter()
164171
.tonic()
165172
.with_endpoint(endpoint)
166173
.with_protocol(proto)
167-
.with_timeout(std::time::Duration::new(10, 0))
174+
.with_timeout(TIMEOUT)
168175
).install_batch(runtime::Tokio),
169176
};
170177

@@ -211,7 +218,7 @@ pub unsafe fn nxt_otel_rs_copy_traceparent(buf: *mut i8, span: *const BoxedSpan)
211218
(*span).span_context().trace_flags() // 1 char, 2 hex
212219
);
213220

214-
assert_eq!(traceparent.len(), TRACEPARENT_HEADER_LEN as _);
221+
assert_eq!(traceparent.len(), TRACEPARENT_HEADER_LEN as usize);
215222

216223
ptr::copy_nonoverlapping(
217224
traceparent.as_bytes().as_ptr(),
@@ -270,8 +277,7 @@ pub unsafe fn nxt_otel_rs_get_or_create_trace(trace_id: *mut i8) -> *mut BoxedSp
270277
}
271278

272279
#[no_mangle]
273-
#[tokio::main]
274-
pub async unsafe fn nxt_otel_rs_send_trace(trace: *mut BoxedSpan) {
280+
pub unsafe fn nxt_otel_rs_send_trace(trace: *mut BoxedSpan) {
275281
// damage nothing on an improper call
276282
if trace.is_null() {
277283
eprintln!("trace was null, returning");

0 commit comments

Comments
 (0)