
Commit

cleanedup unwanted metrics
VineethPeddi committed Apr 8, 2024
1 parent 7411cf6 commit dcd9704
Showing 4 changed files with 3 additions and 57 deletions.
35 changes: 0 additions & 35 deletions collect/collect.go
@@ -107,9 +107,6 @@ func (i *InMemCollector) Start() error {
// listen for config reloads
i.Config.RegisterReloadCallback(i.sendReloadSignal)

i.Metrics.Register("trace_duration_ms", "histogram")
i.Metrics.Register("trace_spans_count_total", "histogram")
i.Metrics.Register("collector_tosend_queue", "histogram")
i.Metrics.Register("collector_incoming_queue", "histogram")
i.Metrics.Register("collector_peer_queue", "histogram")
i.Metrics.Register("collector_cache_size", "gauge")
@@ -125,12 +122,6 @@ func (i *InMemCollector) Start() error {
i.Metrics.Register(TraceSendEjectedFull, "counter")
i.Metrics.Register(TraceSendEjectedMemsize, "counter")

i.Metrics.RegisterWithDescriptionLabels(
"trace_operations_latency_ms",
"gauge",
"Trace latency wrt each trace operation",
[]string{"service_name", "operation", "app", "instance", "transaction_type", "transaction_category", "transaction_sub_category", "language"},
)
i.Metrics.RegisterWithDescriptionLabels(
"trace_operations_failed",
"counter",
@@ -149,24 +140,6 @@ func (i *InMemCollector) Start() error {
"Total Number of events in spans wrt each trace operation",
[]string{"service_name", "operation", "app", "instance", "transaction_type", "transaction_category", "transaction_sub_category", "language"},
)
i.Metrics.RegisterWithDescriptionLabels(
"trace_root_span",
"counter",
"Number of root spans in an operation",
[]string{"service_name", "operation", "app", "instance", "transaction_type", "transaction_category", "transaction_sub_category", "language"},
)
i.Metrics.RegisterWithDescriptionLabels(
"trace_spans_count",
"counter",
"Number of spans in an operation",
[]string{"service_name", "operation", "app", "instance", "transaction_type", "transaction_category", "transaction_sub_category", "language"},
)
i.Metrics.RegisterWithDescriptionLabels(
"trace_root_operation_latency_ms",
"gauge",
"Trace latency wrt each root trace operation",
[]string{"service_name", "operation", "app", "instance", "transaction_type", "transaction_category", "transaction_sub_category", "language"},
)
i.Metrics.RegisterWithDescriptionLabels(
"trace_root_operations_failed",
"counter",
@@ -662,9 +635,6 @@ func (i *InMemCollector) send(trace *types.Trace, reason string) {
}
trace.Sent = true

traceDur := time.Since(trace.ArrivalTime)
i.Metrics.Histogram("trace_duration_ms", float64(traceDur.Milliseconds()))
i.Metrics.Histogram("trace_spans_count_total", float64(trace.DescendantCount()))
if trace.RootSpan != nil {
i.Metrics.Increment("trace_send_has_root")
} else {
@@ -732,7 +702,6 @@ func (i *InMemCollector) send(trace *types.Trace, reason string) {

durationMsString, ok := span.Data["durationMs"]
if ok && durationMsString != nil {

GitHub Actions lint failure on line 704 in collect/collect.go: unnecessary leading newline (whitespace)
i.Metrics.GaugeWithLabels("trace_operations_latency_ms", labels, metrics.ConvertNumeric(durationMsString))

// getting the latency from end and start time
i.Metrics.HistogramWithLabels(
@@ -766,11 +735,7 @@ func (i *InMemCollector) send(trace *types.Trace, reason string) {
(metrics.ConvertNumeric(span.Data["endTime"])-metrics.ConvertNumeric(span.Data["startTime"]))/float64(time.Millisecond),
)

GitHub Actions lint failure on line 737 in collect/collect.go: File is not `gofumpt`-ed with `-extra` (gofumpt)
i.Metrics.GaugeWithLabels("trace_root_operation_latency_ms", labels, metrics.ConvertNumeric(durationMsString))
i.Metrics.IncrementWithLabels("trace_root_span", labels)
}
i.Metrics.IncrementWithLabels("trace_spans_count", labels)

errorStatus, ok := span.Data["error"]
if ok && errorStatus != nil && errorStatus.(bool) {
i.Metrics.IncrementWithLabels("trace_operations_failed", labels)
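Note: the collect.go hunks remove metric registrations in Start() together with their matching emit calls in send(). The sketch below illustrates why the two sites go together, using a hypothetical Registry type as a stand-in for the repo's Metrics interface (names here are illustrative, not the actual implementation): a sample emitted for a name that was never registered has nowhere to go, and a registration with no emit site is dead weight.

package main

import "fmt"

// Registry is a hypothetical stand-in for the Metrics interface used in
// collect.go; the real implementation lives elsewhere in the repo.
type Registry struct {
	registered map[string]string // metric name -> metric type
}

func NewRegistry() *Registry {
	return &Registry{registered: make(map[string]string)}
}

// Register records a metric name and type, mirroring i.Metrics.Register.
func (r *Registry) Register(name, kind string) {
	r.registered[name] = kind
}

// Histogram emits a value; in this sketch a sample for an unregistered
// metric is simply dropped, which is why registrations removed in Start()
// have their emit calls removed in send() as well.
func (r *Registry) Histogram(name string, val float64) {
	if _, ok := r.registered[name]; !ok {
		fmt.Printf("dropping sample for unregistered metric %q\n", name)
		return
	}
	fmt.Printf("%s (histogram) = %v\n", name, val)
}

func main() {
	m := NewRegistry()
	m.Register("collector_incoming_queue", "histogram") // still registered after this commit
	m.Histogram("collector_incoming_queue", 42)
	m.Histogram("trace_duration_ms", 1234) // removed by this commit: no Register, no emit
}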
10 changes: 3 additions & 7 deletions metrics/opsramp.go
@@ -562,7 +562,7 @@ func (p *OpsRampMetrics) calculateTraceOperationError(metricFamilySlice []*io_pr
if !p.re.MatchString(metricFamily.GetName()) {
continue
}
if metricFamily.GetName() == "trace_operations_failed" || metricFamily.GetName() == "trace_spans_count" {
if metricFamily.GetName() == "trace_operations_failed" {
for _, metric := range metricFamily.GetMetric() {
var labels []prompb.Label
for _, label := range metric.GetLabel() {
@@ -571,19 +571,15 @@ func (p *OpsRampMetrics) calculateTraceOperationError(metricFamilySlice []*io_pr
Value: label.GetValue(),
})
}
key := "trace_operations_failed&trace_spans_count&"
key := "trace_operations_failed&"
labelSlice := metric.GetLabel()
sort.Slice(labelSlice, func(i, j int) bool {
return labelSlice[i].GetName()+labelSlice[i].GetValue() > labelSlice[j].GetName()+labelSlice[j].GetValue()
})
for _, label := range labelSlice {
key += label.GetName() + label.GetValue()
}
if metricFamily.GetName() == "trace_operations_failed" {
uniqueFailedMap[key] = *metric.Counter.Value
} else {
uniqueSpansMap[key] = *metric.Counter.Value
}
uniqueFailedMap[key] = *metric.Counter.Value
uniqueLabelsMap[key] = labels
}
}
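Note: with trace_spans_count gone, calculateTraceOperationError only folds trace_operations_failed into its composite key. A minimal sketch of that key construction as it reads after the change, with Label and buildKey as illustrative names rather than the repo's protobuf types: sort the label pairs, then append each name and value to a fixed metric prefix so the same label set always maps to the same entry in uniqueFailedMap.

package main

import (
	"fmt"
	"sort"
)

// Label is an illustrative stand-in for the protobuf label pairs used in opsramp.go.
type Label struct {
	Name, Value string
}

// buildKey mirrors the key construction after this commit: a fixed metric
// prefix plus the sorted label name/value pairs.
func buildKey(labels []Label) string {
	key := "trace_operations_failed&"
	sort.Slice(labels, func(i, j int) bool {
		return labels[i].Name+labels[i].Value > labels[j].Name+labels[j].Value
	})
	for _, l := range labels {
		key += l.Name + l.Value
	}
	return key
}

func main() {
	a := []Label{{"service_name", "checkout"}, {"operation", "GET /pay"}}
	b := []Label{{"operation", "GET /pay"}, {"service_name", "checkout"}}
	fmt.Println(buildKey(a) == buildKey(b)) // true: the sort makes label order irrelevant
}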
14 changes: 0 additions & 14 deletions pkg/libtrace/transmission/transmission.go
@@ -290,10 +290,8 @@ func (h *TraceProxy) Flush() (err error) {
// work can be enqueued.
func (h *TraceProxy) Add(ev *Event) {
if h.tryAdd(ev) {
h.Metrics.Increment("messages_queued")
return
}
h.Metrics.Increment("queue_overflow")
r := Response{
Err: errors.New("queue overflow"),
Metadata: ev.Metadata,
@@ -309,12 +307,6 @@ func (h *TraceProxy) tryAdd(ev *Event) bool {
h.musterLock.RLock()
defer h.musterLock.RUnlock()

// Even though this queue is locked against changing h.Muster, the Work queue length
// could change due to actions on the worker side, so make sure we only measure it once.
qlen := len(h.muster.Work)
h.Logger.Debug().Logf("adding event to transmission; queue length %d", qlen)
h.Metrics.Gauge("queue_length", qlen)

if h.BlockOnSend {
h.muster.Work <- ev
return true
@@ -774,9 +766,6 @@ func (b *batchAgg) exportProtoMsgBatch(events []*Event) {
if st, ok := status.FromError(err); ok {
if st.Code() != codes.OK {
b.logger.Error().Logf("sending failed. error: %s", st.String())
b.metrics.Increment("send_errors")
} else {
b.metrics.Increment("batches_sent")
}
}

@@ -793,15 +782,12 @@ func (b *batchAgg) exportProtoMsgBatch(events []*Event) {
if st, ok := status.FromError(err); ok {
if st.Code() != codes.OK {
b.logger.Error().Logf("sending failed. error: %s", st.String())
b.metrics.Increment("send_errors")
if strings.Contains(strings.ToUpper(err.Error()), "TRACE MANAGEMENT WAS NOT ENABLED") {
b.logger.Error().Logf("Enable Trace Management For Tenant and Restart Tracing Proxy")
m.Lock()
SendTraces = false
m.Unlock()
}
} else {
b.metrics.Increment("batches_sent")
}
}
b.logger.Debug().Logf("trace proxy response msg: %s", r.GetMessage())
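Note: the transmission.go hunks strip the queue-depth instrumentation around Add/tryAdd but keep the enqueue behaviour: block when BlockOnSend is set, otherwise hand the event to the work queue without blocking and let Add report overflow when that fails. A minimal sketch of that pattern, assuming a plain buffered channel and a select/default for the non-blocking path in place of the muster work queue used by the real code.

package main

import "fmt"

// Event is a placeholder for the transmission Event type.
type Event struct{ Name string }

type proxy struct {
	work        chan *Event
	blockOnSend bool
}

// tryAdd mirrors the enqueue pattern kept by this commit: block when
// blockOnSend is set, otherwise attempt a non-blocking send and let the
// caller handle overflow instead of stalling.
func (p *proxy) tryAdd(ev *Event) bool {
	if p.blockOnSend {
		p.work <- ev
		return true
	}
	select {
	case p.work <- ev:
		return true
	default:
		return false
	}
}

func main() {
	p := &proxy{work: make(chan *Event, 1)}
	fmt.Println(p.tryAdd(&Event{Name: "span-1"})) // true: fits in the buffer
	fmt.Println(p.tryAdd(&Event{Name: "span-2"})) // false: the queue-overflow path in Add
}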
1 change: 0 additions & 1 deletion route/route.go
@@ -135,7 +135,6 @@ func (r *Router) LnS(incomingOrPeer string) {
return
}

r.Metrics.Register(r.incomingOrPeer+"_router_proxied", "counter")
r.Metrics.Register(r.incomingOrPeer+"_router_event", "counter")
r.Metrics.Register(r.incomingOrPeer+"_router_batch", "counter")
r.Metrics.Register(r.incomingOrPeer+"_router_nonspan", "counter")
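Note: route.go keeps its per-listener counters (named from the incomingOrPeer prefix so one router serves both the incoming and peer listeners) and only drops the unused _router_proxied one. A small sketch of that naming convention, listing just the counters visible in the hunk and using a hypothetical register callback in place of r.Metrics.Register.

package main

import "fmt"

// registerRouterMetrics sketches the prefix-based metric naming kept in
// route.go after this commit; register stands in for r.Metrics.Register.
func registerRouterMetrics(incomingOrPeer string, register func(name, kind string)) {
	for _, suffix := range []string{"_router_event", "_router_batch", "_router_nonspan"} {
		register(incomingOrPeer+suffix, "counter")
	}
}

func main() {
	registerRouterMetrics("incoming", func(name, kind string) {
		fmt.Printf("registered %s as %s\n", name, kind)
	})
}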

