Skip to content
This repository has been archived by the owner on Mar 15, 2024. It is now read-only.

Add additional error metrics #32

Merged
merged 1 commit into from
Mar 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 11 additions & 7 deletions metrics/events.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,17 @@ func (m *Metrics) HandleFailureEvent(ctx context.Context, id types.RetrievalID,
m.graphsyncRetrievalFailureCount.Add(ctx, 1, attribute.String("sp_id", storageProviderID))
}
var errorMetricMatches = map[string]instrument.Int64Counter{
"response rejected": m.retrievalErrorRejectedCount,
"Too many retrieval deals received": m.retrievalErrorTooManyCount,
"Access Control": m.retrievalErrorACLCount,
"Under maintenance, retry later": m.retrievalErrorMaintenanceCount,
"miner is not accepting online retrieval deals": m.retrievalErrorNoOnlineCount,
"unconfirmed block transfer": m.retrievalErrorUnconfirmedCount,
"timeout after ": m.retrievalErrorTimeoutCount,
"response rejected": m.retrievalErrorRejectedCount,
"Too many retrieval deals received": m.retrievalErrorTooManyCount,
"Access Control": m.retrievalErrorACLCount,
"Under maintenance, retry later": m.retrievalErrorMaintenanceCount,
"miner is not accepting online retrieval deals": m.retrievalErrorNoOnlineCount,
"unconfirmed block transfer": m.retrievalErrorUnconfirmedCount,
"timeout after ": m.retrievalErrorTimeoutCount,
"there is no unsealed piece containing payload cid": m.retrievalErrorNoUnsealedCount,
"getting pieces for cid": m.retrievalErrorDAGStoreCount,
"graphsync request failed to complete: request failed - unknown reason": m.retrievalErrorGraphsyncCount,
"failed to dial": m.retrievalErrorFailedToDialCount,
}

var matched bool
Expand Down
41 changes: 33 additions & 8 deletions metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,27 @@ func (m *Metrics) Start() error {
); err != nil {
return err
}
if m.retrievalErrorNoUnsealedCount, err = meter.Int64Counter(meterName+"/retrieval_error_no_unsealed_total",
instrument.WithDescription("The number of retrieval errors where the provider could not find an unsealed piece"),
); err != nil {
return err
}
if m.retrievalErrorDAGStoreCount, err = meter.Int64Counter(meterName+"/retrieval_error_dagstore_total",
instrument.WithDescription("The number of retrieval errors due to DAG Store issues"),
); err != nil {
return err
}
if m.retrievalErrorGraphsyncCount, err = meter.Int64Counter(meterName+"/retrieval_error_graphsync_total",
instrument.WithDescription("The number of retrieval errors due to graphsync requests that errored"),
); err != nil {
return err
}
if m.retrievalErrorFailedToDialCount, err = meter.Int64Counter(meterName+"/retrieval_error_failed_to_dial_total",
instrument.WithDescription("The number of retrieval errors because we could not connect to the provider"),
); err != nil {
return err
}

if m.retrievalErrorOtherCount, err = meter.Int64Counter(meterName+"/retrieval_error_other_total",
instrument.WithDescription("The number of retrieval errors with uncategorized causes"),
); err != nil {
Expand Down Expand Up @@ -316,14 +337,18 @@ type stats struct {
retrievalDealSize instrument.Int64Histogram

// error kinds
retrievalErrorRejectedCount instrument.Int64Counter
retrievalErrorTooManyCount instrument.Int64Counter
retrievalErrorACLCount instrument.Int64Counter
retrievalErrorMaintenanceCount instrument.Int64Counter
retrievalErrorNoOnlineCount instrument.Int64Counter
retrievalErrorUnconfirmedCount instrument.Int64Counter
retrievalErrorTimeoutCount instrument.Int64Counter
retrievalErrorOtherCount instrument.Int64Counter
retrievalErrorRejectedCount instrument.Int64Counter
retrievalErrorTooManyCount instrument.Int64Counter
retrievalErrorACLCount instrument.Int64Counter
retrievalErrorMaintenanceCount instrument.Int64Counter
retrievalErrorNoOnlineCount instrument.Int64Counter
retrievalErrorUnconfirmedCount instrument.Int64Counter
retrievalErrorTimeoutCount instrument.Int64Counter
retrievalErrorOtherCount instrument.Int64Counter
retrievalErrorNoUnsealedCount instrument.Int64Counter
retrievalErrorDAGStoreCount instrument.Int64Counter
retrievalErrorGraphsyncCount instrument.Int64Counter
retrievalErrorFailedToDialCount instrument.Int64Counter

// averages
indexerCandidatesPerRequestCount instrument.Int64Histogram
Expand Down