Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(job): add duration histogram #957

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions pkg/controller/collectors.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ var (
testSuiteLabels = []string{"test_suite_name"}
testCaseLabels = []string{"test_case_name", "test_case_classname"}
statusesList = [...]string{"created", "waiting_for_resource", "preparing", "pending", "running", "success", "failed", "canceled", "skipped", "manual", "scheduled", "error"}
latencyHistogramBuckets = []float64{.1, .25, .5, 1, 2.5, 5, 10, 15, 20, 30, 40, 50, 60, 90, 150, 210, 270, 330, 390, 450, 500, 600, 1200, 1800, 2700, 3600}
)

// NewInternalCollectorCurrentlyQueuedTasksCount returns a new collector for the gcpe_currently_queued_tasks_count metric.
Expand Down Expand Up @@ -266,6 +267,18 @@ func NewCollectorJobDurationSeconds() prometheus.Collector {
)
}

// NewCollectorJobDurationHistogram returns a new collector for the
// gitlab_ci_pipeline_job_histogram_duration_seconds metric.
//
// The collector is a histogram vector that uses latencyHistogramBuckets as its
// bucket boundaries and is labelled with defaultLabels followed by jobLabels.
func NewCollectorJobDurationHistogram() prometheus.Collector {
	opts := prometheus.HistogramOpts{
		Name:    "gitlab_ci_pipeline_job_histogram_duration_seconds",
		Help:    "Histogram of duration (seconds) of finished gitlab jobs",
		Buckets: latencyHistogramBuckets,
	}
	labelNames := append(defaultLabels, jobLabels...)

	return prometheus.NewHistogramVec(opts, labelNames)
}

// NewCollectorJobID returns a new collector for the gitlab_ci_pipeline_job_id metric.
func NewCollectorJobID() prometheus.Collector {
return prometheus.NewGaugeVec(
Expand Down
1 change: 1 addition & 0 deletions pkg/controller/garbage_collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,7 @@ func (c *Controller) GarbageCollectMetrics(ctx context.Context) error {
switch m.Kind {
case schemas.MetricKindJobArtifactSizeBytes,
schemas.MetricKindJobDurationSeconds,
schemas.MetricKindJobDurationHistogram,
schemas.MetricKindJobID,
schemas.MetricKindJobRunCount,
schemas.MetricKindJobStatus,
Expand Down
3 changes: 3 additions & 0 deletions pkg/controller/garbage_collector_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,7 @@ func TestGarbageCollectMetrics(t *testing.T) {
ref1m1 := schemas.Metric{Kind: schemas.MetricKindCoverage, Labels: prometheus.Labels{"project": "p1", "ref": "foo", "kind": "branch"}}
ref1m2 := schemas.Metric{Kind: schemas.MetricKindStatus, Labels: prometheus.Labels{"project": "p1", "ref": "foo", "kind": "branch"}}
ref1m3 := schemas.Metric{Kind: schemas.MetricKindJobDurationSeconds, Labels: prometheus.Labels{"project": "p1", "ref": "foo", "kind": "branch"}}
ref1m4 := schemas.Metric{Kind: schemas.MetricKindJobDurationHistogram, Labels: prometheus.Labels{"project": "p1", "ref": "foo", "kind": "branch"}}

ref2m1 := schemas.Metric{Kind: schemas.MetricKindCoverage, Labels: prometheus.Labels{"project": "p2", "ref": "bar", "kind": "branch"}}
ref3m1 := schemas.Metric{Kind: schemas.MetricKindCoverage, Labels: prometheus.Labels{"project": "foo", "kind": "branch"}}
Expand All @@ -150,6 +151,7 @@ func TestGarbageCollectMetrics(t *testing.T) {
c.Store.SetMetric(ctx, ref1m1)
c.Store.SetMetric(ctx, ref1m2)
c.Store.SetMetric(ctx, ref1m3)
c.Store.SetMetric(ctx, ref1m4)
c.Store.SetMetric(ctx, ref2m1)
c.Store.SetMetric(ctx, ref3m1)
c.Store.SetMetric(ctx, ref4m1)
Expand All @@ -161,6 +163,7 @@ func TestGarbageCollectMetrics(t *testing.T) {
expectedMetrics := schemas.Metrics{
ref1m1.Key(): ref1m1,
ref1m3.Key(): ref1m3,
ref1m4.Key(): ref1m4,
}
assert.Equal(t, expectedMetrics, storedMetrics)
}
6 changes: 6 additions & 0 deletions pkg/controller/jobs.go
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,12 @@ func (c *Controller) ProcessJobMetrics(ctx context.Context, ref schemas.Ref, job
Value: job.DurationSeconds,
})

storeSetMetric(ctx, c.Store, schemas.Metric{
Kind: schemas.MetricKindJobDurationHistogram,
Labels: labels,
Value: job.DurationSeconds,
})

storeSetMetric(ctx, c.Store, schemas.Metric{
Kind: schemas.MetricKindJobQueuedDurationSeconds,
Labels: labels,
Expand Down
111 changes: 111 additions & 0 deletions pkg/controller/jobs_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,14 @@ func TestProcessJobMetrics(t *testing.T) {
}
assert.Equal(t, lastRunJobDuration, metrics[lastRunJobDuration.Key()])

jobDurationHistogram := schemas.Metric{
Kind: schemas.MetricKindJobDurationHistogram,
Labels: labels,
Value: newJob.DurationSeconds,
}

assert.Equal(t, jobDurationHistogram, metrics[jobDurationHistogram.Key()])

jobRunCount := schemas.Metric{
Kind: schemas.MetricKindJobRunCount,
Labels: labels,
Expand All @@ -171,3 +179,106 @@ func TestProcessJobMetrics(t *testing.T) {
}
assert.Equal(t, status, metrics[status.Key()])
}

// TestProcessJobHistogramMetrics checks that ProcessJobMetrics stores a
// MetricKindJobDurationHistogram metric whose value is the duration of the job
// that was just processed, and that the stored value follows subsequent jobs
// sharing the same name.
func TestProcessJobHistogramMetrics(t *testing.T) {
	ctx, c, _, srv := newTestController(config.Config{})
	srv.Close()

	oldJob := schemas.Job{
		ID:        1,
		Name:      "foo",
		Timestamp: 1,
	}

	job1 := schemas.Job{
		ID:              2,
		Name:            "foo",
		Timestamp:       2,
		DurationSeconds: 15,
		Status:          "failed",
		Stage:           "🚀",
		TagList:         "",
		ArtifactSize:    150,
		Runner: schemas.Runner{
			Description: "foo-123-bar",
		},
	}

	job2 := schemas.Job{
		ID:              3,
		Name:            "foo",
		Timestamp:       2,
		DurationSeconds: 20,
		Status:          "failed",
		Stage:           "🚀",
		TagList:         "",
		ArtifactSize:    150,
		Runner: schemas.Runner{
			Description: "foo-123-bar",
		},
	}

	p := schemas.NewProject("foo")
	p.Topics = "first,second"
	p.Pull.Pipeline.Jobs.RunnerDescription.AggregationRegexp = `foo-(.*)-bar`

	ref := schemas.NewRef(p, schemas.RefKindBranch, "foo")
	ref.LatestPipeline.ID = 1
	ref.LatestPipeline.Variables = "none"
	ref.LatestJobs = schemas.Jobs{
		"foo": oldJob,
	}

	c.Store.SetRef(ctx, ref)

	// If we run it against the same job, nothing should change in the store
	c.ProcessJobMetrics(ctx, ref, oldJob)
	refs, err := c.Store.Refs(ctx)
	assert.NoError(t, err)
	assert.Equal(t, schemas.Jobs{
		"foo": oldJob,
	}, refs[ref.Key()].LatestJobs)

	// Update the ref job 1
	c.ProcessJobMetrics(ctx, ref, job1)
	refs, err = c.Store.Refs(ctx)
	assert.NoError(t, err)
	assert.Equal(t, schemas.Jobs{
		"foo": job1,
	}, refs[ref.Key()].LatestJobs)

	// Check that the histogram metric exists and carries job1's duration
	metrics, err := c.Store.Metrics(ctx)
	assert.NoError(t, err)

	labels := map[string]string{
		"project":            ref.Project.Name,
		"topics":             ref.Project.Topics,
		"ref":                ref.Name,
		"kind":               string(ref.Kind),
		"variables":          ref.LatestPipeline.Variables,
		"source":             ref.LatestPipeline.Source,
		"stage":              job1.Stage,
		"tag_list":           job1.TagList,
		"failure_reason":     job1.FailureReason,
		"job_name":           job1.Name,
		"runner_description": ref.Project.Pull.Pipeline.Jobs.RunnerDescription.AggregationRegexp,
	}
	jobDurationHistogram := schemas.Metric{
		Kind:   schemas.MetricKindJobDurationHistogram,
		Labels: labels,
		Value:  job1.DurationSeconds,
	}

	assert.Equal(t, jobDurationHistogram, metrics[jobDurationHistogram.Key()])

	// Update the ref job 2
	c.ProcessJobMetrics(ctx, ref, job2)
	refs, err = c.Store.Refs(ctx)
	assert.NoError(t, err)
	assert.Equal(t, schemas.Jobs{
		"foo": job2,
	}, refs[ref.Key()].LatestJobs)

	// Fetch the metrics again so the assertion below checks the store as it is
	// after job2 was processed, not the result obtained before that call.
	metrics, err = c.Store.Metrics(ctx)
	assert.NoError(t, err)

	jobDurationHistogram = schemas.Metric{
		Kind:   schemas.MetricKindJobDurationHistogram,
		Labels: labels,
		Value:  job2.DurationSeconds,
	}

	assert.Equal(t, jobDurationHistogram, metrics[jobDurationHistogram.Key()])
}
3 changes: 3 additions & 0 deletions pkg/controller/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ func NewRegistry(ctx context.Context) *Registry {
schemas.MetricKindID: NewCollectorID(),
schemas.MetricKindJobArtifactSizeBytes: NewCollectorJobArtifactSizeBytes(),
schemas.MetricKindJobDurationSeconds: NewCollectorJobDurationSeconds(),
schemas.MetricKindJobDurationHistogram: NewCollectorJobDurationHistogram(),
schemas.MetricKindJobID: NewCollectorJobID(),
schemas.MetricKindJobQueuedDurationSeconds: NewCollectorJobQueuedDurationSeconds(),
schemas.MetricKindJobRunCount: NewCollectorJobRunCount(),
Expand Down Expand Up @@ -194,6 +195,8 @@ func (r *Registry) ExportMetrics(metrics schemas.Metrics) {
c.With(m.Labels).Set(m.Value)
case *prometheus.CounterVec:
c.With(m.Labels).Add(m.Value)
case *prometheus.HistogramVec:
c.With(m.Labels).Observe(m.Value)
default:
log.Errorf("unsupported collector type : %v", reflect.TypeOf(c))
}
Expand Down
5 changes: 4 additions & 1 deletion pkg/schemas/metric.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,9 @@ const (
// MetricKindJobDurationSeconds ..
MetricKindJobDurationSeconds

// MetricKindJobDurationHistogram refers to the histogram of job durations (in seconds).
MetricKindJobDurationHistogram

// MetricKindJobID ..
MetricKindJobID

Expand Down Expand Up @@ -147,7 +150,7 @@ func (m Metric) Key() MetricKey {
m.Labels["source"],
})

case MetricKindJobArtifactSizeBytes, MetricKindJobDurationSeconds, MetricKindJobID, MetricKindJobQueuedDurationSeconds, MetricKindJobRunCount, MetricKindJobStatus, MetricKindJobTimestamp:
case MetricKindJobArtifactSizeBytes, MetricKindJobDurationSeconds, MetricKindJobDurationHistogram, MetricKindJobID, MetricKindJobQueuedDurationSeconds, MetricKindJobRunCount, MetricKindJobStatus, MetricKindJobTimestamp:
key += fmt.Sprintf("%v", []string{
m.Labels["project"],
m.Labels["kind"],
Expand Down