From b9e8592bb90bbfdef260d91f90d0fa7f9539394a Mon Sep 17 00:00:00 2001 From: Kewei Yang Date: Thu, 29 Feb 2024 15:06:02 +0800 Subject: [PATCH] Prior to this PR, when pipeline refers to a remote pipeline, in the pipeline-related metrics, the pipeline name tag will be set to 'anonymous'. Taskrun has the same situation. This commit added some scenarios for obtaining pipeline or task names. When the pipeline or task name cannot be determined accurately, the value is obtained through the corresponding label. --- pkg/pipelinerunmetrics/metrics.go | 27 +++++- pkg/pipelinerunmetrics/metrics_test.go | 45 ++++++++++ pkg/taskrunmetrics/metrics.go | 28 ++++-- pkg/taskrunmetrics/metrics_test.go | 120 ++++++++++++++++++++++++- 4 files changed, 207 insertions(+), 13 deletions(-) diff --git a/pkg/pipelinerunmetrics/metrics.go b/pkg/pipelinerunmetrics/metrics.go index 4794bc8955e..85e5436fd78 100644 --- a/pkg/pipelinerunmetrics/metrics.go +++ b/pkg/pipelinerunmetrics/metrics.go @@ -24,6 +24,7 @@ import ( "time" "github.com/tektoncd/pipeline/pkg/apis/config" + "github.com/tektoncd/pipeline/pkg/apis/pipeline" v1 "github.com/tektoncd/pipeline/pkg/apis/pipeline/v1" listers "github.com/tektoncd/pipeline/pkg/client/listers/pipeline/v1" "go.opencensus.io/stats" @@ -76,6 +77,8 @@ const ( // ReasonCancelled indicates that a PipelineRun was cancelled. // Aliased for backwards compatibility; additional reasons should not be added here. ReasonCancelled = v1.PipelineRunReasonCancelled + + anonymous = "anonymous" ) // Recorder holds keys for Tekton metrics @@ -233,6 +236,24 @@ func nilInsertTag(task, taskrun string) []tag.Mutator { return []tag.Mutator{} } +func getPipelineTagName(pr *v1.PipelineRun) string { + pipelineName := anonymous + switch { + case pr.Spec.PipelineRef != nil && pr.Spec.PipelineRef.Name != "": + pipelineName = pr.Spec.PipelineRef.Name + case pr.Spec.PipelineSpec != nil: + default: + if len(pr.Labels) > 0 { + pipelineLabel, hasPipelineLabel := pr.Labels[pipeline.PipelineLabelKey] + if hasPipelineLabel && len(pipelineLabel) > 0 { + pipelineName = pipelineLabel + } + } + } + + return pipelineName +} + // DurationAndCount logs the duration of PipelineRun execution and // count for number of PipelineRuns succeed or failed // returns an error if its failed to log the metrics @@ -268,10 +289,8 @@ func (r *Recorder) DurationAndCount(pr *v1.PipelineRun, beforeCondition *apis.Co } reason := cond.Reason - pipelineName := "anonymous" - if pr.Spec.PipelineRef != nil && pr.Spec.PipelineRef.Name != "" { - pipelineName = pr.Spec.PipelineRef.Name - } + pipelineName := getPipelineTagName(pr) + ctx, err := tag.New( context.Background(), append([]tag.Mutator{tag.Insert(namespaceTag, pr.Namespace), diff --git a/pkg/pipelinerunmetrics/metrics_test.go b/pkg/pipelinerunmetrics/metrics_test.go index 6e96fe84006..38cafa4bd2e 100644 --- a/pkg/pipelinerunmetrics/metrics_test.go +++ b/pkg/pipelinerunmetrics/metrics_test.go @@ -23,6 +23,8 @@ import ( "testing" "time" + "github.com/tektoncd/pipeline/pkg/apis/pipeline" + "github.com/tektoncd/pipeline/pkg/apis/config" v1 "github.com/tektoncd/pipeline/pkg/apis/pipeline/v1" fakepipelineruninformer "github.com/tektoncd/pipeline/pkg/client/injection/informers/pipeline/v1/pipelinerun/fake" @@ -384,6 +386,49 @@ func TestRecordPipelineRunDurationCount(t *testing.T) { expectedCount: 1, beforeCondition: nil, countWithReason: true, + }, { + name: "for failed pipeline with reference remote pipeline", + pipelineRun: &v1.PipelineRun{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pipelinerun-1", + Namespace: "ns", + Labels: map[string]string{ + pipeline.PipelineLabelKey: "pipeline-remote", + }, + }, + Spec: v1.PipelineRunSpec{ + PipelineRef: &v1.PipelineRef{ + ResolverRef: v1.ResolverRef{ + Resolver: "git", + }, + }, + }, + Status: v1.PipelineRunStatus{ + Status: duckv1.Status{ + Conditions: duckv1.Conditions{{ + Type: apis.ConditionSucceeded, + Status: corev1.ConditionFalse, + }}, + }, + PipelineRunStatusFields: v1.PipelineRunStatusFields{ + StartTime: &startTime, + CompletionTime: &completionTime, + }, + }, + }, + expectedDurationTags: map[string]string{ + "pipeline": "pipeline-remote", + "pipelinerun": "pipelinerun-1", + "namespace": "ns", + "status": "failed", + }, + expectedCountTags: map[string]string{ + "status": "failed", + }, + expectedDuration: 60, + expectedCount: 1, + beforeCondition: nil, + countWithReason: false, }} { t.Run(test.name, func(t *testing.T) { unregisterMetrics() diff --git a/pkg/taskrunmetrics/metrics.go b/pkg/taskrunmetrics/metrics.go index e5b251b497d..ed8b35907a4 100644 --- a/pkg/taskrunmetrics/metrics.go +++ b/pkg/taskrunmetrics/metrics.go @@ -308,6 +308,24 @@ func nilInsertTag(task, taskrun string) []tag.Mutator { return []tag.Mutator{} } +func getTaskTagName(tr *v1.TaskRun) string { + taskName := anonymous + switch { + case tr.Spec.TaskRef != nil && len(tr.Spec.TaskRef.Name) > 0: + taskName = tr.Spec.TaskRef.Name + case tr.Spec.TaskSpec != nil: + default: + if len(tr.Labels) > 0 { + taskLabel, hasTaskLabel := tr.Labels[pipeline.TaskLabelKey] + if hasTaskLabel && len(taskLabel) > 0 { + taskName = taskLabel + } + } + } + + return taskName +} + // DurationAndCount logs the duration of TaskRun execution and // count for number of TaskRuns succeed or failed // returns an error if its failed to log the metrics @@ -329,10 +347,7 @@ func (r *Recorder) DurationAndCount(ctx context.Context, tr *v1.TaskRun, beforeC duration = tr.Status.CompletionTime.Sub(tr.Status.StartTime.Time) } - taskName := anonymous - if tr.Spec.TaskRef != nil { - taskName = tr.Spec.TaskRef.Name - } + taskName := getTaskTagName(tr) cond := tr.Status.GetCondition(apis.ConditionSucceeded) status := "success" @@ -449,10 +464,7 @@ func (r *Recorder) RecordPodLatency(ctx context.Context, pod *corev1.Pod, tr *v1 } latency := scheduledTime.Sub(pod.CreationTimestamp.Time) - taskName := anonymous - if tr.Spec.TaskRef != nil { - taskName = tr.Spec.TaskRef.Name - } + taskName := getTaskTagName(tr) ctx, err := tag.New( ctx, diff --git a/pkg/taskrunmetrics/metrics_test.go b/pkg/taskrunmetrics/metrics_test.go index 437fd032846..03dcb0e5af2 100644 --- a/pkg/taskrunmetrics/metrics_test.go +++ b/pkg/taskrunmetrics/metrics_test.go @@ -272,6 +272,50 @@ func TestRecordTaskRunDurationCount(t *testing.T) { expectedCount: 1, beforeCondition: nil, countWithReason: false, + }, { + name: "for failed taskrun with reference remote task", + taskRun: &v1.TaskRun{ + ObjectMeta: metav1.ObjectMeta{ + Name: "taskrun-1", + Namespace: "ns", + Labels: map[string]string{ + pipeline.TaskLabelKey: "task-remote", + }, + }, + Spec: v1.TaskRunSpec{ + TaskRef: &v1.TaskRef{ + ResolverRef: v1.ResolverRef{ + Resolver: "git", + }, + }, + }, + Status: v1.TaskRunStatus{ + Status: duckv1.Status{ + Conditions: duckv1.Conditions{{ + Type: apis.ConditionSucceeded, + Status: corev1.ConditionFalse, + }}, + }, + TaskRunStatusFields: v1.TaskRunStatusFields{ + StartTime: &startTime, + CompletionTime: &completionTime, + }, + }, + }, + metricName: "taskrun_duration_seconds", + expectedDurationTags: map[string]string{ + "task": "task-remote", + "taskrun": "taskrun-1", + "namespace": "ns", + "status": "failed", + }, + expectedCountTags: map[string]string{ + "status": "failed", + }, + expectedDuration: 60, + expectedCount: 1, + beforeCondition: nil, + countWithReason: false, }, { name: "for succeeded taskrun in pipelinerun", taskRun: &v1.TaskRun{ @@ -574,12 +618,38 @@ func TestRecordPodLatency(t *testing.T) { TaskRef: &v1.TaskRef{Name: "task-1"}, }, } + trFromRemoteTask := &v1.TaskRun{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-taskrun", + Namespace: "foo", + Labels: map[string]string{ + pipeline.TaskLabelKey: "task-remote", + }, + }, + Spec: v1.TaskRunSpec{ + TaskRef: &v1.TaskRef{ + ResolverRef: v1.ResolverRef{Resolver: "task-remote"}, + }, + }, + } + emptyLabelTRFromRemoteTask := &v1.TaskRun{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-taskrun", + Namespace: "foo", + }, + Spec: v1.TaskRunSpec{ + TaskRef: &v1.TaskRef{ + ResolverRef: v1.ResolverRef{Resolver: "task-remote"}, + }, + }, + } for _, td := range []struct { name string pod *corev1.Pod expectedTags map[string]string expectedValue float64 expectingError bool + taskRun *v1.TaskRun }{{ name: "for scheduled pod", pod: &corev1.Pod{ @@ -602,6 +672,53 @@ func TestRecordPodLatency(t *testing.T) { "namespace": "foo", }, expectedValue: 4000, + taskRun: taskRun, + }, { + name: "for scheduled pod with reference remote task", + pod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-taskrun-pod-123456", + Namespace: "foo", + CreationTimestamp: creationTime, + }, + Status: corev1.PodStatus{ + Conditions: []corev1.PodCondition{{ + Type: corev1.PodScheduled, + LastTransitionTime: metav1.Time{Time: creationTime.Add(4 * time.Second)}, + }}, + }, + }, + expectedTags: map[string]string{ + "pod": "test-taskrun-pod-123456", + "task": "task-remote", + "taskrun": "test-taskrun", + "namespace": "foo", + }, + expectedValue: 4000, + taskRun: trFromRemoteTask, + }, { + name: "for scheduled pod - empty label tr reference remote task", + pod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-taskrun-pod-123456", + Namespace: "foo", + CreationTimestamp: creationTime, + }, + Status: corev1.PodStatus{ + Conditions: []corev1.PodCondition{{ + Type: corev1.PodScheduled, + LastTransitionTime: metav1.Time{Time: creationTime.Add(4 * time.Second)}, + }}, + }, + }, + expectedTags: map[string]string{ + "pod": "test-taskrun-pod-123456", + "task": anonymous, + "taskrun": "test-taskrun", + "namespace": "foo", + }, + expectedValue: 4000, + taskRun: emptyLabelTRFromRemoteTask, }, { name: "for non scheduled pod", pod: &corev1.Pod{ @@ -613,6 +730,7 @@ func TestRecordPodLatency(t *testing.T) { Status: corev1.PodStatus{}, }, expectingError: true, + taskRun: taskRun, }} { t.Run(td.name, func(t *testing.T) { unregisterMetrics() @@ -623,7 +741,7 @@ func TestRecordPodLatency(t *testing.T) { t.Fatalf("NewRecorder: %v", err) } - if err := metrics.RecordPodLatency(ctx, td.pod, taskRun); td.expectingError && err == nil { + if err := metrics.RecordPodLatency(ctx, td.pod, td.taskRun); td.expectingError && err == nil { t.Error("RecordPodLatency wanted error, got nil") } else if !td.expectingError { if err != nil {