From 67ff546d89e1ea31beb1db81af7829f9d5dd1bc9 Mon Sep 17 00:00:00 2001 From: weizhichen Date: Thu, 30 Mar 2023 05:00:44 +0000 Subject: [PATCH] fix: dump namespace info --- go.mod | 1 + go.sum | 2 + test/e2e/dynamic_provisioning_test.go | 1 + test/e2e/pre_provisioning_test.go | 1 + .../client_golang/prometheus/testutil/lint.go | 46 ++ .../prometheus/testutil/promlint/promlint.go | 387 ++++++++++ .../prometheus/testutil/testutil.go | 342 +++++++++ .../metrics/testutil/metrics.go | 435 ++++++++++++ .../metrics/testutil/promlint.go | 151 ++++ .../metrics/testutil/testutil.go | 86 +++ vendor/k8s.io/kubelet/LICENSE | 202 ++++++ .../kubelet/pkg/apis/stats/v1alpha1/types.go | 358 ++++++++++ .../test/e2e/framework/debug/dump.go | 187 +++++ .../test/e2e/framework/debug/init/init.go | 101 +++ .../framework/debug/log_size_monitoring.go | 288 ++++++++ .../debug/resource_usage_gatherer.go | 659 ++++++++++++++++++ .../test/e2e/framework/metrics/api.go | 89 +++ .../framework/metrics/api_server_metrics.go | 52 ++ .../metrics/cluster_autoscaler_metrics.go | 40 ++ .../metrics/controller_manager_metrics.go | 40 ++ .../test/e2e/framework/metrics/e2e_metrics.go | 127 ++++ .../test/e2e/framework/metrics/grab.go | 73 ++ .../framework/metrics/interesting_metrics.go | 58 ++ .../e2e/framework/metrics/kubelet_metrics.go | 236 +++++++ .../test/e2e/framework/metrics/latencies.go | 38 + .../e2e/framework/metrics/metrics_grabber.go | 452 ++++++++++++ .../test/e2e/framework/metrics/pod.go | 29 + .../framework/metrics/scheduler_metrics.go | 40 ++ .../metrics/snapshot_controller_metrics.go | 40 ++ .../kubernetes/test/e2e/perftype/perftype.go | 53 ++ vendor/modules.txt | 10 + 31 files changed, 4624 insertions(+) create mode 100644 vendor/github.com/prometheus/client_golang/prometheus/testutil/lint.go create mode 100644 vendor/github.com/prometheus/client_golang/prometheus/testutil/promlint/promlint.go create mode 100644 vendor/github.com/prometheus/client_golang/prometheus/testutil/testutil.go create mode 100644 vendor/k8s.io/component-base/metrics/testutil/metrics.go create mode 100644 vendor/k8s.io/component-base/metrics/testutil/promlint.go create mode 100644 vendor/k8s.io/component-base/metrics/testutil/testutil.go create mode 100644 vendor/k8s.io/kubelet/LICENSE create mode 100644 vendor/k8s.io/kubelet/pkg/apis/stats/v1alpha1/types.go create mode 100644 vendor/k8s.io/kubernetes/test/e2e/framework/debug/dump.go create mode 100644 vendor/k8s.io/kubernetes/test/e2e/framework/debug/init/init.go create mode 100644 vendor/k8s.io/kubernetes/test/e2e/framework/debug/log_size_monitoring.go create mode 100644 vendor/k8s.io/kubernetes/test/e2e/framework/debug/resource_usage_gatherer.go create mode 100644 vendor/k8s.io/kubernetes/test/e2e/framework/metrics/api.go create mode 100644 vendor/k8s.io/kubernetes/test/e2e/framework/metrics/api_server_metrics.go create mode 100644 vendor/k8s.io/kubernetes/test/e2e/framework/metrics/cluster_autoscaler_metrics.go create mode 100644 vendor/k8s.io/kubernetes/test/e2e/framework/metrics/controller_manager_metrics.go create mode 100644 vendor/k8s.io/kubernetes/test/e2e/framework/metrics/e2e_metrics.go create mode 100644 vendor/k8s.io/kubernetes/test/e2e/framework/metrics/grab.go create mode 100644 vendor/k8s.io/kubernetes/test/e2e/framework/metrics/interesting_metrics.go create mode 100644 vendor/k8s.io/kubernetes/test/e2e/framework/metrics/kubelet_metrics.go create mode 100644 vendor/k8s.io/kubernetes/test/e2e/framework/metrics/latencies.go create mode 100644 
vendor/k8s.io/kubernetes/test/e2e/framework/metrics/metrics_grabber.go create mode 100644 vendor/k8s.io/kubernetes/test/e2e/framework/metrics/pod.go create mode 100644 vendor/k8s.io/kubernetes/test/e2e/framework/metrics/scheduler_metrics.go create mode 100644 vendor/k8s.io/kubernetes/test/e2e/framework/metrics/snapshot_controller_metrics.go create mode 100644 vendor/k8s.io/kubernetes/test/e2e/perftype/perftype.go diff --git a/go.mod b/go.mod index b468d773c8..c4945b4e04 100644 --- a/go.mod +++ b/go.mod @@ -128,6 +128,7 @@ require ( k8s.io/component-helpers v0.26.0 // indirect k8s.io/kube-openapi v0.0.0-20221012153701-172d655c2280 // indirect k8s.io/kubectl v0.0.0 // indirect + k8s.io/kubelet v0.26.0 // indirect sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.0.33 // indirect sigs.k8s.io/json v0.0.0-20220713155537-f223a00ba0e2 // indirect sigs.k8s.io/structured-merge-diff/v4 v4.2.3 // indirect diff --git a/go.sum b/go.sum index 06c7a1e1b9..094a2c5ec2 100644 --- a/go.sum +++ b/go.sum @@ -890,6 +890,8 @@ k8s.io/kube-openapi v0.0.0-20221012153701-172d655c2280 h1:+70TFaan3hfJzs+7VK2o+O k8s.io/kube-openapi v0.0.0-20221012153701-172d655c2280/go.mod h1:+Axhij7bCpeqhklhUTe3xmOn6bWxolyZEeyaFpjGtl4= k8s.io/kubectl v0.26.0 h1:xmrzoKR9CyNdzxBmXV7jW9Ln8WMrwRK6hGbbf69o4T0= k8s.io/kubectl v0.26.0/go.mod h1:eInP0b+U9XUJWSYeU9XZnTA+cVYuWyl3iYPGtru0qhQ= +k8s.io/kubelet v0.26.0 h1:08bDb5IoUH/1K1t2NUwnGIIWxjm9LSqn6k3FWw1tJGI= +k8s.io/kubelet v0.26.0/go.mod h1:DluF+d8jS2nE/Hs7CC3QM+OZlIEb22NTOihQ3EDwCQ4= k8s.io/kubernetes v1.26.0 h1:fL8VMr4xlfTazPORLhz5fsvO5I3bsFpmynVxZTH1ItQ= k8s.io/kubernetes v1.26.0/go.mod h1:z0aCJwn6DxzB/dDiWLbQaJO5jWOR2qoaCMnmSAx45XM= k8s.io/mount-utils v0.26.0 h1:MG5oXE2aF1UHMJ3KFbVtBtiRA4J/2u0sijrkfsoaMwU= diff --git a/test/e2e/dynamic_provisioning_test.go b/test/e2e/dynamic_provisioning_test.go index e59c343f88..458bd4d1aa 100644 --- a/test/e2e/dynamic_provisioning_test.go +++ b/test/e2e/dynamic_provisioning_test.go @@ -33,6 +33,7 @@ import ( v1 "k8s.io/api/core/v1" clientset "k8s.io/client-go/kubernetes" "k8s.io/kubernetes/test/e2e/framework" + _ "k8s.io/kubernetes/test/e2e/framework/debug/init" admissionapi "k8s.io/pod-security-admission/api" ) diff --git a/test/e2e/pre_provisioning_test.go b/test/e2e/pre_provisioning_test.go index 058a6990e4..e31ec5e61c 100644 --- a/test/e2e/pre_provisioning_test.go +++ b/test/e2e/pre_provisioning_test.go @@ -30,6 +30,7 @@ import ( storagev1 "k8s.io/api/storage/v1" clientset "k8s.io/client-go/kubernetes" "k8s.io/kubernetes/test/e2e/framework" + _ "k8s.io/kubernetes/test/e2e/framework/debug/init" admissionapi "k8s.io/pod-security-admission/api" ) diff --git a/vendor/github.com/prometheus/client_golang/prometheus/testutil/lint.go b/vendor/github.com/prometheus/client_golang/prometheus/testutil/lint.go new file mode 100644 index 0000000000..8d2f05500b --- /dev/null +++ b/vendor/github.com/prometheus/client_golang/prometheus/testutil/lint.go @@ -0,0 +1,46 @@ +// Copyright 2020 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package testutil + +import ( + "fmt" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/testutil/promlint" +) + +// CollectAndLint registers the provided Collector with a newly created pedantic +// Registry. It then calls GatherAndLint with that Registry and with the +// provided metricNames. +func CollectAndLint(c prometheus.Collector, metricNames ...string) ([]promlint.Problem, error) { + reg := prometheus.NewPedanticRegistry() + if err := reg.Register(c); err != nil { + return nil, fmt.Errorf("registering collector failed: %w", err) + } + return GatherAndLint(reg, metricNames...) +} + +// GatherAndLint gathers all metrics from the provided Gatherer and checks them +// with the linter in the promlint package. If any metricNames are provided, +// only metrics with those names are checked. +func GatherAndLint(g prometheus.Gatherer, metricNames ...string) ([]promlint.Problem, error) { + got, err := g.Gather() + if err != nil { + return nil, fmt.Errorf("gathering metrics failed: %w", err) + } + if metricNames != nil { + got = filterMetrics(got, metricNames) + } + return promlint.NewWithMetricFamilies(got).Lint() +} diff --git a/vendor/github.com/prometheus/client_golang/prometheus/testutil/promlint/promlint.go b/vendor/github.com/prometheus/client_golang/prometheus/testutil/promlint/promlint.go new file mode 100644 index 0000000000..a20f159b78 --- /dev/null +++ b/vendor/github.com/prometheus/client_golang/prometheus/testutil/promlint/promlint.go @@ -0,0 +1,387 @@ +// Copyright 2020 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package promlint provides a linter for Prometheus metrics. +package promlint + +import ( + "errors" + "fmt" + "io" + "regexp" + "sort" + "strings" + + "github.com/prometheus/common/expfmt" + + dto "github.com/prometheus/client_model/go" +) + +// A Linter is a Prometheus metrics linter. It identifies issues with metric +// names, types, and metadata, and reports them to the caller. +type Linter struct { + // The linter will read metrics in the Prometheus text format from r and + // then lint it, _and_ it will lint the metrics provided directly as + // MetricFamily proto messages in mfs. Note, however, that the current + // constructor functions New and NewWithMetricFamilies only ever set one + // of them. + r io.Reader + mfs []*dto.MetricFamily +} + +// A Problem is an issue detected by a Linter. +type Problem struct { + // The name of the metric indicated by this Problem. + Metric string + + // A description of the issue for this Problem. + Text string +} + +// newProblem is helper function to create a Problem. +func newProblem(mf *dto.MetricFamily, text string) Problem { + return Problem{ + Metric: mf.GetName(), + Text: text, + } +} + +// New creates a new Linter that reads an input stream of Prometheus metrics in +// the Prometheus text exposition format. 
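// A minimal usage sketch (illustrative only, not part of the upstream file; the
// exposition text below is made up):
//
//	linter := promlint.New(strings.NewReader("# TYPE foo_seconds counter\nfoo_seconds 1\n"))
//	problems, _ := linter.Lint()
//	// problems will report the missing help text and the missing "_total" suffix.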
+func New(r io.Reader) *Linter { + return &Linter{ + r: r, + } +} + +// NewWithMetricFamilies creates a new Linter that reads from a slice of +// MetricFamily protobuf messages. +func NewWithMetricFamilies(mfs []*dto.MetricFamily) *Linter { + return &Linter{ + mfs: mfs, + } +} + +// Lint performs a linting pass, returning a slice of Problems indicating any +// issues found in the metrics stream. The slice is sorted by metric name +// and issue description. +func (l *Linter) Lint() ([]Problem, error) { + var problems []Problem + + if l.r != nil { + d := expfmt.NewDecoder(l.r, expfmt.FmtText) + + mf := &dto.MetricFamily{} + for { + if err := d.Decode(mf); err != nil { + if errors.Is(err, io.EOF) { + break + } + + return nil, err + } + + problems = append(problems, lint(mf)...) + } + } + for _, mf := range l.mfs { + problems = append(problems, lint(mf)...) + } + + // Ensure deterministic output. + sort.SliceStable(problems, func(i, j int) bool { + if problems[i].Metric == problems[j].Metric { + return problems[i].Text < problems[j].Text + } + return problems[i].Metric < problems[j].Metric + }) + + return problems, nil +} + +// lint is the entry point for linting a single metric. +func lint(mf *dto.MetricFamily) []Problem { + fns := []func(mf *dto.MetricFamily) []Problem{ + lintHelp, + lintMetricUnits, + lintCounter, + lintHistogramSummaryReserved, + lintMetricTypeInName, + lintReservedChars, + lintCamelCase, + lintUnitAbbreviations, + } + + var problems []Problem + for _, fn := range fns { + problems = append(problems, fn(mf)...) + } + + // TODO(mdlayher): lint rules for specific metrics types. + return problems +} + +// lintHelp detects issues related to the help text for a metric. +func lintHelp(mf *dto.MetricFamily) []Problem { + var problems []Problem + + // Expect all metrics to have help text available. + if mf.Help == nil { + problems = append(problems, newProblem(mf, "no help text")) + } + + return problems +} + +// lintMetricUnits detects issues with metric unit names. +func lintMetricUnits(mf *dto.MetricFamily) []Problem { + var problems []Problem + + unit, base, ok := metricUnits(*mf.Name) + if !ok { + // No known units detected. + return nil + } + + // Unit is already a base unit. + if unit == base { + return nil + } + + problems = append(problems, newProblem(mf, fmt.Sprintf("use base unit %q instead of %q", base, unit))) + + return problems +} + +// lintCounter detects issues specific to counters, as well as patterns that should +// only be used with counters. +func lintCounter(mf *dto.MetricFamily) []Problem { + var problems []Problem + + isCounter := mf.GetType() == dto.MetricType_COUNTER + isUntyped := mf.GetType() == dto.MetricType_UNTYPED + hasTotalSuffix := strings.HasSuffix(mf.GetName(), "_total") + + switch { + case isCounter && !hasTotalSuffix: + problems = append(problems, newProblem(mf, `counter metrics should have "_total" suffix`)) + case !isUntyped && !isCounter && hasTotalSuffix: + problems = append(problems, newProblem(mf, `non-counter metrics should not have "_total" suffix`)) + } + + return problems +} + +// lintHistogramSummaryReserved detects when other types of metrics use names or labels +// reserved for use by histograms and/or summaries. +func lintHistogramSummaryReserved(mf *dto.MetricFamily) []Problem { + // These rules do not apply to untyped metrics. 
+ t := mf.GetType() + if t == dto.MetricType_UNTYPED { + return nil + } + + var problems []Problem + + isHistogram := t == dto.MetricType_HISTOGRAM + isSummary := t == dto.MetricType_SUMMARY + + n := mf.GetName() + + if !isHistogram && strings.HasSuffix(n, "_bucket") { + problems = append(problems, newProblem(mf, `non-histogram metrics should not have "_bucket" suffix`)) + } + if !isHistogram && !isSummary && strings.HasSuffix(n, "_count") { + problems = append(problems, newProblem(mf, `non-histogram and non-summary metrics should not have "_count" suffix`)) + } + if !isHistogram && !isSummary && strings.HasSuffix(n, "_sum") { + problems = append(problems, newProblem(mf, `non-histogram and non-summary metrics should not have "_sum" suffix`)) + } + + for _, m := range mf.GetMetric() { + for _, l := range m.GetLabel() { + ln := l.GetName() + + if !isHistogram && ln == "le" { + problems = append(problems, newProblem(mf, `non-histogram metrics should not have "le" label`)) + } + if !isSummary && ln == "quantile" { + problems = append(problems, newProblem(mf, `non-summary metrics should not have "quantile" label`)) + } + } + } + + return problems +} + +// lintMetricTypeInName detects when metric types are included in the metric name. +func lintMetricTypeInName(mf *dto.MetricFamily) []Problem { + var problems []Problem + n := strings.ToLower(mf.GetName()) + + for i, t := range dto.MetricType_name { + if i == int32(dto.MetricType_UNTYPED) { + continue + } + + typename := strings.ToLower(t) + if strings.Contains(n, "_"+typename+"_") || strings.HasSuffix(n, "_"+typename) { + problems = append(problems, newProblem(mf, fmt.Sprintf(`metric name should not include type '%s'`, typename))) + } + } + return problems +} + +// lintReservedChars detects colons in metric names. +func lintReservedChars(mf *dto.MetricFamily) []Problem { + var problems []Problem + if strings.Contains(mf.GetName(), ":") { + problems = append(problems, newProblem(mf, "metric names should not contain ':'")) + } + return problems +} + +var camelCase = regexp.MustCompile(`[a-z][A-Z]`) + +// lintCamelCase detects metric names and label names written in camelCase. +func lintCamelCase(mf *dto.MetricFamily) []Problem { + var problems []Problem + if camelCase.FindString(mf.GetName()) != "" { + problems = append(problems, newProblem(mf, "metric names should be written in 'snake_case' not 'camelCase'")) + } + + for _, m := range mf.GetMetric() { + for _, l := range m.GetLabel() { + if camelCase.FindString(l.GetName()) != "" { + problems = append(problems, newProblem(mf, "label names should be written in 'snake_case' not 'camelCase'")) + } + } + } + return problems +} + +// lintUnitAbbreviations detects abbreviated units in the metric name. +func lintUnitAbbreviations(mf *dto.MetricFamily) []Problem { + var problems []Problem + n := strings.ToLower(mf.GetName()) + for _, s := range unitAbbreviations { + if strings.Contains(n, "_"+s+"_") || strings.HasSuffix(n, "_"+s) { + problems = append(problems, newProblem(mf, "metric names should not contain abbreviated units")) + } + } + return problems +} + +// metricUnits attempts to detect known unit types used as part of a metric name, +// e.g. "foo_bytes_total" or "bar_baz_milligrams". +func metricUnits(m string) (unit, base string, ok bool) { + ss := strings.Split(m, "_") + + for unit, base := range units { + // Also check for "no prefix". 
+ for _, p := range append(unitPrefixes, "") { + for _, s := range ss { + // Attempt to explicitly match a known unit with a known prefix, + // as some words may look like "units" when matching suffix. + // + // As an example, "thermometers" should not match "meters", but + // "kilometers" should. + if s == p+unit { + return p + unit, base, true + } + } + } + } + + return "", "", false +} + +// Units and their possible prefixes recognized by this library. More can be +// added over time as needed. +var ( + // map a unit to the appropriate base unit. + units = map[string]string{ + // Base units. + "amperes": "amperes", + "bytes": "bytes", + "celsius": "celsius", // Also allow Celsius because it is common in typical Prometheus use cases. + "grams": "grams", + "joules": "joules", + "kelvin": "kelvin", // SI base unit, used in special cases (e.g. color temperature, scientific measurements). + "meters": "meters", // Both American and international spelling permitted. + "metres": "metres", + "seconds": "seconds", + "volts": "volts", + + // Non base units. + // Time. + "minutes": "seconds", + "hours": "seconds", + "days": "seconds", + "weeks": "seconds", + // Temperature. + "kelvins": "kelvin", + "fahrenheit": "celsius", + "rankine": "celsius", + // Length. + "inches": "meters", + "yards": "meters", + "miles": "meters", + // Bytes. + "bits": "bytes", + // Energy. + "calories": "joules", + // Mass. + "pounds": "grams", + "ounces": "grams", + } + + unitPrefixes = []string{ + "pico", + "nano", + "micro", + "milli", + "centi", + "deci", + "deca", + "hecto", + "kilo", + "kibi", + "mega", + "mibi", + "giga", + "gibi", + "tera", + "tebi", + "peta", + "pebi", + } + + // Common abbreviations that we'd like to discourage. + unitAbbreviations = []string{ + "s", + "ms", + "us", + "ns", + "sec", + "b", + "kb", + "mb", + "gb", + "tb", + "pb", + "m", + "h", + "d", + } +) diff --git a/vendor/github.com/prometheus/client_golang/prometheus/testutil/testutil.go b/vendor/github.com/prometheus/client_golang/prometheus/testutil/testutil.go new file mode 100644 index 0000000000..91b83b5285 --- /dev/null +++ b/vendor/github.com/prometheus/client_golang/prometheus/testutil/testutil.go @@ -0,0 +1,342 @@ +// Copyright 2018 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package testutil provides helpers to test code using the prometheus package +// of client_golang. +// +// While writing unit tests to verify correct instrumentation of your code, it's +// a common mistake to mostly test the instrumentation library instead of your +// own code. Rather than verifying that a prometheus.Counter's value has changed +// as expected or that it shows up in the exposition after registration, it is +// in general more robust and more faithful to the concept of unit tests to use +// mock implementations of the prometheus.Counter and prometheus.Registerer +// interfaces that simply assert that the Add or Register methods have been +// called with the expected arguments. 
However, this might be overkill in simple +// scenarios. The ToFloat64 function is provided for simple inspection of a +// single-value metric, but it has to be used with caution. +// +// End-to-end tests to verify all or larger parts of the metrics exposition can +// be implemented with the CollectAndCompare or GatherAndCompare functions. The +// most appropriate use is not so much testing instrumentation of your code, but +// testing custom prometheus.Collector implementations and in particular whole +// exporters, i.e. programs that retrieve telemetry data from a 3rd party source +// and convert it into Prometheus metrics. +// +// In a similar pattern, CollectAndLint and GatherAndLint can be used to detect +// metrics that have issues with their name, type, or metadata without being +// necessarily invalid, e.g. a counter with a name missing the “_total” suffix. +package testutil + +import ( + "bytes" + "fmt" + "io" + "net/http" + "reflect" + + "github.com/davecgh/go-spew/spew" + dto "github.com/prometheus/client_model/go" + "github.com/prometheus/common/expfmt" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/internal" +) + +// ToFloat64 collects all Metrics from the provided Collector. It expects that +// this results in exactly one Metric being collected, which must be a Gauge, +// Counter, or Untyped. In all other cases, ToFloat64 panics. ToFloat64 returns +// the value of the collected Metric. +// +// The Collector provided is typically a simple instance of Gauge or Counter, or +// – less commonly – a GaugeVec or CounterVec with exactly one element. But any +// Collector fulfilling the prerequisites described above will do. +// +// Use this function with caution. It is computationally very expensive and thus +// not suited at all to read values from Metrics in regular code. This is really +// only for testing purposes, and even for testing, other approaches are often +// more appropriate (see this package's documentation). +// +// A clear anti-pattern would be to use a metric type from the prometheus +// package to track values that are also needed for something else than the +// exposition of Prometheus metrics. For example, you would like to track the +// number of items in a queue because your code should reject queuing further +// items if a certain limit is reached. It is tempting to track the number of +// items in a prometheus.Gauge, as it is then easily available as a metric for +// exposition, too. However, then you would need to call ToFloat64 in your +// regular code, potentially quite often. The recommended way is to track the +// number of items conventionally (in the way you would have done it without +// considering Prometheus metrics) and then expose the number with a +// prometheus.GaugeFunc. 
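// A short usage sketch (illustrative only, not part of the upstream file; the
// gauge name and the testing.T value are assumed):
//
//	queued := prometheus.NewGauge(prometheus.GaugeOpts{Name: "queued_items"})
//	queued.Set(3)
//	if got := testutil.ToFloat64(queued); got != 3 {
//		t.Errorf("queued_items = %v, want 3", got)
//	}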
+func ToFloat64(c prometheus.Collector) float64 { + var ( + m prometheus.Metric + mCount int + mChan = make(chan prometheus.Metric) + done = make(chan struct{}) + ) + + go func() { + for m = range mChan { + mCount++ + } + close(done) + }() + + c.Collect(mChan) + close(mChan) + <-done + + if mCount != 1 { + panic(fmt.Errorf("collected %d metrics instead of exactly 1", mCount)) + } + + pb := &dto.Metric{} + if err := m.Write(pb); err != nil { + panic(fmt.Errorf("error happened while collecting metrics: %w", err)) + } + if pb.Gauge != nil { + return pb.Gauge.GetValue() + } + if pb.Counter != nil { + return pb.Counter.GetValue() + } + if pb.Untyped != nil { + return pb.Untyped.GetValue() + } + panic(fmt.Errorf("collected a non-gauge/counter/untyped metric: %s", pb)) +} + +// CollectAndCount registers the provided Collector with a newly created +// pedantic Registry. It then calls GatherAndCount with that Registry and with +// the provided metricNames. In the unlikely case that the registration or the +// gathering fails, this function panics. (This is inconsistent with the other +// CollectAnd… functions in this package and has historical reasons. Changing +// the function signature would be a breaking change and will therefore only +// happen with the next major version bump.) +func CollectAndCount(c prometheus.Collector, metricNames ...string) int { + reg := prometheus.NewPedanticRegistry() + if err := reg.Register(c); err != nil { + panic(fmt.Errorf("registering collector failed: %w", err)) + } + result, err := GatherAndCount(reg, metricNames...) + if err != nil { + panic(err) + } + return result +} + +// GatherAndCount gathers all metrics from the provided Gatherer and counts +// them. It returns the number of metric children in all gathered metric +// families together. If any metricNames are provided, only metrics with those +// names are counted. +func GatherAndCount(g prometheus.Gatherer, metricNames ...string) (int, error) { + got, err := g.Gather() + if err != nil { + return 0, fmt.Errorf("gathering metrics failed: %w", err) + } + if metricNames != nil { + got = filterMetrics(got, metricNames) + } + + result := 0 + for _, mf := range got { + result += len(mf.GetMetric()) + } + return result, nil +} + +// ScrapeAndCompare calls a remote exporter's endpoint which is expected to return some metrics in +// plain text format. Then it compares it with the results that the `expected` would return. +// If the `metricNames` is not empty it would filter the comparison only to the given metric names. +func ScrapeAndCompare(url string, expected io.Reader, metricNames ...string) error { + resp, err := http.Get(url) + if err != nil { + return fmt.Errorf("scraping metrics failed: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return fmt.Errorf("the scraping target returned a status code other than 200: %d", + resp.StatusCode) + } + + scraped, err := convertReaderToMetricFamily(resp.Body) + if err != nil { + return err + } + + wanted, err := convertReaderToMetricFamily(expected) + if err != nil { + return err + } + + return compareMetricFamilies(scraped, wanted, metricNames...) +} + +// CollectAndCompare registers the provided Collector with a newly created +// pedantic Registry. It then calls GatherAndCompare with that Registry and with +// the provided metricNames. 
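// A hedged sketch of the typical call (illustrative only, not part of the
// upstream file; requestsCounter and the expected text are made up):
//
//	expected := strings.NewReader(`# HELP requests_total Total number of handled requests.
//	# TYPE requests_total counter
//	requests_total 5
//	`)
//	if err := testutil.CollectAndCompare(requestsCounter, expected, "requests_total"); err != nil {
//		t.Errorf("unexpected metrics: %v", err)
//	}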
+func CollectAndCompare(c prometheus.Collector, expected io.Reader, metricNames ...string) error { + reg := prometheus.NewPedanticRegistry() + if err := reg.Register(c); err != nil { + return fmt.Errorf("registering collector failed: %w", err) + } + return GatherAndCompare(reg, expected, metricNames...) +} + +// GatherAndCompare gathers all metrics from the provided Gatherer and compares +// it to an expected output read from the provided Reader in the Prometheus text +// exposition format. If any metricNames are provided, only metrics with those +// names are compared. +func GatherAndCompare(g prometheus.Gatherer, expected io.Reader, metricNames ...string) error { + return TransactionalGatherAndCompare(prometheus.ToTransactionalGatherer(g), expected, metricNames...) +} + +// TransactionalGatherAndCompare gathers all metrics from the provided Gatherer and compares +// it to an expected output read from the provided Reader in the Prometheus text +// exposition format. If any metricNames are provided, only metrics with those +// names are compared. +func TransactionalGatherAndCompare(g prometheus.TransactionalGatherer, expected io.Reader, metricNames ...string) error { + got, done, err := g.Gather() + defer done() + if err != nil { + return fmt.Errorf("gathering metrics failed: %w", err) + } + + wanted, err := convertReaderToMetricFamily(expected) + if err != nil { + return err + } + + return compareMetricFamilies(got, wanted, metricNames...) +} + +// convertReaderToMetricFamily would read from a io.Reader object and convert it to a slice of +// dto.MetricFamily. +func convertReaderToMetricFamily(reader io.Reader) ([]*dto.MetricFamily, error) { + var tp expfmt.TextParser + notNormalized, err := tp.TextToMetricFamilies(reader) + if err != nil { + return nil, fmt.Errorf("converting reader to metric families failed: %w", err) + } + + return internal.NormalizeMetricFamilies(notNormalized), nil +} + +// compareMetricFamilies would compare 2 slices of metric families, and optionally filters both of +// them to the `metricNames` provided. +func compareMetricFamilies(got, expected []*dto.MetricFamily, metricNames ...string) error { + if metricNames != nil { + got = filterMetrics(got, metricNames) + } + + return compare(got, expected) +} + +// compare encodes both provided slices of metric families into the text format, +// compares their string message, and returns an error if they do not match. +// The error contains the encoded text of both the desired and the actual +// result. +func compare(got, want []*dto.MetricFamily) error { + var gotBuf, wantBuf bytes.Buffer + enc := expfmt.NewEncoder(&gotBuf, expfmt.FmtText) + for _, mf := range got { + if err := enc.Encode(mf); err != nil { + return fmt.Errorf("encoding gathered metrics failed: %w", err) + } + } + enc = expfmt.NewEncoder(&wantBuf, expfmt.FmtText) + for _, mf := range want { + if err := enc.Encode(mf); err != nil { + return fmt.Errorf("encoding expected metrics failed: %w", err) + } + } + if diffErr := diff(wantBuf, gotBuf); diffErr != "" { + return fmt.Errorf(diffErr) + } + return nil +} + +// diff returns a diff of both values as long as both are of the same type and +// are a struct, map, slice, array or string. Otherwise it returns an empty string. 
+func diff(expected, actual interface{}) string { + if expected == nil || actual == nil { + return "" + } + + et, ek := typeAndKind(expected) + at, _ := typeAndKind(actual) + if et != at { + return "" + } + + if ek != reflect.Struct && ek != reflect.Map && ek != reflect.Slice && ek != reflect.Array && ek != reflect.String { + return "" + } + + var e, a string + c := spew.ConfigState{ + Indent: " ", + DisablePointerAddresses: true, + DisableCapacities: true, + SortKeys: true, + } + if et != reflect.TypeOf("") { + e = c.Sdump(expected) + a = c.Sdump(actual) + } else { + e = reflect.ValueOf(expected).String() + a = reflect.ValueOf(actual).String() + } + + diff, _ := internal.GetUnifiedDiffString(internal.UnifiedDiff{ + A: internal.SplitLines(e), + B: internal.SplitLines(a), + FromFile: "metric output does not match expectation; want", + FromDate: "", + ToFile: "got:", + ToDate: "", + Context: 1, + }) + + if diff == "" { + return "" + } + + return "\n\nDiff:\n" + diff +} + +// typeAndKind returns the type and kind of the given interface{} +func typeAndKind(v interface{}) (reflect.Type, reflect.Kind) { + t := reflect.TypeOf(v) + k := t.Kind() + + if k == reflect.Ptr { + t = t.Elem() + k = t.Kind() + } + return t, k +} + +func filterMetrics(metrics []*dto.MetricFamily, names []string) []*dto.MetricFamily { + var filtered []*dto.MetricFamily + for _, m := range metrics { + for _, name := range names { + if m.GetName() == name { + filtered = append(filtered, m) + break + } + } + } + return filtered +} diff --git a/vendor/k8s.io/component-base/metrics/testutil/metrics.go b/vendor/k8s.io/component-base/metrics/testutil/metrics.go new file mode 100644 index 0000000000..df3f8ee0c1 --- /dev/null +++ b/vendor/k8s.io/component-base/metrics/testutil/metrics.go @@ -0,0 +1,435 @@ +/* +Copyright 2019 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package testutil + +import ( + "fmt" + "io" + "math" + "reflect" + "sort" + "strings" + + dto "github.com/prometheus/client_model/go" + "github.com/prometheus/common/expfmt" + "github.com/prometheus/common/model" + + "k8s.io/component-base/metrics" +) + +var ( + // MetricNameLabel is label under which model.Sample stores metric name + MetricNameLabel model.LabelName = model.MetricNameLabel + // QuantileLabel is label under which model.Sample stores latency quantile value + QuantileLabel model.LabelName = model.QuantileLabel +) + +// Metrics is generic metrics for other specific metrics +type Metrics map[string]model.Samples + +// Equal returns true if all metrics are the same as the arguments. +func (m *Metrics) Equal(o Metrics) bool { + var leftKeySet []string + var rightKeySet []string + for k := range *m { + leftKeySet = append(leftKeySet, k) + } + for k := range o { + rightKeySet = append(rightKeySet, k) + } + if !reflect.DeepEqual(leftKeySet, rightKeySet) { + return false + } + for _, k := range leftKeySet { + if !(*m)[k].Equal(o[k]) { + return false + } + } + return true +} + +// NewMetrics returns new metrics which are initialized. 
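// A brief usage sketch (illustrative only, not part of the upstream file;
// rawText stands in for a component's /metrics response):
//
//	m := NewMetrics()
//	if err := ParseMetrics(rawText, &m); err != nil {
//		t.Fatalf("parsing metrics: %v", err)
//	}
//	samples := m["apiserver_request_total"]
//	_ = samples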
+func NewMetrics() Metrics { + result := make(Metrics) + return result +} + +// ParseMetrics parses Metrics from data returned from prometheus endpoint +func ParseMetrics(data string, output *Metrics) error { + dec := expfmt.NewDecoder(strings.NewReader(data), expfmt.FmtText) + decoder := expfmt.SampleDecoder{ + Dec: dec, + Opts: &expfmt.DecodeOptions{}, + } + + for { + var v model.Vector + if err := decoder.Decode(&v); err != nil { + if err == io.EOF { + // Expected loop termination condition. + return nil + } + continue + } + for _, metric := range v { + name := string(metric.Metric[MetricNameLabel]) + (*output)[name] = append((*output)[name], metric) + } + } +} + +// TextToMetricFamilies reads 'in' as the simple and flat text-based exchange +// format and creates MetricFamily proto messages. It returns the MetricFamily +// proto messages in a map where the metric names are the keys, along with any +// error encountered. +func TextToMetricFamilies(in io.Reader) (map[string]*dto.MetricFamily, error) { + var textParser expfmt.TextParser + return textParser.TextToMetricFamilies(in) +} + +// PrintSample returns formatted representation of metric Sample +func PrintSample(sample *model.Sample) string { + buf := make([]string, 0) + // Id is a VERY special label. For 'normal' container it's useless, but it's necessary + // for 'system' containers (e.g. /docker-daemon, /kubelet, etc.). We know if that's the + // case by checking if there's a label "kubernetes_container_name" present. It's hacky + // but it works... + _, normalContainer := sample.Metric["kubernetes_container_name"] + for k, v := range sample.Metric { + if strings.HasPrefix(string(k), "__") { + continue + } + + if string(k) == "id" && normalContainer { + continue + } + buf = append(buf, fmt.Sprintf("%v=%v", string(k), v)) + } + return fmt.Sprintf("[%v] = %v", strings.Join(buf, ","), sample.Value) +} + +// ComputeHistogramDelta computes the change in histogram metric for a selected label. 
+// Results are stored in after samples +func ComputeHistogramDelta(before, after model.Samples, label model.LabelName) { + beforeSamplesMap := make(map[string]*model.Sample) + for _, bSample := range before { + beforeSamplesMap[makeKey(bSample.Metric[label], bSample.Metric["le"])] = bSample + } + for _, aSample := range after { + if bSample, found := beforeSamplesMap[makeKey(aSample.Metric[label], aSample.Metric["le"])]; found { + aSample.Value = aSample.Value - bSample.Value + } + } +} + +func makeKey(a, b model.LabelValue) string { + return string(a) + "___" + string(b) +} + +// GetMetricValuesForLabel returns value of metric for a given dimension +func GetMetricValuesForLabel(ms Metrics, metricName, label string) map[string]int64 { + samples, found := ms[metricName] + result := make(map[string]int64, len(samples)) + if !found { + return result + } + for _, sample := range samples { + count := int64(sample.Value) + dimensionName := string(sample.Metric[model.LabelName(label)]) + result[dimensionName] = count + } + return result +} + +// ValidateMetrics verifies if every sample of metric has all expected labels +func ValidateMetrics(metrics Metrics, metricName string, expectedLabels ...string) error { + samples, ok := metrics[metricName] + if !ok { + return fmt.Errorf("metric %q was not found in metrics", metricName) + } + for _, sample := range samples { + for _, l := range expectedLabels { + if _, ok := sample.Metric[model.LabelName(l)]; !ok { + return fmt.Errorf("metric %q is missing label %q, sample: %q", metricName, l, sample.String()) + } + } + } + return nil +} + +// Histogram wraps prometheus histogram DTO (data transfer object) +type Histogram struct { + *dto.Histogram +} + +// HistogramVec wraps a slice of Histogram. +// Note that each Histogram must have the same number of buckets. +type HistogramVec []*Histogram + +// GetAggregatedSampleCount aggregates the sample count of each inner Histogram. +func (vec HistogramVec) GetAggregatedSampleCount() uint64 { + var count uint64 + for _, hist := range vec { + count += hist.GetSampleCount() + } + return count +} + +// GetAggregatedSampleSum aggregates the sample sum of each inner Histogram. +func (vec HistogramVec) GetAggregatedSampleSum() float64 { + var sum float64 + for _, hist := range vec { + sum += hist.GetSampleSum() + } + return sum +} + +// Quantile first aggregates inner buckets of each Histogram, and then +// computes q-th quantile of a cumulative histogram. +func (vec HistogramVec) Quantile(q float64) float64 { + var buckets []bucket + + for i, hist := range vec { + for j, bckt := range hist.Bucket { + if i == 0 { + buckets = append(buckets, bucket{ + count: float64(bckt.GetCumulativeCount()), + upperBound: bckt.GetUpperBound(), + }) + } else { + buckets[j].count += float64(bckt.GetCumulativeCount()) + } + } + } + + if len(buckets) == 0 || buckets[len(buckets)-1].upperBound != math.Inf(+1) { + // The list of buckets in dto.Histogram doesn't include the final +Inf bucket, so we + // add it here for the rest of the samples. + buckets = append(buckets, bucket{ + count: float64(vec.GetAggregatedSampleCount()), + upperBound: math.Inf(+1), + }) + } + + return bucketQuantile(q, buckets) +} + +// Average computes wrapped histograms' average value. +func (vec HistogramVec) Average() float64 { + return vec.GetAggregatedSampleSum() / float64(vec.GetAggregatedSampleCount()) +} + +// Validate makes sure the wrapped histograms have all necessary fields set and with valid values. 
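// A hedged sketch of the check-then-use flow (illustrative only, not part of
// the upstream file; gatherer is an assumed metrics.Gatherer and the metric
// name is made up):
//
//	vec, err := GetHistogramVecFromGatherer(gatherer, "operation_duration_seconds", nil)
//	if err != nil {
//		t.Fatalf("gathering histogram: %v", err)
//	}
//	if err := vec.Validate(); err != nil {
//		t.Fatalf("invalid histogram: %v", err)
//	}
//	p90 := vec.Quantile(0.9)
//	_ = p90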
+func (vec HistogramVec) Validate() error { + bucketSize := 0 + for i, hist := range vec { + if err := hist.Validate(); err != nil { + return err + } + if i == 0 { + bucketSize = len(hist.GetBucket()) + } else if bucketSize != len(hist.GetBucket()) { + return fmt.Errorf("found different bucket size: expect %v, but got %v at index %v", bucketSize, len(hist.GetBucket()), i) + } + } + return nil +} + +// GetHistogramVecFromGatherer collects a metric, that matches the input labelValue map, +// from a gatherer implementing k8s.io/component-base/metrics.Gatherer interface. +// Used only for testing purposes where we need to gather metrics directly from a running binary (without metrics endpoint). +func GetHistogramVecFromGatherer(gatherer metrics.Gatherer, metricName string, lvMap map[string]string) (HistogramVec, error) { + var metricFamily *dto.MetricFamily + m, err := gatherer.Gather() + if err != nil { + return nil, err + } + for _, mFamily := range m { + if mFamily.GetName() == metricName { + metricFamily = mFamily + break + } + } + + if metricFamily == nil { + return nil, fmt.Errorf("metric %q not found", metricName) + } + + if len(metricFamily.GetMetric()) == 0 { + return nil, fmt.Errorf("metric %q is empty", metricName) + } + + vec := make(HistogramVec, 0) + for _, metric := range metricFamily.GetMetric() { + if LabelsMatch(metric, lvMap) { + if hist := metric.GetHistogram(); hist != nil { + vec = append(vec, &Histogram{hist}) + } + } + } + return vec, nil +} + +func uint64Ptr(u uint64) *uint64 { + return &u +} + +// Bucket of a histogram +type bucket struct { + upperBound float64 + count float64 +} + +func bucketQuantile(q float64, buckets []bucket) float64 { + if q < 0 { + return math.Inf(-1) + } + if q > 1 { + return math.Inf(+1) + } + + if len(buckets) < 2 { + return math.NaN() + } + + rank := q * buckets[len(buckets)-1].count + b := sort.Search(len(buckets)-1, func(i int) bool { return buckets[i].count >= rank }) + + if b == 0 { + return buckets[0].upperBound * (rank / buckets[0].count) + } + + if b == len(buckets)-1 && math.IsInf(buckets[b].upperBound, 1) { + return buckets[len(buckets)-2].upperBound + } + + // linear approximation of b-th bucket + brank := rank - buckets[b-1].count + bSize := buckets[b].upperBound - buckets[b-1].upperBound + bCount := buckets[b].count - buckets[b-1].count + + return buckets[b-1].upperBound + bSize*(brank/bCount) +} + +// Quantile computes q-th quantile of a cumulative histogram. +// It's expected the histogram is valid (by calling Validate) +func (hist *Histogram) Quantile(q float64) float64 { + var buckets []bucket + + for _, bckt := range hist.Bucket { + buckets = append(buckets, bucket{ + count: float64(bckt.GetCumulativeCount()), + upperBound: bckt.GetUpperBound(), + }) + } + + if len(buckets) == 0 || buckets[len(buckets)-1].upperBound != math.Inf(+1) { + // The list of buckets in dto.Histogram doesn't include the final +Inf bucket, so we + // add it here for the rest of the samples. + buckets = append(buckets, bucket{ + count: float64(hist.GetSampleCount()), + upperBound: math.Inf(+1), + }) + } + + return bucketQuantile(q, buckets) +} + +// Average computes histogram's average value +func (hist *Histogram) Average() float64 { + return hist.GetSampleSum() / float64(hist.GetSampleCount()) +} + +// Validate makes sure the wrapped histogram has all necessary fields set and with valid values. 
+func (hist *Histogram) Validate() error { + if hist.SampleCount == nil || hist.GetSampleCount() == 0 { + return fmt.Errorf("nil or empty histogram SampleCount") + } + + if hist.SampleSum == nil || hist.GetSampleSum() == 0 { + return fmt.Errorf("nil or empty histogram SampleSum") + } + + for _, bckt := range hist.Bucket { + if bckt == nil { + return fmt.Errorf("empty histogram bucket") + } + if bckt.UpperBound == nil || bckt.GetUpperBound() < 0 { + return fmt.Errorf("nil or negative histogram bucket UpperBound") + } + } + + return nil +} + +// GetGaugeMetricValue extracts metric value from GaugeMetric +func GetGaugeMetricValue(m metrics.GaugeMetric) (float64, error) { + metricProto := &dto.Metric{} + if err := m.Write(metricProto); err != nil { + return 0, fmt.Errorf("error writing m: %v", err) + } + return metricProto.Gauge.GetValue(), nil +} + +// GetCounterMetricValue extracts metric value from CounterMetric +func GetCounterMetricValue(m metrics.CounterMetric) (float64, error) { + metricProto := &dto.Metric{} + if err := m.(metrics.Metric).Write(metricProto); err != nil { + return 0, fmt.Errorf("error writing m: %v", err) + } + return metricProto.Counter.GetValue(), nil +} + +// GetHistogramMetricValue extracts sum of all samples from ObserverMetric +func GetHistogramMetricValue(m metrics.ObserverMetric) (float64, error) { + metricProto := &dto.Metric{} + if err := m.(metrics.Metric).Write(metricProto); err != nil { + return 0, fmt.Errorf("error writing m: %v", err) + } + return metricProto.Histogram.GetSampleSum(), nil +} + +// GetHistogramMetricCount extracts count of all samples from ObserverMetric +func GetHistogramMetricCount(m metrics.ObserverMetric) (uint64, error) { + metricProto := &dto.Metric{} + if err := m.(metrics.Metric).Write(metricProto); err != nil { + return 0, fmt.Errorf("error writing m: %v", err) + } + return metricProto.Histogram.GetSampleCount(), nil +} + +// LabelsMatch returns true if metric has all expected labels otherwise false +func LabelsMatch(metric *dto.Metric, labelFilter map[string]string) bool { + metricLabels := map[string]string{} + + for _, labelPair := range metric.Label { + metricLabels[labelPair.GetName()] = labelPair.GetValue() + } + + // length comparison then match key to values in the maps + if len(labelFilter) > len(metricLabels) { + return false + } + + for labelName, labelValue := range labelFilter { + if value, ok := metricLabels[labelName]; !ok || value != labelValue { + return false + } + } + + return true +} diff --git a/vendor/k8s.io/component-base/metrics/testutil/promlint.go b/vendor/k8s.io/component-base/metrics/testutil/promlint.go new file mode 100644 index 0000000000..4c537be225 --- /dev/null +++ b/vendor/k8s.io/component-base/metrics/testutil/promlint.go @@ -0,0 +1,151 @@ +/* +Copyright 2020 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package testutil + +import ( + "fmt" + "io" + "strings" + + "github.com/prometheus/client_golang/prometheus/testutil/promlint" +) + +// exceptionMetrics is an exception list of metrics which violates promlint rules. +// +// The original entries come from the existing metrics when we introduce promlint. +// We setup this list for allow and not fail on the current violations. +// Generally speaking, you need to fix the problem for a new metric rather than add it into the list. +var exceptionMetrics = []string{ + // k8s.io/kubernetes/vendor/k8s.io/apiserver/pkg/server/egressselector + "apiserver_egress_dialer_dial_failure_count", // counter metrics should have "_total" suffix + + // k8s.io/kubernetes/vendor/k8s.io/apiserver/pkg/server/healthz + "apiserver_request_total", // label names should be written in 'snake_case' not 'camelCase' + + // k8s.io/kubernetes/vendor/k8s.io/apiserver/pkg/endpoints/filters + "authenticated_user_requests", // counter metrics should have "_total" suffix + "authentication_attempts", // counter metrics should have "_total" suffix + + // kube-apiserver + "aggregator_openapi_v2_regeneration_count", + "apiserver_admission_step_admission_duration_seconds_summary", + "apiserver_current_inflight_requests", + "apiserver_longrunning_gauge", + "get_token_count", + "get_token_fail_count", + "ssh_tunnel_open_count", + "ssh_tunnel_open_fail_count", + + // kube-controller-manager + "attachdetach_controller_forced_detaches", + "authenticated_user_requests", + "authentication_attempts", + "get_token_count", + "get_token_fail_count", + "node_collector_evictions_number", +} + +// A Problem is an issue detected by a Linter. +type Problem promlint.Problem + +func (p *Problem) String() string { + return fmt.Sprintf("%s:%s", p.Metric, p.Text) +} + +// A Linter is a Prometheus metrics linter. It identifies issues with metric +// names, types, and metadata, and reports them to the caller. +type Linter struct { + promLinter *promlint.Linter +} + +// Lint performs a linting pass, returning a slice of Problems indicating any +// issues found in the metrics stream. The slice is sorted by metric name +// and issue description. +func (l *Linter) Lint() ([]Problem, error) { + promProblems, err := l.promLinter.Lint() + if err != nil { + return nil, err + } + + // Ignore problems those in exception list + problems := make([]Problem, 0, len(promProblems)) + for i := range promProblems { + if !l.shouldIgnore(promProblems[i].Metric) { + problems = append(problems, Problem(promProblems[i])) + } + } + + return problems, nil +} + +// shouldIgnore returns true if metric in the exception list, otherwise returns false. +func (l *Linter) shouldIgnore(metricName string) bool { + for i := range exceptionMetrics { + if metricName == exceptionMetrics[i] { + return true + } + } + + return false +} + +// NewPromLinter creates a new Linter that reads an input stream of Prometheus metrics. +// Only the text exposition format is supported. +func NewPromLinter(r io.Reader) *Linter { + return &Linter{ + promLinter: promlint.New(r), + } +} + +func mergeProblems(problems []Problem) string { + var problemsMsg []string + + for index := range problems { + problemsMsg = append(problemsMsg, problems[index].String()) + } + + return strings.Join(problemsMsg, ",") +} + +// shouldIgnore returns true if metric in the exception list, otherwise returns false. 
+func shouldIgnore(metricName string) bool { + for i := range exceptionMetrics { + if metricName == exceptionMetrics[i] { + return true + } + } + + return false +} + +// getLintError will ignore the metrics in exception list and converts lint problem to error. +func getLintError(problems []promlint.Problem) error { + var filteredProblems []Problem + for _, problem := range problems { + if shouldIgnore(problem.Metric) { + continue + } + + filteredProblems = append(filteredProblems, Problem(problem)) + } + + if len(filteredProblems) == 0 { + return nil + } + + return fmt.Errorf("lint error: %s", mergeProblems(filteredProblems)) +} diff --git a/vendor/k8s.io/component-base/metrics/testutil/testutil.go b/vendor/k8s.io/component-base/metrics/testutil/testutil.go new file mode 100644 index 0000000000..439045989c --- /dev/null +++ b/vendor/k8s.io/component-base/metrics/testutil/testutil.go @@ -0,0 +1,86 @@ +/* +Copyright 2019 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package testutil + +import ( + "fmt" + "io" + + "github.com/prometheus/client_golang/prometheus/testutil" + + apimachineryversion "k8s.io/apimachinery/pkg/version" + "k8s.io/component-base/metrics" +) + +// CollectAndCompare registers the provided Collector with a newly created +// pedantic Registry. It then does the same as GatherAndCompare, gathering the +// metrics from the pedantic Registry. +func CollectAndCompare(c metrics.Collector, expected io.Reader, metricNames ...string) error { + lintProblems, err := testutil.CollectAndLint(c, metricNames...) + if err != nil { + return err + } + if err := getLintError(lintProblems); err != nil { + return err + } + + return testutil.CollectAndCompare(c, expected, metricNames...) +} + +// GatherAndCompare gathers all metrics from the provided Gatherer and compares +// it to an expected output read from the provided Reader in the Prometheus text +// exposition format. If any metricNames are provided, only metrics with those +// names are compared. +func GatherAndCompare(g metrics.Gatherer, expected io.Reader, metricNames ...string) error { + lintProblems, err := testutil.GatherAndLint(g, metricNames...) + if err != nil { + return err + } + if err := getLintError(lintProblems); err != nil { + return err + } + + return testutil.GatherAndCompare(g, expected, metricNames...) +} + +// CustomCollectAndCompare registers the provided StableCollector with a newly created +// registry. It then does the same as GatherAndCompare, gathering the +// metrics from the pedantic Registry. +func CustomCollectAndCompare(c metrics.StableCollector, expected io.Reader, metricNames ...string) error { + registry := metrics.NewKubeRegistry() + registry.CustomMustRegister(c) + + return GatherAndCompare(registry, expected, metricNames...) +} + +// NewFakeKubeRegistry creates a fake `KubeRegistry` that takes the input version as `build in version`. +// It should only be used in testing scenario especially for the deprecated metrics. +// The input version format should be `major.minor.patch`, e.g. '1.18.0'. 
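// A small usage sketch (illustrative only, not part of the upstream file;
// deprecatedMetric is an assumed metrics.Registerable, e.g. a counter created
// with a DeprecatedVersion):
//
//	registry := NewFakeKubeRegistry("1.18.0")
//	registry.MustRegister(deprecatedMetric)
//	// Gather from registry to observe how the metric behaves at build version v1.18.0.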
+func NewFakeKubeRegistry(ver string) metrics.KubeRegistry { + backup := metrics.BuildVersion + defer func() { + metrics.BuildVersion = backup + }() + + metrics.BuildVersion = func() apimachineryversion.Info { + return apimachineryversion.Info{ + GitVersion: fmt.Sprintf("v%s-alpha+1.12345", ver), + } + } + + return metrics.NewKubeRegistry() +} diff --git a/vendor/k8s.io/kubelet/LICENSE b/vendor/k8s.io/kubelet/LICENSE new file mode 100644 index 0000000000..d645695673 --- /dev/null +++ b/vendor/k8s.io/kubelet/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. 
The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/vendor/k8s.io/kubelet/pkg/apis/stats/v1alpha1/types.go b/vendor/k8s.io/kubelet/pkg/apis/stats/v1alpha1/types.go new file mode 100644 index 0000000000..5e75fefe53 --- /dev/null +++ b/vendor/k8s.io/kubelet/pkg/apis/stats/v1alpha1/types.go @@ -0,0 +1,358 @@ +/* +Copyright 2015 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package v1alpha1 + +import ( + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// Summary is a top-level container for holding NodeStats and PodStats. +type Summary struct { + // Overall node stats. + Node NodeStats `json:"node"` + // Per-pod stats. + Pods []PodStats `json:"pods"` +} + +// NodeStats holds node-level unprocessed sample stats. +type NodeStats struct { + // Reference to the measured Node. + NodeName string `json:"nodeName"` + // Stats of system daemons tracked as raw containers. + // The system containers are named according to the SystemContainer* constants. + // +optional + // +patchMergeKey=name + // +patchStrategy=merge + SystemContainers []ContainerStats `json:"systemContainers,omitempty" patchStrategy:"merge" patchMergeKey:"name"` + // The time at which data collection for the node-scoped (i.e. aggregate) stats was (re)started. + StartTime metav1.Time `json:"startTime"` + // Stats pertaining to CPU resources. + // +optional + CPU *CPUStats `json:"cpu,omitempty"` + // Stats pertaining to memory (RAM) resources. + // +optional + Memory *MemoryStats `json:"memory,omitempty"` + // Stats pertaining to network resources. + // +optional + Network *NetworkStats `json:"network,omitempty"` + // Stats pertaining to total usage of filesystem resources on the rootfs used by node k8s components. + // NodeFs.Used is the total bytes used on the filesystem. + // +optional + Fs *FsStats `json:"fs,omitempty"` + // Stats about the underlying container runtime. 
+ // +optional + Runtime *RuntimeStats `json:"runtime,omitempty"` + // Stats about the rlimit of system. + // +optional + Rlimit *RlimitStats `json:"rlimit,omitempty"` +} + +// RlimitStats are stats rlimit of OS. +type RlimitStats struct { + Time metav1.Time `json:"time"` + + // The max number of extant process (threads, precisely on Linux) of OS. See RLIMIT_NPROC in getrlimit(2). + // The operating system ceiling on the number of process IDs that can be assigned. + // On Linux, tasks (either processes or threads) consume 1 PID each. + MaxPID *int64 `json:"maxpid,omitempty"` + // The number of running process (threads, precisely on Linux) in the OS. + NumOfRunningProcesses *int64 `json:"curproc,omitempty"` +} + +// RuntimeStats are stats pertaining to the underlying container runtime. +type RuntimeStats struct { + // Stats about the underlying filesystem where container images are stored. + // This filesystem could be the same as the primary (root) filesystem. + // Usage here refers to the total number of bytes occupied by images on the filesystem. + // +optional + ImageFs *FsStats `json:"imageFs,omitempty"` +} + +const ( + // SystemContainerKubelet is the container name for the system container tracking Kubelet usage. + SystemContainerKubelet = "kubelet" + // SystemContainerRuntime is the container name for the system container tracking the runtime (e.g. docker) usage. + SystemContainerRuntime = "runtime" + // SystemContainerMisc is the container name for the system container tracking non-kubernetes processes. + SystemContainerMisc = "misc" + // SystemContainerPods is the container name for the system container tracking user pods. + SystemContainerPods = "pods" +) + +// ProcessStats are stats pertaining to processes. +type ProcessStats struct { + // Number of processes + // +optional + ProcessCount *uint64 `json:"process_count,omitempty"` +} + +// PodStats holds pod-level unprocessed sample stats. +type PodStats struct { + // Reference to the measured Pod. + PodRef PodReference `json:"podRef"` + // The time at which data collection for the pod-scoped (e.g. network) stats was (re)started. + StartTime metav1.Time `json:"startTime"` + // Stats of containers in the measured pod. + // +patchMergeKey=name + // +patchStrategy=merge + Containers []ContainerStats `json:"containers" patchStrategy:"merge" patchMergeKey:"name"` + // Stats pertaining to CPU resources consumed by pod cgroup (which includes all containers' resource usage and pod overhead). + // +optional + CPU *CPUStats `json:"cpu,omitempty"` + // Stats pertaining to memory (RAM) resources consumed by pod cgroup (which includes all containers' resource usage and pod overhead). + // +optional + Memory *MemoryStats `json:"memory,omitempty"` + // Stats pertaining to network resources. + // +optional + Network *NetworkStats `json:"network,omitempty"` + // Stats pertaining to volume usage of filesystem resources. + // VolumeStats.UsedBytes is the number of bytes used by the Volume + // +optional + // +patchMergeKey=name + // +patchStrategy=merge + VolumeStats []VolumeStats `json:"volume,omitempty" patchStrategy:"merge" patchMergeKey:"name"` + // EphemeralStorage reports the total filesystem usage for the containers and emptyDir-backed volumes in the measured Pod. + // +optional + EphemeralStorage *FsStats `json:"ephemeral-storage,omitempty"` + // ProcessStats pertaining to processes. + // +optional + ProcessStats *ProcessStats `json:"process_stats,omitempty"` +} + +// ContainerStats holds container-level unprocessed sample stats. 
+type ContainerStats struct { + // Reference to the measured container. + Name string `json:"name"` + // The time at which data collection for this container was (re)started. + StartTime metav1.Time `json:"startTime"` + // Stats pertaining to CPU resources. + // +optional + CPU *CPUStats `json:"cpu,omitempty"` + // Stats pertaining to memory (RAM) resources. + // +optional + Memory *MemoryStats `json:"memory,omitempty"` + // Metrics for Accelerators. Each Accelerator corresponds to one element in the array. + Accelerators []AcceleratorStats `json:"accelerators,omitempty"` + // Stats pertaining to container rootfs usage of filesystem resources. + // Rootfs.UsedBytes is the number of bytes used for the container write layer. + // +optional + Rootfs *FsStats `json:"rootfs,omitempty"` + // Stats pertaining to container logs usage of filesystem resources. + // Logs.UsedBytes is the number of bytes used for the container logs. + // +optional + Logs *FsStats `json:"logs,omitempty"` + // User defined metrics that are exposed by containers in the pod. Typically, we expect only one container in the pod to be exposing user defined metrics. In the event of multiple containers exposing metrics, they will be combined here. + // +patchMergeKey=name + // +patchStrategy=merge + UserDefinedMetrics []UserDefinedMetric `json:"userDefinedMetrics,omitempty" patchStrategy:"merge" patchMergeKey:"name"` +} + +// PodReference contains enough information to locate the referenced pod. +type PodReference struct { + Name string `json:"name"` + Namespace string `json:"namespace"` + UID string `json:"uid"` +} + +// InterfaceStats contains resource value data about interface. +type InterfaceStats struct { + // The name of the interface + Name string `json:"name"` + // Cumulative count of bytes received. + // +optional + RxBytes *uint64 `json:"rxBytes,omitempty"` + // Cumulative count of receive errors encountered. + // +optional + RxErrors *uint64 `json:"rxErrors,omitempty"` + // Cumulative count of bytes transmitted. + // +optional + TxBytes *uint64 `json:"txBytes,omitempty"` + // Cumulative count of transmit errors encountered. + // +optional + TxErrors *uint64 `json:"txErrors,omitempty"` +} + +// NetworkStats contains data about network resources. +type NetworkStats struct { + // The time at which these stats were updated. + Time metav1.Time `json:"time"` + + // Stats for the default interface, if found + InterfaceStats `json:",inline"` + + Interfaces []InterfaceStats `json:"interfaces,omitempty"` +} + +// CPUStats contains data about CPU usage. +type CPUStats struct { + // The time at which these stats were updated. + Time metav1.Time `json:"time"` + // Total CPU usage (sum of all cores) averaged over the sample window. + // The "core" unit can be interpreted as CPU core-nanoseconds per second. + // +optional + UsageNanoCores *uint64 `json:"usageNanoCores,omitempty"` + // Cumulative CPU usage (sum of all cores) since object creation. + // +optional + UsageCoreNanoSeconds *uint64 `json:"usageCoreNanoSeconds,omitempty"` +} + +// MemoryStats contains data about memory usage. +type MemoryStats struct { + // The time at which these stats were updated. + Time metav1.Time `json:"time"` + // Available memory for use. This is defined as the memory limit - workingSetBytes. + // If memory limit is undefined, the available bytes is omitted. + // +optional + AvailableBytes *uint64 `json:"availableBytes,omitempty"` + // Total memory in use. This includes all memory regardless of when it was accessed. 
+ // +optional + UsageBytes *uint64 `json:"usageBytes,omitempty"` + // The amount of working set memory. This includes recently accessed memory, + // dirty memory, and kernel memory. WorkingSetBytes is <= UsageBytes + // +optional + WorkingSetBytes *uint64 `json:"workingSetBytes,omitempty"` + // The amount of anonymous and swap cache memory (includes transparent + // hugepages). + // +optional + RSSBytes *uint64 `json:"rssBytes,omitempty"` + // Cumulative number of minor page faults. + // +optional + PageFaults *uint64 `json:"pageFaults,omitempty"` + // Cumulative number of major page faults. + // +optional + MajorPageFaults *uint64 `json:"majorPageFaults,omitempty"` +} + +// AcceleratorStats contains stats for accelerators attached to the container. +type AcceleratorStats struct { + // Make of the accelerator (nvidia, amd, google etc.) + Make string `json:"make"` + + // Model of the accelerator (tesla-p100, tesla-k80 etc.) + Model string `json:"model"` + + // ID of the accelerator. + ID string `json:"id"` + + // Total accelerator memory. + // unit: bytes + MemoryTotal uint64 `json:"memoryTotal"` + + // Total accelerator memory allocated. + // unit: bytes + MemoryUsed uint64 `json:"memoryUsed"` + + // Percent of time over the past sample period (10s) during which + // the accelerator was actively processing. + DutyCycle uint64 `json:"dutyCycle"` +} + +// VolumeStats contains data about Volume filesystem usage. +type VolumeStats struct { + // Embedded FsStats + FsStats `json:",inline"` + // Name is the name given to the Volume + // +optional + Name string `json:"name,omitempty"` + // Reference to the PVC, if one exists + // +optional + PVCRef *PVCReference `json:"pvcRef,omitempty"` + + // VolumeHealthStats contains data about volume health + // +optional + VolumeHealthStats *VolumeHealthStats `json:"volumeHealthStats,omitempty"` +} + +// VolumeHealthStats contains data about volume health. +type VolumeHealthStats struct { + // Normal volumes are available for use and operating optimally. + // An abnormal volume does not meet these criteria. + Abnormal bool `json:"abnormal"` +} + +// PVCReference contains enough information to describe the referenced PVC. +type PVCReference struct { + Name string `json:"name"` + Namespace string `json:"namespace"` +} + +// FsStats contains data about filesystem usage. +type FsStats struct { + // The time at which these stats were updated. + Time metav1.Time `json:"time"` + // AvailableBytes represents the storage space available (bytes) for the filesystem. + // +optional + AvailableBytes *uint64 `json:"availableBytes,omitempty"` + // CapacityBytes represents the total capacity (bytes) of the filesystems underlying storage. + // +optional + CapacityBytes *uint64 `json:"capacityBytes,omitempty"` + // UsedBytes represents the bytes used for a specific task on the filesystem. + // This may differ from the total bytes used on the filesystem and may not equal CapacityBytes - AvailableBytes. + // e.g. For ContainerStats.Rootfs this is the bytes used by the container rootfs on the filesystem. + // +optional + UsedBytes *uint64 `json:"usedBytes,omitempty"` + // InodesFree represents the free inodes in the filesystem. + // +optional + InodesFree *uint64 `json:"inodesFree,omitempty"` + // Inodes represents the total inodes in the filesystem. 
+ // +optional + Inodes *uint64 `json:"inodes,omitempty"` + // InodesUsed represents the inodes used by the filesystem + // This may not equal Inodes - InodesFree because this filesystem may share inodes with other "filesystems" + // e.g. For ContainerStats.Rootfs, this is the inodes used only by that container, and does not count inodes used by other containers. + InodesUsed *uint64 `json:"inodesUsed,omitempty"` +} + +// UserDefinedMetricType defines how the metric should be interpreted by the user. +type UserDefinedMetricType string + +const ( + // MetricGauge is an instantaneous value. May increase or decrease. + MetricGauge UserDefinedMetricType = "gauge" + + // MetricCumulative is a counter-like value that is only expected to increase. + MetricCumulative UserDefinedMetricType = "cumulative" + + // MetricDelta is a rate over a time period. + MetricDelta UserDefinedMetricType = "delta" +) + +// UserDefinedMetricDescriptor contains metadata that describes a user defined metric. +type UserDefinedMetricDescriptor struct { + // The name of the metric. + Name string `json:"name"` + + // Type of the metric. + Type UserDefinedMetricType `json:"type"` + + // Display Units for the stats. + Units string `json:"units"` + + // Metadata labels associated with this metric. + // +optional + Labels map[string]string `json:"labels,omitempty"` +} + +// UserDefinedMetric represents a metric defined and generated by users. +type UserDefinedMetric struct { + UserDefinedMetricDescriptor `json:",inline"` + // The time at which these stats were updated. + Time metav1.Time `json:"time"` + // Value of the metric. Float64s have 53 bit precision. + // We do not foresee any metrics exceeding that value. + Value float64 `json:"value"` +} diff --git a/vendor/k8s.io/kubernetes/test/e2e/framework/debug/dump.go b/vendor/k8s.io/kubernetes/test/e2e/framework/debug/dump.go new file mode 100644 index 0000000000..9244bf0e00 --- /dev/null +++ b/vendor/k8s.io/kubernetes/test/e2e/framework/debug/dump.go @@ -0,0 +1,187 @@ +/* +Copyright 2014 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package debug + +import ( + "context" + "fmt" + "sort" + "time" + + "github.com/onsi/ginkgo/v2" + + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/fields" + clientset "k8s.io/client-go/kubernetes" + restclient "k8s.io/client-go/rest" + "k8s.io/kubernetes/test/e2e/framework" + e2emetrics "k8s.io/kubernetes/test/e2e/framework/metrics" + e2epod "k8s.io/kubernetes/test/e2e/framework/pod" +) + +// EventsLister is a func that lists events. +type EventsLister func(opts metav1.ListOptions, ns string) (*v1.EventList, error) + +// dumpEventsInNamespace dumps events in the given namespace. 
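Before the dump helpers continue below, a minimal illustrative sketch (not part of the vendored file) of consuming the stats types defined above: decode a kubelet /stats/summary payload and read a container's working set. The fixture JSON and names are made up for the example.

package main

import (
	"encoding/json"
	"fmt"

	statsapi "k8s.io/kubelet/pkg/apis/stats/v1alpha1"
)

func main() {
	// Hypothetical, heavily truncated /stats/summary response.
	raw := []byte(`{
		"node": {"nodeName": "node-1", "startTime": "2023-03-30T05:00:00Z"},
		"pods": [{
			"podRef": {"name": "example-pod", "namespace": "kube-system", "uid": "1234"},
			"startTime": "2023-03-30T05:00:10Z",
			"containers": [{"name": "app", "startTime": "2023-03-30T05:00:12Z",
				"memory": {"time": "2023-03-30T05:05:00Z", "workingSetBytes": 52428800}}]
		}]
	}`)

	var summary statsapi.Summary
	if err := json.Unmarshal(raw, &summary); err != nil {
		panic(err)
	}
	for _, pod := range summary.Pods {
		for _, c := range pod.Containers {
			if c.Memory != nil && c.Memory.WorkingSetBytes != nil {
				fmt.Printf("%s/%s working set: %d bytes\n", pod.PodRef.Name, c.Name, *c.Memory.WorkingSetBytes)
			}
		}
	}
}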
+func dumpEventsInNamespace(eventsLister EventsLister, namespace string) { + ginkgo.By(fmt.Sprintf("Collecting events from namespace %q.", namespace)) + events, err := eventsLister(metav1.ListOptions{}, namespace) + framework.ExpectNoError(err, "failed to list events in namespace %q", namespace) + + ginkgo.By(fmt.Sprintf("Found %d events.", len(events.Items))) + // Sort events by their first timestamp + sortedEvents := events.Items + if len(sortedEvents) > 1 { + sort.Sort(byFirstTimestamp(sortedEvents)) + } + for _, e := range sortedEvents { + framework.Logf("At %v - event for %v: %v %v: %v", e.FirstTimestamp, e.InvolvedObject.Name, e.Source, e.Reason, e.Message) + } + // Note that we don't wait for any Cleanup to propagate, which means + // that if you delete a bunch of pods right before ending your test, + // you may or may not see the killing/deletion/Cleanup events. +} + +// DumpAllNamespaceInfo dumps events, pods and nodes information in the given namespace. +func DumpAllNamespaceInfo(c clientset.Interface, namespace string) { + dumpEventsInNamespace(func(opts metav1.ListOptions, ns string) (*v1.EventList, error) { + return c.CoreV1().Events(ns).List(context.TODO(), opts) + }, namespace) + + e2epod.DumpAllPodInfoForNamespace(c, namespace, framework.TestContext.ReportDir) + + // If cluster is large, then the following logs are basically useless, because: + // 1. it takes tens of minutes or hours to grab all of them + // 2. there are so many of them that working with them are mostly impossible + // So we dump them only if the cluster is relatively small. + maxNodesForDump := framework.TestContext.MaxNodesToGather + nodes, err := c.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{}) + if err != nil { + framework.Logf("unable to fetch node list: %v", err) + return + } + if len(nodes.Items) <= maxNodesForDump { + dumpAllNodeInfo(c, nodes) + } else { + framework.Logf("skipping dumping cluster info - cluster too large") + } +} + +// byFirstTimestamp sorts a slice of events by first timestamp, using their involvedObject's name as a tie breaker. +type byFirstTimestamp []v1.Event + +func (o byFirstTimestamp) Len() int { return len(o) } +func (o byFirstTimestamp) Swap(i, j int) { o[i], o[j] = o[j], o[i] } + +func (o byFirstTimestamp) Less(i, j int) bool { + if o[i].FirstTimestamp.Equal(&o[j].FirstTimestamp) { + return o[i].InvolvedObject.Name < o[j].InvolvedObject.Name + } + return o[i].FirstTimestamp.Before(&o[j].FirstTimestamp) +} + +func dumpAllNodeInfo(c clientset.Interface, nodes *v1.NodeList) { + names := make([]string, len(nodes.Items)) + for ix := range nodes.Items { + names[ix] = nodes.Items[ix].Name + } + DumpNodeDebugInfo(c, names, framework.Logf) +} + +// DumpNodeDebugInfo dumps debug information of the given nodes. 
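For context, a small sketch (illustrative only; the helper name and wiring are assumptions, not framework code) of calling DumpAllNamespaceInfo directly from a test, dumping only when the current spec has already failed so passing runs stay quiet.

package e2e

import (
	"github.com/onsi/ginkgo/v2"
	clientset "k8s.io/client-go/kubernetes"

	e2edebug "k8s.io/kubernetes/test/e2e/framework/debug"
)

// dumpNamespaceOnFailure is a hypothetical helper: it dumps events, pods and
// (for small clusters) node info for the test namespace, but only when the
// current spec failed.
func dumpNamespaceOnFailure(c clientset.Interface, ns string) {
	if ginkgo.CurrentSpecReport().Failed() {
		e2edebug.DumpAllNamespaceInfo(c, ns)
	}
}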
+func DumpNodeDebugInfo(c clientset.Interface, nodeNames []string, logFunc func(fmt string, args ...interface{})) { + for _, n := range nodeNames { + logFunc("\nLogging node info for node %v", n) + node, err := c.CoreV1().Nodes().Get(context.TODO(), n, metav1.GetOptions{}) + if err != nil { + logFunc("Error getting node info %v", err) + } + logFunc("Node Info: %v", node) + + logFunc("\nLogging kubelet events for node %v", n) + for _, e := range getNodeEvents(c, n) { + logFunc("source %v type %v message %v reason %v first ts %v last ts %v, involved obj %+v", + e.Source, e.Type, e.Message, e.Reason, e.FirstTimestamp, e.LastTimestamp, e.InvolvedObject) + } + logFunc("\nLogging pods the kubelet thinks is on node %v", n) + podList, err := getKubeletPods(c, n) + if err != nil { + logFunc("Unable to retrieve kubelet pods for node %v: %v", n, err) + continue + } + for _, p := range podList.Items { + logFunc("%v started at %v (%d+%d container statuses recorded)", p.Name, p.Status.StartTime, len(p.Status.InitContainerStatuses), len(p.Status.ContainerStatuses)) + for _, c := range p.Status.InitContainerStatuses { + logFunc("\tInit container %v ready: %v, restart count %v", + c.Name, c.Ready, c.RestartCount) + } + for _, c := range p.Status.ContainerStatuses { + logFunc("\tContainer %v ready: %v, restart count %v", + c.Name, c.Ready, c.RestartCount) + } + } + e2emetrics.HighLatencyKubeletOperations(c, 10*time.Second, n, logFunc) + // TODO: Log node resource info + } +} + +// getKubeletPods retrieves the list of pods on the kubelet. +func getKubeletPods(c clientset.Interface, node string) (*v1.PodList, error) { + var client restclient.Result + finished := make(chan struct{}, 1) + go func() { + // call chain tends to hang in some cases when Node is not ready. Add an artificial timeout for this call. #22165 + client = c.CoreV1().RESTClient().Get(). + Resource("nodes"). + SubResource("proxy"). + Name(fmt.Sprintf("%v:%v", node, framework.KubeletPort)). + Suffix("pods"). + Do(context.TODO()) + + finished <- struct{}{} + }() + select { + case <-finished: + result := &v1.PodList{} + if err := client.Into(result); err != nil { + return &v1.PodList{}, err + } + return result, nil + case <-time.After(framework.PodGetTimeout): + return &v1.PodList{}, fmt.Errorf("Waiting up to %v for getting the list of pods", framework.PodGetTimeout) + } +} + +// logNodeEvents logs kubelet events from the given node. This includes kubelet +// restart and node unhealthy events. Note that listing events like this will mess +// with latency metrics, beware of calling it during a test. +func getNodeEvents(c clientset.Interface, nodeName string) []v1.Event { + selector := fields.Set{ + "involvedObject.kind": "Node", + "involvedObject.name": nodeName, + "involvedObject.namespace": metav1.NamespaceAll, + "source": "kubelet", + }.AsSelector().String() + options := metav1.ListOptions{FieldSelector: selector} + events, err := c.CoreV1().Events(metav1.NamespaceSystem).List(context.TODO(), options) + if err != nil { + framework.Logf("Unexpected error retrieving node events %v", err) + return []v1.Event{} + } + return events.Items +} diff --git a/vendor/k8s.io/kubernetes/test/e2e/framework/debug/init/init.go b/vendor/k8s.io/kubernetes/test/e2e/framework/debug/init/init.go new file mode 100644 index 0000000000..769db07c5c --- /dev/null +++ b/vendor/k8s.io/kubernetes/test/e2e/framework/debug/init/init.go @@ -0,0 +1,101 @@ +/* +Copyright 2022 The Kubernetes Authors. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package init sets debug.DumpAllNamespaceInfo as implementation in the framework +// and enables log size verification and resource gathering. +package init + +import ( + "sync" + "time" + + "github.com/onsi/ginkgo/v2" + "k8s.io/kubernetes/test/e2e/framework" + e2edebug "k8s.io/kubernetes/test/e2e/framework/debug" +) + +func init() { + framework.NewFrameworkExtensions = append(framework.NewFrameworkExtensions, + func(f *framework.Framework) { + f.DumpAllNamespaceInfo = func(f *framework.Framework, ns string) { + e2edebug.DumpAllNamespaceInfo(f.ClientSet, ns) + } + + if framework.TestContext.GatherLogsSizes { + var ( + wg sync.WaitGroup + closeChannel chan bool + verifier *e2edebug.LogsSizeVerifier + ) + + ginkgo.BeforeEach(func() { + wg.Add(1) + closeChannel = make(chan bool) + verifier = e2edebug.NewLogsVerifier(f.ClientSet, closeChannel) + go func() { + defer wg.Done() + verifier.Run() + }() + ginkgo.DeferCleanup(func() { + ginkgo.By("Gathering log sizes data", func() { + close(closeChannel) + wg.Wait() + f.TestSummaries = append(f.TestSummaries, verifier.GetSummary()) + }) + }) + }) + } + + if framework.TestContext.GatherKubeSystemResourceUsageData != "false" && + framework.TestContext.GatherKubeSystemResourceUsageData != "none" { + ginkgo.BeforeEach(func() { + var nodeMode e2edebug.NodesSet + switch framework.TestContext.GatherKubeSystemResourceUsageData { + case "master": + nodeMode = e2edebug.MasterNodes + case "masteranddns": + nodeMode = e2edebug.MasterAndDNSNodes + default: + nodeMode = e2edebug.AllNodes + } + + gatherer, err := e2edebug.NewResourceUsageGatherer(f.ClientSet, e2edebug.ResourceGathererOptions{ + InKubemark: framework.ProviderIs("kubemark"), + Nodes: nodeMode, + ResourceDataGatheringPeriod: 60 * time.Second, + ProbeDuration: 15 * time.Second, + PrintVerboseLogs: false, + }, nil) + if err != nil { + framework.Logf("Error while creating NewResourceUsageGatherer: %v", err) + return + } + + go gatherer.StartGatheringData() + ginkgo.DeferCleanup(func() { + ginkgo.By("Collecting resource usage data", func() { + summary, resourceViolationError := gatherer.StopAndSummarize([]int{90, 99, 100}, nil /* no constraints */) + // Always record the summary, even if there was an error. + f.TestSummaries = append(f.TestSummaries, summary) + // Now fail if there was an error. + framework.ExpectNoError(resourceViolationError) + }) + }) + }) + } + }, + ) +} diff --git a/vendor/k8s.io/kubernetes/test/e2e/framework/debug/log_size_monitoring.go b/vendor/k8s.io/kubernetes/test/e2e/framework/debug/log_size_monitoring.go new file mode 100644 index 0000000000..b5f53a208f --- /dev/null +++ b/vendor/k8s.io/kubernetes/test/e2e/framework/debug/log_size_monitoring.go @@ -0,0 +1,288 @@ +/* +Copyright 2015 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
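The package comment above is the whole contract: importing the package for its side effects is what switches on namespace dumping and the optional gatherers. A minimal sketch of such an import in a test file (file and package names are illustrative):

package e2e

import (
	// Blank-imported for its init() side effect: it appends a framework
	// extension so every framework.Framework gets DumpAllNamespaceInfo,
	// plus log-size and resource-usage gathering when those are enabled
	// in the test context.
	_ "k8s.io/kubernetes/test/e2e/framework/debug/init"
)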
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package debug + +import ( + "bytes" + "fmt" + "strconv" + "strings" + "sync" + "text/tabwriter" + "time" + + clientset "k8s.io/client-go/kubernetes" + + "k8s.io/kubernetes/test/e2e/framework" + e2essh "k8s.io/kubernetes/test/e2e/framework/ssh" +) + +const ( + // Minimal period between polling log sizes from components + pollingPeriod = 60 * time.Second + workersNo = 5 + kubeletLogsPath = "/var/log/kubelet.log" + kubeProxyLogsPath = "/var/log/kube-proxy.log" + kubeAddonsLogsPath = "/var/log/kube-addons.log" + kubeMasterAddonsLogsPath = "/var/log/kube-master-addons.log" + apiServerLogsPath = "/var/log/kube-apiserver.log" + controllersLogsPath = "/var/log/kube-controller-manager.log" + schedulerLogsPath = "/var/log/kube-scheduler.log" +) + +var ( + nodeLogsToCheck = []string{kubeletLogsPath, kubeProxyLogsPath} + masterLogsToCheck = []string{kubeletLogsPath, kubeAddonsLogsPath, kubeMasterAddonsLogsPath, + apiServerLogsPath, controllersLogsPath, schedulerLogsPath} +) + +// TimestampedSize contains a size together with a time of measurement. +type TimestampedSize struct { + timestamp time.Time + size int +} + +// LogSizeGatherer is a worker which grabs a WorkItem from the channel and does assigned work. +type LogSizeGatherer struct { + stopChannel chan bool + data *LogsSizeData + wg *sync.WaitGroup + workChannel chan WorkItem +} + +// LogsSizeVerifier gathers data about log files sizes from master and node machines. +// It oversees a workers which do the gathering. +type LogsSizeVerifier struct { + client clientset.Interface + stopChannel chan bool + // data stores LogSizeData groupped per IP and log_path + data *LogsSizeData + masterAddress string + nodeAddresses []string + wg sync.WaitGroup + workChannel chan WorkItem + workers []*LogSizeGatherer +} + +// SingleLogSummary is a structure for handling average generation rate and number of probes. +type SingleLogSummary struct { + AverageGenerationRate int + NumberOfProbes int +} + +// LogSizeDataTimeseries is map of timestamped size. +type LogSizeDataTimeseries map[string]map[string][]TimestampedSize + +// LogsSizeDataSummary is map of log summary. +// node -> file -> data +type LogsSizeDataSummary map[string]map[string]SingleLogSummary + +// PrintHumanReadable returns string of log size data summary. +// TODO: make sure that we don't need locking here +func (s *LogsSizeDataSummary) PrintHumanReadable() string { + buf := &bytes.Buffer{} + w := tabwriter.NewWriter(buf, 1, 0, 1, ' ', 0) + fmt.Fprintf(w, "host\tlog_file\taverage_rate (B/s)\tnumber_of_probes\n") + for k, v := range *s { + fmt.Fprintf(w, "%v\t\t\t\n", k) + for path, data := range v { + fmt.Fprintf(w, "\t%v\t%v\t%v\n", path, data.AverageGenerationRate, data.NumberOfProbes) + } + } + w.Flush() + return buf.String() +} + +// PrintJSON returns the summary of log size data with JSON format. +func (s *LogsSizeDataSummary) PrintJSON() string { + return framework.PrettyPrintJSON(*s) +} + +// SummaryKind returns the summary of log size data summary. 
+func (s *LogsSizeDataSummary) SummaryKind() string { + return "LogSizeSummary" +} + +// LogsSizeData is a structure for handling timeseries of log size data and lock. +type LogsSizeData struct { + data LogSizeDataTimeseries + lock sync.Mutex +} + +// WorkItem is a command for a worker that contains an IP of machine from which we want to +// gather data and paths to all files we're interested in. +type WorkItem struct { + ip string + paths []string + backoffMultiplier int +} + +func prepareData(masterAddress string, nodeAddresses []string) *LogsSizeData { + data := make(LogSizeDataTimeseries) + ips := append(nodeAddresses, masterAddress) + for _, ip := range ips { + data[ip] = make(map[string][]TimestampedSize) + } + return &LogsSizeData{ + data: data, + lock: sync.Mutex{}, + } +} + +func (d *LogsSizeData) addNewData(ip, path string, timestamp time.Time, size int) { + d.lock.Lock() + defer d.lock.Unlock() + d.data[ip][path] = append( + d.data[ip][path], + TimestampedSize{ + timestamp: timestamp, + size: size, + }, + ) +} + +// NewLogsVerifier creates a new LogsSizeVerifier which will stop when stopChannel is closed +func NewLogsVerifier(c clientset.Interface, stopChannel chan bool) *LogsSizeVerifier { + nodeAddresses, err := e2essh.NodeSSHHosts(c) + framework.ExpectNoError(err) + instanceAddress := framework.APIAddress() + ":22" + + workChannel := make(chan WorkItem, len(nodeAddresses)+1) + workers := make([]*LogSizeGatherer, workersNo) + + verifier := &LogsSizeVerifier{ + client: c, + stopChannel: stopChannel, + data: prepareData(instanceAddress, nodeAddresses), + masterAddress: instanceAddress, + nodeAddresses: nodeAddresses, + wg: sync.WaitGroup{}, + workChannel: workChannel, + workers: workers, + } + verifier.wg.Add(workersNo) + for i := 0; i < workersNo; i++ { + workers[i] = &LogSizeGatherer{ + stopChannel: stopChannel, + data: verifier.data, + wg: &verifier.wg, + workChannel: workChannel, + } + } + return verifier +} + +// GetSummary returns a summary (average generation rate and number of probes) of the data gathered by LogSizeVerifier +func (s *LogsSizeVerifier) GetSummary() *LogsSizeDataSummary { + result := make(LogsSizeDataSummary) + for k, v := range s.data.data { + result[k] = make(map[string]SingleLogSummary) + for path, data := range v { + if len(data) > 1 { + last := data[len(data)-1] + first := data[0] + rate := (last.size - first.size) / int(last.timestamp.Sub(first.timestamp)/time.Second) + result[k][path] = SingleLogSummary{ + AverageGenerationRate: rate, + NumberOfProbes: len(data), + } + } + } + } + return &result +} + +// Run starts log size gathering. It starts a gorouting for every worker and then blocks until stopChannel is closed +func (s *LogsSizeVerifier) Run() { + s.workChannel <- WorkItem{ + ip: s.masterAddress, + paths: masterLogsToCheck, + backoffMultiplier: 1, + } + for _, node := range s.nodeAddresses { + s.workChannel <- WorkItem{ + ip: node, + paths: nodeLogsToCheck, + backoffMultiplier: 1, + } + } + for _, worker := range s.workers { + go worker.Run() + } + <-s.stopChannel + s.wg.Wait() +} + +// Run starts log size gathering. 
+func (g *LogSizeGatherer) Run() { + for g.Work() { + } +} + +func (g *LogSizeGatherer) pushWorkItem(workItem WorkItem) { + select { + case <-time.After(time.Duration(workItem.backoffMultiplier) * pollingPeriod): + g.workChannel <- workItem + case <-g.stopChannel: + return + } +} + +// Work does a single unit of work: tries to take out a WorkItem from the queue, ssh-es into a given machine, +// gathers data, writes it to the shared map, and creates a gorouting which reinserts work item into +// the queue with a delay. Returns false if worker should exit. +func (g *LogSizeGatherer) Work() bool { + var workItem WorkItem + select { + case <-g.stopChannel: + g.wg.Done() + return false + case workItem = <-g.workChannel: + } + sshResult, err := e2essh.SSH( + fmt.Sprintf("ls -l %v | awk '{print $9, $5}' | tr '\n' ' '", strings.Join(workItem.paths, " ")), + workItem.ip, + framework.TestContext.Provider, + ) + if err != nil { + framework.Logf("Error while trying to SSH to %v, skipping probe. Error: %v", workItem.ip, err) + // In case of repeated error give up. + if workItem.backoffMultiplier >= 128 { + framework.Logf("Failed to ssh to a node %v multiple times in a row. Giving up.", workItem.ip) + g.wg.Done() + return false + } + workItem.backoffMultiplier *= 2 + go g.pushWorkItem(workItem) + return true + } + workItem.backoffMultiplier = 1 + results := strings.Split(sshResult.Stdout, " ") + + now := time.Now() + for i := 0; i+1 < len(results); i = i + 2 { + path := results[i] + size, err := strconv.Atoi(results[i+1]) + if err != nil { + framework.Logf("Error during conversion to int: %v, skipping data. Error: %v", results[i+1], err) + continue + } + g.data.addNewData(workItem.ip, path, now, size) + } + go g.pushWorkItem(workItem) + return true +} diff --git a/vendor/k8s.io/kubernetes/test/e2e/framework/debug/resource_usage_gatherer.go b/vendor/k8s.io/kubernetes/test/e2e/framework/debug/resource_usage_gatherer.go new file mode 100644 index 0000000000..f401cac27e --- /dev/null +++ b/vendor/k8s.io/kubernetes/test/e2e/framework/debug/resource_usage_gatherer.go @@ -0,0 +1,659 @@ +/* +Copyright 2015 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package debug + +import ( + "bufio" + "bytes" + "context" + "encoding/json" + "fmt" + "math" + "regexp" + "sort" + "strconv" + "strings" + "sync" + "text/tabwriter" + "time" + + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/fields" + utilruntime "k8s.io/apimachinery/pkg/util/runtime" + clientset "k8s.io/client-go/kubernetes" + kubeletstatsv1alpha1 "k8s.io/kubelet/pkg/apis/stats/v1alpha1" + + "k8s.io/kubernetes/test/e2e/framework" + e2essh "k8s.io/kubernetes/test/e2e/framework/ssh" +) + +// ResourceConstraint is a struct to hold constraints. +type ResourceConstraint struct { + CPUConstraint float64 + MemoryConstraint uint64 +} + +// SingleContainerSummary is a struct to hold single container summary. 
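As a reference for the verifier above, a sketch (an assumed wrapper, not framework code) of its intended lifecycle: create it with a stop channel, let Run block in a goroutine, close the channel when the work is done, then read the summary.

package e2e

import (
	clientset "k8s.io/client-go/kubernetes"

	e2edebug "k8s.io/kubernetes/test/e2e/framework/debug"
)

// gatherLogSizes is a hypothetical wrapper: it runs body while the verifier
// polls log sizes, then stops the workers and returns the summary.
func gatherLogSizes(c clientset.Interface, body func()) *e2edebug.LogsSizeDataSummary {
	stop := make(chan bool)
	verifier := e2edebug.NewLogsVerifier(c, stop)

	done := make(chan struct{})
	go func() {
		defer close(done)
		verifier.Run() // returns once stop is closed and the workers drain
	}()

	body()

	close(stop)
	<-done // make sure Run has finished before reading the gathered data
	return verifier.GetSummary()
}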
+type SingleContainerSummary struct { + Name string + CPU float64 + Mem uint64 +} + +// ContainerResourceUsage is a structure for gathering container resource usage. +type ContainerResourceUsage struct { + Name string + Timestamp time.Time + CPUUsageInCores float64 + MemoryUsageInBytes uint64 + MemoryWorkingSetInBytes uint64 + MemoryRSSInBytes uint64 + // The interval used to calculate CPUUsageInCores. + CPUInterval time.Duration +} + +// ResourceUsagePerContainer is map of ContainerResourceUsage +type ResourceUsagePerContainer map[string]*ContainerResourceUsage + +// ResourceUsageSummary is a struct to hold resource usage summary. +// we can't have int here, as JSON does not accept integer keys. +type ResourceUsageSummary map[string][]SingleContainerSummary + +// PrintHumanReadable prints resource usage summary in human readable. +func (s *ResourceUsageSummary) PrintHumanReadable() string { + buf := &bytes.Buffer{} + w := tabwriter.NewWriter(buf, 1, 0, 1, ' ', 0) + for perc, summaries := range *s { + buf.WriteString(fmt.Sprintf("%v percentile:\n", perc)) + fmt.Fprintf(w, "container\tcpu(cores)\tmemory(MB)\n") + for _, summary := range summaries { + fmt.Fprintf(w, "%q\t%.3f\t%.2f\n", summary.Name, summary.CPU, float64(summary.Mem)/(1024*1024)) + } + w.Flush() + } + return buf.String() +} + +// PrintJSON prints resource usage summary in JSON. +func (s *ResourceUsageSummary) PrintJSON() string { + return framework.PrettyPrintJSON(*s) +} + +// SummaryKind returns string of ResourceUsageSummary +func (s *ResourceUsageSummary) SummaryKind() string { + return "ResourceUsageSummary" +} + +type uint64arr []uint64 + +func (a uint64arr) Len() int { return len(a) } +func (a uint64arr) Swap(i, j int) { a[i], a[j] = a[j], a[i] } +func (a uint64arr) Less(i, j int) bool { return a[i] < a[j] } + +type usageDataPerContainer struct { + cpuData []float64 + memUseData []uint64 + memWorkSetData []uint64 +} + +func computePercentiles(timeSeries []ResourceUsagePerContainer, percentilesToCompute []int) map[int]ResourceUsagePerContainer { + if len(timeSeries) == 0 { + return make(map[int]ResourceUsagePerContainer) + } + dataMap := make(map[string]*usageDataPerContainer) + for i := range timeSeries { + for name, data := range timeSeries[i] { + if dataMap[name] == nil { + dataMap[name] = &usageDataPerContainer{ + cpuData: make([]float64, 0, len(timeSeries)), + memUseData: make([]uint64, 0, len(timeSeries)), + memWorkSetData: make([]uint64, 0, len(timeSeries)), + } + } + dataMap[name].cpuData = append(dataMap[name].cpuData, data.CPUUsageInCores) + dataMap[name].memUseData = append(dataMap[name].memUseData, data.MemoryUsageInBytes) + dataMap[name].memWorkSetData = append(dataMap[name].memWorkSetData, data.MemoryWorkingSetInBytes) + } + } + for _, v := range dataMap { + sort.Float64s(v.cpuData) + sort.Sort(uint64arr(v.memUseData)) + sort.Sort(uint64arr(v.memWorkSetData)) + } + + result := make(map[int]ResourceUsagePerContainer) + for _, perc := range percentilesToCompute { + data := make(ResourceUsagePerContainer) + for k, v := range dataMap { + percentileIndex := int(math.Ceil(float64(len(v.cpuData)*perc)/100)) - 1 + data[k] = &ContainerResourceUsage{ + Name: k, + CPUUsageInCores: v.cpuData[percentileIndex], + MemoryUsageInBytes: v.memUseData[percentileIndex], + MemoryWorkingSetInBytes: v.memWorkSetData[percentileIndex], + } + } + result[perc] = data + } + return result +} + +func leftMergeData(left, right map[int]ResourceUsagePerContainer) map[int]ResourceUsagePerContainer { + result := 
make(map[int]ResourceUsagePerContainer) + for percentile, data := range left { + result[percentile] = data + if _, ok := right[percentile]; !ok { + continue + } + for k, v := range right[percentile] { + result[percentile][k] = v + } + } + return result +} + +type resourceGatherWorker struct { + c clientset.Interface + nodeName string + wg *sync.WaitGroup + containerIDs []string + stopCh chan struct{} + dataSeries []ResourceUsagePerContainer + finished bool + inKubemark bool + resourceDataGatheringPeriod time.Duration + probeDuration time.Duration + printVerboseLogs bool +} + +func (w *resourceGatherWorker) singleProbe() { + data := make(ResourceUsagePerContainer) + if w.inKubemark { + kubemarkData := getKubemarkMasterComponentsResourceUsage() + if kubemarkData == nil { + return + } + for k, v := range kubemarkData { + data[k] = &ContainerResourceUsage{ + Name: v.Name, + MemoryWorkingSetInBytes: v.MemoryWorkingSetInBytes, + CPUUsageInCores: v.CPUUsageInCores, + } + } + } else { + nodeUsage, err := getOneTimeResourceUsageOnNode(w.c, w.nodeName, w.probeDuration, func() []string { return w.containerIDs }) + if err != nil { + framework.Logf("Error while reading data from %v: %v", w.nodeName, err) + return + } + for k, v := range nodeUsage { + data[k] = v + if w.printVerboseLogs { + framework.Logf("Get container %v usage on node %v. CPUUsageInCores: %v, MemoryUsageInBytes: %v, MemoryWorkingSetInBytes: %v", k, w.nodeName, v.CPUUsageInCores, v.MemoryUsageInBytes, v.MemoryWorkingSetInBytes) + } + } + } + w.dataSeries = append(w.dataSeries, data) +} + +// getOneTimeResourceUsageOnNode queries the node's /stats/summary endpoint +// and returns the resource usage of all containerNames for the past +// cpuInterval. +// The acceptable range of the interval is 2s~120s. Be warned that as the +// interval (and #containers) increases, the size of kubelet's response +// could be significant. E.g., the 60s interval stats for ~20 containers is +// ~1.5MB. Don't hammer the node with frequent, heavy requests. +// +// cadvisor records cumulative cpu usage in nanoseconds, so we need to have two +// stats points to compute the cpu usage over the interval. Assuming cadvisor +// polls every second, we'd need to get N stats points for N-second interval. +// Note that this is an approximation and may not be accurate, hence we also +// write the actual interval used for calculation (based on the timestamps of +// the stats points in ContainerResourceUsage.CPUInterval. +// +// containerNames is a function returning a collection of container names in which +// user is interested in. +func getOneTimeResourceUsageOnNode( + c clientset.Interface, + nodeName string, + cpuInterval time.Duration, + containerNames func() []string, +) (ResourceUsagePerContainer, error) { + const ( + // cadvisor records stats about every second. + cadvisorStatsPollingIntervalInSeconds float64 = 1.0 + // cadvisor caches up to 2 minutes of stats (configured by kubelet). + maxNumStatsToRequest int = 120 + ) + + numStats := int(float64(cpuInterval.Seconds()) / cadvisorStatsPollingIntervalInSeconds) + if numStats < 2 || numStats > maxNumStatsToRequest { + return nil, fmt.Errorf("numStats needs to be > 1 and < %d", maxNumStatsToRequest) + } + // Get information of all containers on the node. 
+ summary, err := getStatsSummary(c, nodeName) + if err != nil { + return nil, err + } + + f := func(name string, newStats *kubeletstatsv1alpha1.ContainerStats) *ContainerResourceUsage { + if newStats == nil || newStats.CPU == nil || newStats.Memory == nil { + return nil + } + return &ContainerResourceUsage{ + Name: name, + Timestamp: newStats.StartTime.Time, + CPUUsageInCores: float64(removeUint64Ptr(newStats.CPU.UsageNanoCores)) / 1000000000, + MemoryUsageInBytes: removeUint64Ptr(newStats.Memory.UsageBytes), + MemoryWorkingSetInBytes: removeUint64Ptr(newStats.Memory.WorkingSetBytes), + MemoryRSSInBytes: removeUint64Ptr(newStats.Memory.RSSBytes), + CPUInterval: 0, + } + } + // Process container infos that are relevant to us. + containers := containerNames() + usageMap := make(ResourceUsagePerContainer, len(containers)) + for _, pod := range summary.Pods { + for _, container := range pod.Containers { + isInteresting := false + for _, interestingContainerName := range containers { + if container.Name == interestingContainerName { + isInteresting = true + break + } + } + if !isInteresting { + continue + } + if usage := f(pod.PodRef.Name+"/"+container.Name, &container); usage != nil { + usageMap[pod.PodRef.Name+"/"+container.Name] = usage + } + } + } + return usageMap, nil +} + +// getStatsSummary contacts kubelet for the container information. +func getStatsSummary(c clientset.Interface, nodeName string) (*kubeletstatsv1alpha1.Summary, error) { + ctx, cancel := context.WithTimeout(context.Background(), framework.SingleCallTimeout) + defer cancel() + + data, err := c.CoreV1().RESTClient().Get(). + Resource("nodes"). + SubResource("proxy"). + Name(fmt.Sprintf("%v:%v", nodeName, framework.KubeletPort)). + Suffix("stats/summary"). + Do(ctx).Raw() + + if err != nil { + return nil, err + } + + summary := kubeletstatsv1alpha1.Summary{} + err = json.Unmarshal(data, &summary) + if err != nil { + return nil, err + } + return &summary, nil +} + +func removeUint64Ptr(ptr *uint64) uint64 { + if ptr == nil { + return 0 + } + return *ptr +} + +func (w *resourceGatherWorker) gather(initialSleep time.Duration) { + defer utilruntime.HandleCrash() + defer w.wg.Done() + defer framework.Logf("Closing worker for %v", w.nodeName) + defer func() { w.finished = true }() + select { + case <-time.After(initialSleep): + w.singleProbe() + for { + select { + case <-time.After(w.resourceDataGatheringPeriod): + w.singleProbe() + case <-w.stopCh: + return + } + } + case <-w.stopCh: + return + } +} + +// ContainerResourceGatherer is a struct for gathering container resource. +type ContainerResourceGatherer struct { + client clientset.Interface + stopCh chan struct{} + workers []resourceGatherWorker + workerWg sync.WaitGroup + containerIDs []string + options ResourceGathererOptions +} + +// ResourceGathererOptions is a struct to hold options for resource. +type ResourceGathererOptions struct { + InKubemark bool + Nodes NodesSet + ResourceDataGatheringPeriod time.Duration + ProbeDuration time.Duration + PrintVerboseLogs bool +} + +// NodesSet is a value of nodes set. +type NodesSet int + +const ( + // AllNodes means all containers on all nodes. + AllNodes NodesSet = 0 + // MasterNodes means all containers on Master nodes only. + MasterNodes NodesSet = 1 + // MasterAndDNSNodes means all containers on Master nodes and DNS containers on other nodes. + MasterAndDNSNodes NodesSet = 2 +) + +// nodeHasControlPlanePods returns true if specified node has control plane pods +// (kube-scheduler and/or kube-controller-manager). 
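The comment on getOneTimeResourceUsageOnNode above explains that cadvisor exposes cumulative CPU in nanoseconds, so usage over an interval needs two samples. A small sketch of that arithmetic against the vendored stats types (the helper is illustrative, not part of the framework):

package e2e

import (
	"time"

	statsapi "k8s.io/kubelet/pkg/apis/stats/v1alpha1"
)

// cpuCoresOverInterval derives average core usage between two cumulative CPU
// samples: the delta of UsageCoreNanoSeconds divided by the elapsed nanoseconds.
func cpuCoresOverInterval(older, newer *statsapi.CPUStats) (float64, time.Duration) {
	if older == nil || newer == nil ||
		older.UsageCoreNanoSeconds == nil || newer.UsageCoreNanoSeconds == nil {
		return 0, 0
	}
	interval := newer.Time.Time.Sub(older.Time.Time)
	if interval <= 0 || *newer.UsageCoreNanoSeconds < *older.UsageCoreNanoSeconds {
		return 0, interval
	}
	delta := float64(*newer.UsageCoreNanoSeconds - *older.UsageCoreNanoSeconds)
	return delta / float64(interval.Nanoseconds()), interval
}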
+func nodeHasControlPlanePods(c clientset.Interface, nodeName string) (bool, error) { + regKubeScheduler := regexp.MustCompile("kube-scheduler-.*") + regKubeControllerManager := regexp.MustCompile("kube-controller-manager-.*") + + podList, err := c.CoreV1().Pods(metav1.NamespaceSystem).List(context.TODO(), metav1.ListOptions{ + FieldSelector: fields.OneTermEqualSelector("spec.nodeName", nodeName).String(), + }) + if err != nil { + return false, err + } + if len(podList.Items) < 1 { + framework.Logf("Can't find any pods in namespace %s to grab metrics from", metav1.NamespaceSystem) + } + for _, pod := range podList.Items { + if regKubeScheduler.MatchString(pod.Name) || regKubeControllerManager.MatchString(pod.Name) { + return true, nil + } + } + return false, nil +} + +// NewResourceUsageGatherer returns a new ContainerResourceGatherer. +func NewResourceUsageGatherer(c clientset.Interface, options ResourceGathererOptions, pods *v1.PodList) (*ContainerResourceGatherer, error) { + g := ContainerResourceGatherer{ + client: c, + stopCh: make(chan struct{}), + containerIDs: make([]string, 0), + options: options, + } + + if options.InKubemark { + g.workerWg.Add(1) + g.workers = append(g.workers, resourceGatherWorker{ + inKubemark: true, + stopCh: g.stopCh, + wg: &g.workerWg, + finished: false, + resourceDataGatheringPeriod: options.ResourceDataGatheringPeriod, + probeDuration: options.ProbeDuration, + printVerboseLogs: options.PrintVerboseLogs, + }) + return &g, nil + } + + // Tracks kube-system pods if no valid PodList is passed in. + var err error + if pods == nil { + pods, err = c.CoreV1().Pods("kube-system").List(context.TODO(), metav1.ListOptions{}) + if err != nil { + framework.Logf("Error while listing Pods: %v", err) + return nil, err + } + } + dnsNodes := make(map[string]bool) + for _, pod := range pods.Items { + if options.Nodes == MasterNodes { + isControlPlane, err := nodeHasControlPlanePods(c, pod.Spec.NodeName) + if err != nil { + return nil, err + } + if !isControlPlane { + continue + } + } + if options.Nodes == MasterAndDNSNodes { + isControlPlane, err := nodeHasControlPlanePods(c, pod.Spec.NodeName) + if err != nil { + return nil, err + } + if !isControlPlane && pod.Labels["k8s-app"] != "kube-dns" { + continue + } + } + for _, container := range pod.Status.InitContainerStatuses { + g.containerIDs = append(g.containerIDs, container.Name) + } + for _, container := range pod.Status.ContainerStatuses { + g.containerIDs = append(g.containerIDs, container.Name) + } + if options.Nodes == MasterAndDNSNodes { + dnsNodes[pod.Spec.NodeName] = true + } + } + nodeList, err := c.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{}) + if err != nil { + framework.Logf("Error while listing Nodes: %v", err) + return nil, err + } + + for _, node := range nodeList.Items { + isControlPlane, err := nodeHasControlPlanePods(c, node.Name) + if err != nil { + return nil, err + } + if options.Nodes == AllNodes || isControlPlane || dnsNodes[node.Name] { + g.workerWg.Add(1) + g.workers = append(g.workers, resourceGatherWorker{ + c: c, + nodeName: node.Name, + wg: &g.workerWg, + containerIDs: g.containerIDs, + stopCh: g.stopCh, + finished: false, + inKubemark: false, + resourceDataGatheringPeriod: options.ResourceDataGatheringPeriod, + probeDuration: options.ProbeDuration, + printVerboseLogs: options.PrintVerboseLogs, + }) + if options.Nodes == MasterNodes { + break + } + } + } + return &g, nil +} + +// StartGatheringData starts a stat gathering worker blocks for each node to track, +// and blocks until 
StopAndSummarize is called. +func (g *ContainerResourceGatherer) StartGatheringData() { + if len(g.workers) == 0 { + return + } + delayPeriod := g.options.ResourceDataGatheringPeriod / time.Duration(len(g.workers)) + delay := time.Duration(0) + for i := range g.workers { + go g.workers[i].gather(delay) + delay += delayPeriod + } + g.workerWg.Wait() +} + +// StopAndSummarize stops stat gathering workers, processes the collected stats, +// generates resource summary for the passed-in percentiles, and returns the summary. +// It returns an error if the resource usage at any percentile is beyond the +// specified resource constraints. +func (g *ContainerResourceGatherer) StopAndSummarize(percentiles []int, constraints map[string]ResourceConstraint) (*ResourceUsageSummary, error) { + close(g.stopCh) + framework.Logf("Closed stop channel. Waiting for %v workers", len(g.workers)) + finished := make(chan struct{}, 1) + go func() { + g.workerWg.Wait() + finished <- struct{}{} + }() + select { + case <-finished: + framework.Logf("Waitgroup finished.") + case <-time.After(2 * time.Minute): + unfinished := make([]string, 0) + for i := range g.workers { + if !g.workers[i].finished { + unfinished = append(unfinished, g.workers[i].nodeName) + } + } + framework.Logf("Timed out while waiting for waitgroup, some workers failed to finish: %v", unfinished) + } + + if len(percentiles) == 0 { + framework.Logf("Warning! Empty percentile list for stopAndPrintData.") + return &ResourceUsageSummary{}, fmt.Errorf("Failed to get any resource usage data") + } + data := make(map[int]ResourceUsagePerContainer) + for i := range g.workers { + if g.workers[i].finished { + stats := computePercentiles(g.workers[i].dataSeries, percentiles) + data = leftMergeData(stats, data) + } + } + + // Workers has been stopped. We need to gather data stored in them. + sortedKeys := []string{} + for name := range data[percentiles[0]] { + sortedKeys = append(sortedKeys, name) + } + sort.Strings(sortedKeys) + violatedConstraints := make([]string, 0) + summary := make(ResourceUsageSummary) + for _, perc := range percentiles { + for _, name := range sortedKeys { + usage := data[perc][name] + summary[strconv.Itoa(perc)] = append(summary[strconv.Itoa(perc)], SingleContainerSummary{ + Name: name, + CPU: usage.CPUUsageInCores, + Mem: usage.MemoryWorkingSetInBytes, + }) + + // Verifying 99th percentile of resource usage + if perc != 99 { + continue + } + // Name has a form: / + containerName := strings.Split(name, "/")[1] + constraint, ok := constraints[containerName] + if !ok { + continue + } + if usage.CPUUsageInCores > constraint.CPUConstraint { + violatedConstraints = append( + violatedConstraints, + fmt.Sprintf("Container %v is using %v/%v CPU", + name, + usage.CPUUsageInCores, + constraint.CPUConstraint, + ), + ) + } + if usage.MemoryWorkingSetInBytes > constraint.MemoryConstraint { + violatedConstraints = append( + violatedConstraints, + fmt.Sprintf("Container %v is using %v/%v MB of memory", + name, + float64(usage.MemoryWorkingSetInBytes)/(1024*1024), + float64(constraint.MemoryConstraint)/(1024*1024), + ), + ) + } + } + } + if len(violatedConstraints) > 0 { + return &summary, fmt.Errorf(strings.Join(violatedConstraints, "\n")) + } + return &summary, nil +} + +// kubemarkResourceUsage is a struct for tracking the resource usage of kubemark. 
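StopAndSummarize above only enforces constraints at the 99th percentile and looks them up by bare container name (the "pod/" prefix is stripped before the lookup). A sketch of the constraints map shape, with made-up numbers:

package e2e

import (
	e2edebug "k8s.io/kubernetes/test/e2e/framework/debug"
)

// exampleConstraints is illustrative: keys are container names without the
// "pod/" prefix, CPUConstraint is in cores, MemoryConstraint is the working
// set in bytes; both are checked against the 99th-percentile summary.
var exampleConstraints = map[string]e2edebug.ResourceConstraint{
	"kube-apiserver": {
		CPUConstraint:    1.5,
		MemoryConstraint: 2 * 1024 * 1024 * 1024, // 2 GiB
	},
}

// Typical call, assuming gatherer came from NewResourceUsageGatherer and
// StartGatheringData is already running in a goroutine:
//
//	summary, err := gatherer.StopAndSummarize([]int{90, 99, 100}, exampleConstraints)
//	// err is non-nil if any 99th-percentile usage exceeds its constraint.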
+type kubemarkResourceUsage struct { + Name string + MemoryWorkingSetInBytes uint64 + CPUUsageInCores float64 +} + +func getMasterUsageByPrefix(prefix string) (string, error) { + sshResult, err := e2essh.SSH(fmt.Sprintf("ps ax -o %%cpu,rss,command | tail -n +2 | grep %v | sed 's/\\s+/ /g'", prefix), framework.APIAddress()+":22", framework.TestContext.Provider) + if err != nil { + return "", err + } + return sshResult.Stdout, nil +} + +// getKubemarkMasterComponentsResourceUsage returns the resource usage of kubemark which contains multiple combinations of cpu and memory usage for each pod name. +func getKubemarkMasterComponentsResourceUsage() map[string]*kubemarkResourceUsage { + result := make(map[string]*kubemarkResourceUsage) + // Get kubernetes component resource usage + sshResult, err := getMasterUsageByPrefix("kube") + if err != nil { + framework.Logf("Error when trying to SSH to master machine. Skipping probe. %v", err) + return nil + } + scanner := bufio.NewScanner(strings.NewReader(sshResult)) + for scanner.Scan() { + var cpu float64 + var mem uint64 + var name string + fmt.Sscanf(strings.TrimSpace(scanner.Text()), "%f %d /usr/local/bin/kube-%s", &cpu, &mem, &name) + if name != "" { + // Gatherer expects pod_name/container_name format + fullName := name + "/" + name + result[fullName] = &kubemarkResourceUsage{Name: fullName, MemoryWorkingSetInBytes: mem * 1024, CPUUsageInCores: cpu / 100} + } + } + // Get etcd resource usage + sshResult, err = getMasterUsageByPrefix("bin/etcd") + if err != nil { + framework.Logf("Error when trying to SSH to master machine. Skipping probe") + return nil + } + scanner = bufio.NewScanner(strings.NewReader(sshResult)) + for scanner.Scan() { + var cpu float64 + var mem uint64 + var etcdKind string + fmt.Sscanf(strings.TrimSpace(scanner.Text()), "%f %d /bin/sh -c /usr/local/bin/etcd", &cpu, &mem) + dataDirStart := strings.Index(scanner.Text(), "--data-dir") + if dataDirStart < 0 { + continue + } + fmt.Sscanf(scanner.Text()[dataDirStart:], "--data-dir=/var/%s", &etcdKind) + if etcdKind != "" { + // Gatherer expects pod_name/container_name format + fullName := "etcd/" + etcdKind + result[fullName] = &kubemarkResourceUsage{Name: fullName, MemoryWorkingSetInBytes: mem * 1024, CPUUsageInCores: cpu / 100} + } + } + return result +} diff --git a/vendor/k8s.io/kubernetes/test/e2e/framework/metrics/api.go b/vendor/k8s.io/kubernetes/test/e2e/framework/metrics/api.go new file mode 100644 index 0000000000..cb0d276629 --- /dev/null +++ b/vendor/k8s.io/kubernetes/test/e2e/framework/metrics/api.go @@ -0,0 +1,89 @@ +/* +Copyright 2019 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package metrics + +import ( + "fmt" + + e2eperftype "k8s.io/kubernetes/test/e2e/perftype" +) + +// APICall is a struct for managing API call. 
+type APICall struct { + Resource string `json:"resource"` + Subresource string `json:"subresource"` + Verb string `json:"verb"` + Scope string `json:"scope"` + Latency LatencyMetric `json:"latency"` + Count int `json:"count"` +} + +// APIResponsiveness is a struct for managing multiple API calls. +type APIResponsiveness struct { + APICalls []APICall `json:"apicalls"` +} + +// SummaryKind returns the summary of API responsiveness. +func (a *APIResponsiveness) SummaryKind() string { + return "APIResponsiveness" +} + +// PrintHumanReadable returns metrics with JSON format. +func (a *APIResponsiveness) PrintHumanReadable() string { + return PrettyPrintJSON(a) +} + +// PrintJSON returns metrics of PerfData(50, 90 and 99th percentiles) with JSON format. +func (a *APIResponsiveness) PrintJSON() string { + return PrettyPrintJSON(APICallToPerfData(a)) +} + +func (a *APIResponsiveness) Len() int { return len(a.APICalls) } +func (a *APIResponsiveness) Swap(i, j int) { + a.APICalls[i], a.APICalls[j] = a.APICalls[j], a.APICalls[i] +} +func (a *APIResponsiveness) Less(i, j int) bool { + return a.APICalls[i].Latency.Perc99 < a.APICalls[j].Latency.Perc99 +} + +// currentAPICallMetricsVersion is the current apicall performance metrics version. We should +// bump up the version each time we make incompatible change to the metrics. +const currentAPICallMetricsVersion = "v1" + +// APICallToPerfData transforms APIResponsiveness to PerfData. +func APICallToPerfData(apicalls *APIResponsiveness) *e2eperftype.PerfData { + perfData := &e2eperftype.PerfData{Version: currentAPICallMetricsVersion} + for _, apicall := range apicalls.APICalls { + item := e2eperftype.DataItem{ + Data: map[string]float64{ + "Perc50": float64(apicall.Latency.Perc50) / 1000000, // us -> ms + "Perc90": float64(apicall.Latency.Perc90) / 1000000, + "Perc99": float64(apicall.Latency.Perc99) / 1000000, + }, + Unit: "ms", + Labels: map[string]string{ + "Verb": apicall.Verb, + "Resource": apicall.Resource, + "Subresource": apicall.Subresource, + "Scope": apicall.Scope, + "Count": fmt.Sprintf("%v", apicall.Count), + }, + } + perfData.DataItems = append(perfData.DataItems, item) + } + return perfData +} diff --git a/vendor/k8s.io/kubernetes/test/e2e/framework/metrics/api_server_metrics.go b/vendor/k8s.io/kubernetes/test/e2e/framework/metrics/api_server_metrics.go new file mode 100644 index 0000000000..3b7e376ce6 --- /dev/null +++ b/vendor/k8s.io/kubernetes/test/e2e/framework/metrics/api_server_metrics.go @@ -0,0 +1,52 @@ +/* +Copyright 2015 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package metrics + +import ( + "context" + + "k8s.io/component-base/metrics/testutil" +) + +// APIServerMetrics is metrics for API server +type APIServerMetrics testutil.Metrics + +// Equal returns true if all metrics are the same as the arguments. 
+func (m *APIServerMetrics) Equal(o APIServerMetrics) bool { + return (*testutil.Metrics)(m).Equal(testutil.Metrics(o)) +} + +func newAPIServerMetrics() APIServerMetrics { + result := testutil.NewMetrics() + return APIServerMetrics(result) +} + +func parseAPIServerMetrics(data string) (APIServerMetrics, error) { + result := newAPIServerMetrics() + if err := testutil.ParseMetrics(data, (*testutil.Metrics)(&result)); err != nil { + return APIServerMetrics{}, err + } + return result, nil +} + +func (g *Grabber) getMetricsFromAPIServer() (string, error) { + rawOutput, err := g.client.CoreV1().RESTClient().Get().RequestURI("/metrics").Do(context.TODO()).Raw() + if err != nil { + return "", err + } + return string(rawOutput), nil +} diff --git a/vendor/k8s.io/kubernetes/test/e2e/framework/metrics/cluster_autoscaler_metrics.go b/vendor/k8s.io/kubernetes/test/e2e/framework/metrics/cluster_autoscaler_metrics.go new file mode 100644 index 0000000000..16bc996e85 --- /dev/null +++ b/vendor/k8s.io/kubernetes/test/e2e/framework/metrics/cluster_autoscaler_metrics.go @@ -0,0 +1,40 @@ +/* +Copyright 2015 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package metrics + +import "k8s.io/component-base/metrics/testutil" + +// ClusterAutoscalerMetrics is metrics for cluster autoscaler +type ClusterAutoscalerMetrics testutil.Metrics + +// Equal returns true if all metrics are the same as the arguments. +func (m *ClusterAutoscalerMetrics) Equal(o ClusterAutoscalerMetrics) bool { + return (*testutil.Metrics)(m).Equal(testutil.Metrics(o)) +} + +func newClusterAutoscalerMetrics() ClusterAutoscalerMetrics { + result := testutil.NewMetrics() + return ClusterAutoscalerMetrics(result) +} + +func parseClusterAutoscalerMetrics(data string) (ClusterAutoscalerMetrics, error) { + result := newClusterAutoscalerMetrics() + if err := testutil.ParseMetrics(data, (*testutil.Metrics)(&result)); err != nil { + return ClusterAutoscalerMetrics{}, err + } + return result, nil +} diff --git a/vendor/k8s.io/kubernetes/test/e2e/framework/metrics/controller_manager_metrics.go b/vendor/k8s.io/kubernetes/test/e2e/framework/metrics/controller_manager_metrics.go new file mode 100644 index 0000000000..797d14024f --- /dev/null +++ b/vendor/k8s.io/kubernetes/test/e2e/framework/metrics/controller_manager_metrics.go @@ -0,0 +1,40 @@ +/* +Copyright 2015 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package metrics + +import "k8s.io/component-base/metrics/testutil" + +// ControllerManagerMetrics is metrics for controller manager +type ControllerManagerMetrics testutil.Metrics + +// Equal returns true if all metrics are the same as the arguments. +func (m *ControllerManagerMetrics) Equal(o ControllerManagerMetrics) bool { + return (*testutil.Metrics)(m).Equal(testutil.Metrics(o)) +} + +func newControllerManagerMetrics() ControllerManagerMetrics { + result := testutil.NewMetrics() + return ControllerManagerMetrics(result) +} + +func parseControllerManagerMetrics(data string) (ControllerManagerMetrics, error) { + result := newControllerManagerMetrics() + if err := testutil.ParseMetrics(data, (*testutil.Metrics)(&result)); err != nil { + return ControllerManagerMetrics{}, err + } + return result, nil +} diff --git a/vendor/k8s.io/kubernetes/test/e2e/framework/metrics/e2e_metrics.go b/vendor/k8s.io/kubernetes/test/e2e/framework/metrics/e2e_metrics.go new file mode 100644 index 0000000000..42f8cf83f0 --- /dev/null +++ b/vendor/k8s.io/kubernetes/test/e2e/framework/metrics/e2e_metrics.go @@ -0,0 +1,127 @@ +/* +Copyright 2019 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package metrics + +import ( + "bytes" + "encoding/json" + "fmt" + + "k8s.io/component-base/metrics/testutil" + "k8s.io/kubernetes/test/e2e/framework" +) + +const ( + // Cluster Autoscaler metrics names + caFunctionMetric = "cluster_autoscaler_function_duration_seconds_bucket" + caFunctionMetricLabel = "function" +) + +// ComponentCollection is metrics collection of components. +type ComponentCollection Collection + +func (m *ComponentCollection) filterMetrics() { + apiServerMetrics := make(APIServerMetrics) + for _, metric := range interestingAPIServerMetrics { + apiServerMetrics[metric] = (*m).APIServerMetrics[metric] + } + controllerManagerMetrics := make(ControllerManagerMetrics) + for _, metric := range interestingControllerManagerMetrics { + controllerManagerMetrics[metric] = (*m).ControllerManagerMetrics[metric] + } + kubeletMetrics := make(map[string]KubeletMetrics) + for kubelet, grabbed := range (*m).KubeletMetrics { + kubeletMetrics[kubelet] = make(KubeletMetrics) + for _, metric := range interestingKubeletMetrics { + kubeletMetrics[kubelet][metric] = grabbed[metric] + } + } + (*m).APIServerMetrics = apiServerMetrics + (*m).ControllerManagerMetrics = controllerManagerMetrics + (*m).KubeletMetrics = kubeletMetrics +} + +// PrintHumanReadable returns e2e metrics with JSON format. 
+func (m *ComponentCollection) PrintHumanReadable() string { + buf := bytes.Buffer{} + for _, interestingMetric := range interestingAPIServerMetrics { + buf.WriteString(fmt.Sprintf("For %v:\n", interestingMetric)) + for _, sample := range (*m).APIServerMetrics[interestingMetric] { + buf.WriteString(fmt.Sprintf("\t%v\n", testutil.PrintSample(sample))) + } + } + for _, interestingMetric := range interestingControllerManagerMetrics { + buf.WriteString(fmt.Sprintf("For %v:\n", interestingMetric)) + for _, sample := range (*m).ControllerManagerMetrics[interestingMetric] { + buf.WriteString(fmt.Sprintf("\t%v\n", testutil.PrintSample(sample))) + } + } + for _, interestingMetric := range interestingClusterAutoscalerMetrics { + buf.WriteString(fmt.Sprintf("For %v:\n", interestingMetric)) + for _, sample := range (*m).ClusterAutoscalerMetrics[interestingMetric] { + buf.WriteString(fmt.Sprintf("\t%v\n", testutil.PrintSample(sample))) + } + } + for kubelet, grabbed := range (*m).KubeletMetrics { + buf.WriteString(fmt.Sprintf("For %v:\n", kubelet)) + for _, interestingMetric := range interestingKubeletMetrics { + buf.WriteString(fmt.Sprintf("\tFor %v:\n", interestingMetric)) + for _, sample := range grabbed[interestingMetric] { + buf.WriteString(fmt.Sprintf("\t\t%v\n", testutil.PrintSample(sample))) + } + } + } + return buf.String() +} + +// PrettyPrintJSON converts metrics to JSON format. +// TODO: This function should be replaced with framework.PrettyPrintJSON after solving +// circulary dependency between core framework and this metrics subpackage. +func PrettyPrintJSON(metrics interface{}) string { + output := &bytes.Buffer{} + if err := json.NewEncoder(output).Encode(metrics); err != nil { + framework.Logf("Error building encoder: %v", err) + return "" + } + formatted := &bytes.Buffer{} + if err := json.Indent(formatted, output.Bytes(), "", " "); err != nil { + framework.Logf("Error indenting: %v", err) + return "" + } + return string(formatted.Bytes()) +} + +// PrintJSON returns e2e metrics with JSON format. +func (m *ComponentCollection) PrintJSON() string { + m.filterMetrics() + return PrettyPrintJSON(m) +} + +// SummaryKind returns the summary of e2e metrics. +func (m *ComponentCollection) SummaryKind() string { + return "ComponentCollection" +} + +// ComputeClusterAutoscalerMetricsDelta computes the change in cluster +// autoscaler metrics. +func (m *ComponentCollection) ComputeClusterAutoscalerMetricsDelta(before Collection) { + if beforeSamples, found := before.ClusterAutoscalerMetrics[caFunctionMetric]; found { + if afterSamples, found := m.ClusterAutoscalerMetrics[caFunctionMetric]; found { + testutil.ComputeHistogramDelta(beforeSamples, afterSamples, caFunctionMetricLabel) + } + } +} diff --git a/vendor/k8s.io/kubernetes/test/e2e/framework/metrics/grab.go b/vendor/k8s.io/kubernetes/test/e2e/framework/metrics/grab.go new file mode 100644 index 0000000000..2264f9f379 --- /dev/null +++ b/vendor/k8s.io/kubernetes/test/e2e/framework/metrics/grab.go @@ -0,0 +1,73 @@ +/* +Copyright 2015 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/ + +package metrics + +import ( + "github.com/onsi/ginkgo/v2" + + "k8s.io/kubernetes/test/e2e/framework" +) + +func GrabBeforeEach(f *framework.Framework) (result *Collection) { + gatherMetricsAfterTest := framework.TestContext.GatherMetricsAfterTest == "true" || framework.TestContext.GatherMetricsAfterTest == "master" + if !gatherMetricsAfterTest || !framework.TestContext.IncludeClusterAutoscalerMetrics { + return nil + } + + ginkgo.By("Gathering metrics before test", func() { + grabber, err := NewMetricsGrabber(f.ClientSet, f.KubemarkExternalClusterClientSet, f.ClientConfig(), !framework.ProviderIs("kubemark"), false, false, false, framework.TestContext.IncludeClusterAutoscalerMetrics, false) + if err != nil { + framework.Logf("Failed to create MetricsGrabber (skipping ClusterAutoscaler metrics gathering before test): %v", err) + return + } + metrics, err := grabber.Grab() + if err != nil { + framework.Logf("MetricsGrabber failed to grab CA metrics before test (skipping metrics gathering): %v", err) + return + } + framework.Logf("Gathered ClusterAutoscaler metrics before test") + result = &metrics + }) + + return +} + +func GrabAfterEach(f *framework.Framework, before *Collection) { + if framework.TestContext.GatherMetricsAfterTest == "false" { + return + } + + ginkgo.By("Gathering metrics after test", func() { + // Grab apiserver, scheduler, controller-manager metrics and (optionally) nodes' kubelet metrics. + grabMetricsFromKubelets := framework.TestContext.GatherMetricsAfterTest != "master" && !framework.ProviderIs("kubemark") + grabber, err := NewMetricsGrabber(f.ClientSet, f.KubemarkExternalClusterClientSet, f.ClientConfig(), grabMetricsFromKubelets, true, true, true, framework.TestContext.IncludeClusterAutoscalerMetrics, false) + if err != nil { + framework.Logf("Failed to create MetricsGrabber (skipping metrics gathering): %v", err) + return + } + received, err := grabber.Grab() + if err != nil { + framework.Logf("MetricsGrabber failed to grab some of the metrics: %v", err) + return + } + if before == nil { + before = &Collection{} + } + (*ComponentCollection)(&received).ComputeClusterAutoscalerMetricsDelta(*before) + f.TestSummaries = append(f.TestSummaries, (*ComponentCollection)(&received)) + }) +} diff --git a/vendor/k8s.io/kubernetes/test/e2e/framework/metrics/interesting_metrics.go b/vendor/k8s.io/kubernetes/test/e2e/framework/metrics/interesting_metrics.go new file mode 100644 index 0000000000..b20f51f899 --- /dev/null +++ b/vendor/k8s.io/kubernetes/test/e2e/framework/metrics/interesting_metrics.go @@ -0,0 +1,58 @@ +/* +Copyright 2019 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package metrics + +var interestingAPIServerMetrics = []string{ + "apiserver_request_total", + "apiserver_request_latency_seconds", + "apiserver_init_events_total", +} + +var interestingControllerManagerMetrics = []string{ + "garbage_collector_attempt_to_delete_queue_latency", + "garbage_collector_attempt_to_delete_work_duration", + "garbage_collector_attempt_to_orphan_queue_latency", + "garbage_collector_attempt_to_orphan_work_duration", + "garbage_collector_dirty_processing_latency_microseconds", + "garbage_collector_event_processing_latency_microseconds", + "garbage_collector_graph_changes_queue_latency", + "garbage_collector_graph_changes_work_duration", + "garbage_collector_orphan_processing_latency_microseconds", + + "namespace_queue_latency", + "namespace_queue_latency_sum", + "namespace_queue_latency_count", + "namespace_retries", + "namespace_work_duration", + "namespace_work_duration_sum", + "namespace_work_duration_count", +} + +var interestingKubeletMetrics = []string{ + "kubelet_docker_operations_errors_total", + "kubelet_docker_operations_duration_seconds", + "kubelet_pod_start_duration_seconds", + "kubelet_pod_start_sli_duration_seconds", + "kubelet_pod_worker_duration_seconds", + "kubelet_pod_worker_start_duration_seconds", +} + +var interestingClusterAutoscalerMetrics = []string{ + "function_duration_seconds", + "errors_total", + "evicted_pods_total", +} diff --git a/vendor/k8s.io/kubernetes/test/e2e/framework/metrics/kubelet_metrics.go b/vendor/k8s.io/kubernetes/test/e2e/framework/metrics/kubelet_metrics.go new file mode 100644 index 0000000000..12becf9b85 --- /dev/null +++ b/vendor/k8s.io/kubernetes/test/e2e/framework/metrics/kubelet_metrics.go @@ -0,0 +1,236 @@ +/* +Copyright 2015 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package metrics + +import ( + "context" + "fmt" + "io" + "net/http" + "sort" + "strconv" + "strings" + "time" + + "k8s.io/apimachinery/pkg/util/sets" + clientset "k8s.io/client-go/kubernetes" + "k8s.io/component-base/metrics/testutil" + "k8s.io/kubernetes/test/e2e/framework" +) + +const ( + proxyTimeout = 2 * time.Minute + // dockerOperationsLatencyKey is the key for the operation latency metrics. 
+ // Taken from k8s.io/kubernetes/pkg/kubelet/dockershim/metrics + dockerOperationsLatencyKey = "docker_operations_duration_seconds" + // Taken from k8s.io/kubernetes/pkg/kubelet/metrics + kubeletSubsystem = "kubelet" + // Taken from k8s.io/kubernetes/pkg/kubelet/metrics + podWorkerDurationKey = "pod_worker_duration_seconds" + // Taken from k8s.io/kubernetes/pkg/kubelet/metrics + podStartDurationKey = "pod_start_duration_seconds" + // Taken from k8s.io/kubernetes/pkg/kubelet/metrics + PodStartSLIDurationKey = "pod_start_sli_duration_seconds" + // Taken from k8s.io/kubernetes/pkg/kubelet/metrics + cgroupManagerOperationsKey = "cgroup_manager_duration_seconds" + // Taken from k8s.io/kubernetes/pkg/kubelet/metrics + podWorkerStartDurationKey = "pod_worker_start_duration_seconds" + // Taken from k8s.io/kubernetes/pkg/kubelet/metrics + plegRelistDurationKey = "pleg_relist_duration_seconds" +) + +// KubeletMetrics is metrics for kubelet +type KubeletMetrics testutil.Metrics + +// Equal returns true if all metrics are the same as the arguments. +func (m *KubeletMetrics) Equal(o KubeletMetrics) bool { + return (*testutil.Metrics)(m).Equal(testutil.Metrics(o)) +} + +// NewKubeletMetrics returns new metrics which are initialized. +func NewKubeletMetrics() KubeletMetrics { + result := testutil.NewMetrics() + return KubeletMetrics(result) +} + +// GrabKubeletMetricsWithoutProxy retrieve metrics from the kubelet on the given node using a simple GET over http. +// Currently only used in integration tests. +func GrabKubeletMetricsWithoutProxy(nodeName, path string) (KubeletMetrics, error) { + resp, err := http.Get(fmt.Sprintf("http://%s%s", nodeName, path)) + if err != nil { + return KubeletMetrics{}, err + } + defer resp.Body.Close() + body, err := io.ReadAll(resp.Body) + if err != nil { + return KubeletMetrics{}, err + } + return parseKubeletMetrics(string(body)) +} + +func parseKubeletMetrics(data string) (KubeletMetrics, error) { + result := NewKubeletMetrics() + if err := testutil.ParseMetrics(data, (*testutil.Metrics)(&result)); err != nil { + return KubeletMetrics{}, err + } + return result, nil +} + +func (g *Grabber) getMetricsFromNode(nodeName string, kubeletPort int) (string, error) { + // There's a problem with timing out during proxy. Wrapping this in a goroutine to prevent deadlock. + finished := make(chan struct{}, 1) + var err error + var rawOutput []byte + go func() { + rawOutput, err = g.client.CoreV1().RESTClient().Get(). + Resource("nodes"). + SubResource("proxy"). + Name(fmt.Sprintf("%v:%v", nodeName, kubeletPort)). + Suffix("metrics"). + Do(context.TODO()).Raw() + finished <- struct{}{} + }() + select { + case <-time.After(proxyTimeout): + return "", fmt.Errorf("Timed out when waiting for proxy to gather metrics from %v", nodeName) + case <-finished: + if err != nil { + return "", err + } + return string(rawOutput), nil + } +} + +// KubeletLatencyMetric stores metrics scraped from the kubelet server's /metric endpoint. +// TODO: Get some more structure around the metrics and this type +type KubeletLatencyMetric struct { + // eg: list, info, create + Operation string + // eg: sync_pods, pod_worker + Method string + // 0 <= quantile <=1, e.g. 0.95 is 95%tile, 0.5 is median. + Quantile float64 + Latency time.Duration +} + +// KubeletLatencyMetrics implements sort.Interface for []KubeletMetric based on +// the latency field. 
+type KubeletLatencyMetrics []KubeletLatencyMetric + +func (a KubeletLatencyMetrics) Len() int { return len(a) } +func (a KubeletLatencyMetrics) Swap(i, j int) { a[i], a[j] = a[j], a[i] } +func (a KubeletLatencyMetrics) Less(i, j int) bool { return a[i].Latency > a[j].Latency } + +// If a apiserver client is passed in, the function will try to get kubelet metrics from metrics grabber; +// or else, the function will try to get kubelet metrics directly from the node. +func getKubeletMetricsFromNode(c clientset.Interface, nodeName string) (KubeletMetrics, error) { + if c == nil { + return GrabKubeletMetricsWithoutProxy(nodeName, "/metrics") + } + grabber, err := NewMetricsGrabber(c, nil, nil, true, false, false, false, false, false) + if err != nil { + return KubeletMetrics{}, err + } + return grabber.GrabFromKubelet(nodeName) +} + +// GetKubeletMetrics gets all metrics in kubelet subsystem from specified node and trims +// the subsystem prefix. +func GetKubeletMetrics(c clientset.Interface, nodeName string) (KubeletMetrics, error) { + ms, err := getKubeletMetricsFromNode(c, nodeName) + if err != nil { + return KubeletMetrics{}, err + } + + kubeletMetrics := make(KubeletMetrics) + for name, samples := range ms { + const prefix = kubeletSubsystem + "_" + if !strings.HasPrefix(name, prefix) { + // Not a kubelet metric. + continue + } + method := strings.TrimPrefix(name, prefix) + kubeletMetrics[method] = samples + } + return kubeletMetrics, nil +} + +// GetDefaultKubeletLatencyMetrics calls GetKubeletLatencyMetrics with a set of default metricNames +// identifying common latency metrics. +// Note that the KubeletMetrics passed in should not contain subsystem prefix. +func GetDefaultKubeletLatencyMetrics(ms KubeletMetrics) KubeletLatencyMetrics { + latencyMetricNames := sets.NewString( + podWorkerDurationKey, + podWorkerStartDurationKey, + podStartDurationKey, + PodStartSLIDurationKey, + cgroupManagerOperationsKey, + dockerOperationsLatencyKey, + podWorkerStartDurationKey, + plegRelistDurationKey, + ) + return GetKubeletLatencyMetrics(ms, latencyMetricNames) +} + +// GetKubeletLatencyMetrics filters ms to include only those contained in the metricNames set, +// then constructs a KubeletLatencyMetrics list based on the samples associated with those metrics. +func GetKubeletLatencyMetrics(ms KubeletMetrics, filterMetricNames sets.String) KubeletLatencyMetrics { + var latencyMetrics KubeletLatencyMetrics + for name, samples := range ms { + if !filterMetricNames.Has(name) { + continue + } + for _, sample := range samples { + latency := sample.Value + operation := string(sample.Metric["operation_type"]) + var quantile float64 + if val, ok := sample.Metric[testutil.QuantileLabel]; ok { + var err error + if quantile, err = strconv.ParseFloat(string(val), 64); err != nil { + continue + } + } + + latencyMetrics = append(latencyMetrics, KubeletLatencyMetric{ + Operation: operation, + Method: name, + Quantile: quantile, + Latency: time.Duration(int64(latency)) * time.Microsecond, + }) + } + } + return latencyMetrics +} + +// HighLatencyKubeletOperations logs and counts the high latency metrics exported by the kubelet server via /metrics. 
+func HighLatencyKubeletOperations(c clientset.Interface, threshold time.Duration, nodeName string, logFunc func(fmt string, args ...interface{})) (KubeletLatencyMetrics, error) { + ms, err := GetKubeletMetrics(c, nodeName) + if err != nil { + return KubeletLatencyMetrics{}, err + } + latencyMetrics := GetDefaultKubeletLatencyMetrics(ms) + sort.Sort(latencyMetrics) + var badMetrics KubeletLatencyMetrics + logFunc("\nLatency metrics for node %v", nodeName) + for _, m := range latencyMetrics { + if m.Latency > threshold { + badMetrics = append(badMetrics, m) + framework.Logf("%+v", m) + } + } + return badMetrics, nil +} diff --git a/vendor/k8s.io/kubernetes/test/e2e/framework/metrics/latencies.go b/vendor/k8s.io/kubernetes/test/e2e/framework/metrics/latencies.go new file mode 100644 index 0000000000..3f549d5dbd --- /dev/null +++ b/vendor/k8s.io/kubernetes/test/e2e/framework/metrics/latencies.go @@ -0,0 +1,38 @@ +/* +Copyright 2019 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package metrics + +import ( + "time" +) + +// PodLatencyData encapsulates pod startup latency information. +type PodLatencyData struct { + // Name of the pod + Name string + // Node this pod was running on + Node string + // Latency information related to pod startuptime + Latency time.Duration +} + +// LatencySlice is an array of PodLatencyData which encapsulates pod startup latency information. +type LatencySlice []PodLatencyData + +func (a LatencySlice) Len() int { return len(a) } +func (a LatencySlice) Swap(i, j int) { a[i], a[j] = a[j], a[i] } +func (a LatencySlice) Less(i, j int) bool { return a[i].Latency < a[j].Latency } diff --git a/vendor/k8s.io/kubernetes/test/e2e/framework/metrics/metrics_grabber.go b/vendor/k8s.io/kubernetes/test/e2e/framework/metrics/metrics_grabber.go new file mode 100644 index 0000000000..63c90c3ed5 --- /dev/null +++ b/vendor/k8s.io/kubernetes/test/e2e/framework/metrics/metrics_grabber.go @@ -0,0 +1,452 @@ +/* +Copyright 2015 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package metrics + +import ( + "context" + "errors" + "fmt" + "net" + "regexp" + "sync" + "time" + + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/fields" + "k8s.io/apimachinery/pkg/util/wait" + clientset "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" + "k8s.io/klog/v2" + + e2epod "k8s.io/kubernetes/test/e2e/framework/pod" +) + +const ( + // kubeSchedulerPort is the default port for the scheduler status server. 
+ kubeSchedulerPort = 10259 + // kubeControllerManagerPort is the default port for the controller manager status server. + kubeControllerManagerPort = 10257 + // snapshotControllerPort is the port for the snapshot controller + snapshotControllerPort = 9102 +) + +// MetricsGrabbingDisabledError is an error that is wrapped by the +// different MetricsGrabber.Wrap functions when metrics grabbing is +// not supported. Tests that check metrics data should then skip +// the check. +var MetricsGrabbingDisabledError = errors.New("metrics grabbing disabled") + +// Collection is metrics collection of components +type Collection struct { + APIServerMetrics APIServerMetrics + ControllerManagerMetrics ControllerManagerMetrics + SnapshotControllerMetrics SnapshotControllerMetrics + KubeletMetrics map[string]KubeletMetrics + SchedulerMetrics SchedulerMetrics + ClusterAutoscalerMetrics ClusterAutoscalerMetrics +} + +// Grabber provides functions which grab metrics from components +type Grabber struct { + client clientset.Interface + externalClient clientset.Interface + config *rest.Config + grabFromAPIServer bool + grabFromControllerManager bool + grabFromKubelets bool + grabFromScheduler bool + grabFromClusterAutoscaler bool + grabFromSnapshotController bool + kubeScheduler string + waitForSchedulerReadyOnce sync.Once + kubeControllerManager string + waitForControllerManagerReadyOnce sync.Once + snapshotController string + waitForSnapshotControllerReadyOnce sync.Once +} + +// NewMetricsGrabber prepares for grabbing metrics data from several different +// components. It should be called when those components are running because +// it needs to communicate with them to determine for which components +// metrics data can be retrieved. +// +// Collecting metrics data is an optional debug feature. Not all clusters will +// support it. If disabled for a component, the corresponding Grab function +// will immediately return an error derived from MetricsGrabbingDisabledError. +func NewMetricsGrabber(c clientset.Interface, ec clientset.Interface, config *rest.Config, kubelets bool, scheduler bool, controllers bool, apiServer bool, clusterAutoscaler bool, snapshotController bool) (*Grabber, error) { + + kubeScheduler := "" + kubeControllerManager := "" + snapshotControllerManager := "" + + regKubeScheduler := regexp.MustCompile("kube-scheduler-.*") + regKubeControllerManager := regexp.MustCompile("kube-controller-manager-.*") + regSnapshotController := regexp.MustCompile("volume-snapshot-controller.*") + + if (scheduler || controllers) && config == nil { + return nil, errors.New("a rest config is required for grabbing kube-controller and kube-controller-manager metrics") + } + + podList, err := c.CoreV1().Pods(metav1.NamespaceSystem).List(context.TODO(), metav1.ListOptions{}) + if err != nil { + return nil, err + } + if len(podList.Items) < 1 { + klog.Warningf("Can't find any pods in namespace %s to grab metrics from", metav1.NamespaceSystem) + } + for _, pod := range podList.Items { + if regKubeScheduler.MatchString(pod.Name) { + kubeScheduler = pod.Name + } + if regKubeControllerManager.MatchString(pod.Name) { + kubeControllerManager = pod.Name + } + if regSnapshotController.MatchString(pod.Name) { + snapshotControllerManager = pod.Name + } + if kubeScheduler != "" && kubeControllerManager != "" && snapshotControllerManager != "" { + break + } + } + if clusterAutoscaler && ec == nil { + klog.Warningf("Did not receive an external client interface. 
Grabbing metrics from ClusterAutoscaler is disabled.") + } + + return &Grabber{ + client: c, + externalClient: ec, + config: config, + grabFromAPIServer: apiServer, + grabFromControllerManager: checkPodDebugHandlers(c, controllers, "kube-controller-manager", kubeControllerManager), + grabFromKubelets: kubelets, + grabFromScheduler: checkPodDebugHandlers(c, scheduler, "kube-scheduler", kubeScheduler), + grabFromClusterAutoscaler: clusterAutoscaler, + grabFromSnapshotController: checkPodDebugHandlers(c, snapshotController, "snapshot-controller", snapshotControllerManager), + kubeScheduler: kubeScheduler, + kubeControllerManager: kubeControllerManager, + snapshotController: snapshotControllerManager, + }, nil +} + +func checkPodDebugHandlers(c clientset.Interface, requested bool, component, podName string) bool { + if !requested { + return false + } + if podName == "" { + klog.Warningf("Can't find %s pod. Grabbing metrics from %s is disabled.", component, component) + return false + } + + // The debug handlers on the host where the pod runs might be disabled. + // We can check that indirectly by trying to retrieve log output. + limit := int64(1) + if _, err := c.CoreV1().Pods(metav1.NamespaceSystem).GetLogs(podName, &v1.PodLogOptions{LimitBytes: &limit}).DoRaw(context.TODO()); err != nil { + klog.Warningf("Can't retrieve log output of %s (%q). Debug handlers might be disabled in kubelet. Grabbing metrics from %s is disabled.", + podName, err, component) + return false + } + + // Metrics gathering enabled. + return true +} + +// HasControlPlanePods returns true if metrics grabber was able to find control-plane pods +func (g *Grabber) HasControlPlanePods() bool { + return g.kubeScheduler != "" && g.kubeControllerManager != "" +} + +// GrabFromKubelet returns metrics from kubelet +func (g *Grabber) GrabFromKubelet(nodeName string) (KubeletMetrics, error) { + nodes, err := g.client.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{FieldSelector: fields.Set{"metadata.name": nodeName}.AsSelector().String()}) + if err != nil { + return KubeletMetrics{}, err + } + if len(nodes.Items) != 1 { + return KubeletMetrics{}, fmt.Errorf("Error listing nodes with name %v, got %v", nodeName, nodes.Items) + } + kubeletPort := nodes.Items[0].Status.DaemonEndpoints.KubeletEndpoint.Port + return g.grabFromKubeletInternal(nodeName, int(kubeletPort)) +} + +func (g *Grabber) grabFromKubeletInternal(nodeName string, kubeletPort int) (KubeletMetrics, error) { + if kubeletPort <= 0 || kubeletPort > 65535 { + return KubeletMetrics{}, fmt.Errorf("Invalid Kubelet port %v. 
Skipping Kubelet's metrics gathering", kubeletPort) + } + output, err := g.getMetricsFromNode(nodeName, int(kubeletPort)) + if err != nil { + return KubeletMetrics{}, err + } + return parseKubeletMetrics(output) +} + +// GrabFromScheduler returns metrics from scheduler +func (g *Grabber) GrabFromScheduler() (SchedulerMetrics, error) { + if !g.grabFromScheduler { + return SchedulerMetrics{}, fmt.Errorf("kube-scheduler: %w", MetricsGrabbingDisabledError) + } + + var err error + + g.waitForSchedulerReadyOnce.Do(func() { + if readyErr := e2epod.WaitTimeoutForPodReadyInNamespace(g.client, g.kubeScheduler, metav1.NamespaceSystem, 5*time.Minute); readyErr != nil { + err = fmt.Errorf("error waiting for kube-scheduler pod to be ready: %w", readyErr) + } + }) + if err != nil { + return SchedulerMetrics{}, err + } + + var lastMetricsFetchErr error + var output string + if metricsWaitErr := wait.PollImmediate(time.Second, time.Minute, func() (bool, error) { + output, lastMetricsFetchErr = g.getSecureMetricsFromPod(g.kubeScheduler, metav1.NamespaceSystem, kubeSchedulerPort) + return lastMetricsFetchErr == nil, nil + }); metricsWaitErr != nil { + err := fmt.Errorf("error waiting for kube-scheduler pod to expose metrics: %v; %v", metricsWaitErr, lastMetricsFetchErr) + return SchedulerMetrics{}, err + } + + return parseSchedulerMetrics(output) +} + +// GrabFromClusterAutoscaler returns metrics from cluster autoscaler +func (g *Grabber) GrabFromClusterAutoscaler() (ClusterAutoscalerMetrics, error) { + if !g.HasControlPlanePods() && g.externalClient == nil { + return ClusterAutoscalerMetrics{}, fmt.Errorf("ClusterAutoscaler: %w", MetricsGrabbingDisabledError) + } + var client clientset.Interface + var namespace string + if g.externalClient != nil { + client = g.externalClient + namespace = "kubemark" + } else { + client = g.client + namespace = metav1.NamespaceSystem + } + output, err := g.getMetricsFromPod(client, "cluster-autoscaler", namespace, 8085) + if err != nil { + return ClusterAutoscalerMetrics{}, err + } + return parseClusterAutoscalerMetrics(output) +} + +// GrabFromControllerManager returns metrics from controller manager +func (g *Grabber) GrabFromControllerManager() (ControllerManagerMetrics, error) { + if !g.grabFromControllerManager { + return ControllerManagerMetrics{}, fmt.Errorf("kube-controller-manager: %w", MetricsGrabbingDisabledError) + } + + var err error + + g.waitForControllerManagerReadyOnce.Do(func() { + if readyErr := e2epod.WaitTimeoutForPodReadyInNamespace(g.client, g.kubeControllerManager, metav1.NamespaceSystem, 5*time.Minute); readyErr != nil { + err = fmt.Errorf("error waiting for kube-controller-manager pod to be ready: %w", readyErr) + } + }) + if err != nil { + return ControllerManagerMetrics{}, err + } + + var output string + var lastMetricsFetchErr error + if metricsWaitErr := wait.PollImmediate(time.Second, time.Minute, func() (bool, error) { + output, lastMetricsFetchErr = g.getSecureMetricsFromPod(g.kubeControllerManager, metav1.NamespaceSystem, kubeControllerManagerPort) + return lastMetricsFetchErr == nil, nil + }); metricsWaitErr != nil { + err := fmt.Errorf("error waiting for kube-controller-manager to expose metrics: %v; %v", metricsWaitErr, lastMetricsFetchErr) + return ControllerManagerMetrics{}, err + } + + return parseControllerManagerMetrics(output) +} + +// GrabFromSnapshotController returns metrics from controller manager +func (g *Grabber) GrabFromSnapshotController(podName string, port int) (SnapshotControllerMetrics, error) { + if 
!g.grabFromSnapshotController { + return SnapshotControllerMetrics{}, fmt.Errorf("volume-snapshot-controller: %w", MetricsGrabbingDisabledError) + } + + // Use overrides if provided via test config flags. + // Otherwise, use the default volume-snapshot-controller pod name and port. + if podName == "" { + podName = g.snapshotController + } + if port == 0 { + port = snapshotControllerPort + } + + var err error + + g.waitForSnapshotControllerReadyOnce.Do(func() { + if readyErr := e2epod.WaitTimeoutForPodReadyInNamespace(g.client, podName, metav1.NamespaceSystem, 5*time.Minute); readyErr != nil { + err = fmt.Errorf("error waiting for volume-snapshot-controller pod to be ready: %w", readyErr) + } + }) + if err != nil { + return SnapshotControllerMetrics{}, err + } + + var output string + var lastMetricsFetchErr error + if metricsWaitErr := wait.PollImmediate(time.Second, time.Minute, func() (bool, error) { + output, lastMetricsFetchErr = g.getMetricsFromPod(g.client, podName, metav1.NamespaceSystem, port) + return lastMetricsFetchErr == nil, nil + }); metricsWaitErr != nil { + err = fmt.Errorf("error waiting for volume-snapshot-controller pod to expose metrics: %v; %v", metricsWaitErr, lastMetricsFetchErr) + return SnapshotControllerMetrics{}, err + } + + return parseSnapshotControllerMetrics(output) +} + +// GrabFromAPIServer returns metrics from API server +func (g *Grabber) GrabFromAPIServer() (APIServerMetrics, error) { + output, err := g.getMetricsFromAPIServer() + if err != nil { + return APIServerMetrics{}, err + } + return parseAPIServerMetrics(output) +} + +// Grab returns metrics from corresponding component +func (g *Grabber) Grab() (Collection, error) { + result := Collection{} + var errs []error + if g.grabFromAPIServer { + metrics, err := g.GrabFromAPIServer() + if err != nil { + errs = append(errs, err) + } else { + result.APIServerMetrics = metrics + } + } + if g.grabFromScheduler { + metrics, err := g.GrabFromScheduler() + if err != nil { + errs = append(errs, err) + } else { + result.SchedulerMetrics = metrics + } + } + if g.grabFromControllerManager { + metrics, err := g.GrabFromControllerManager() + if err != nil { + errs = append(errs, err) + } else { + result.ControllerManagerMetrics = metrics + } + } + if g.grabFromSnapshotController { + metrics, err := g.GrabFromSnapshotController(g.snapshotController, snapshotControllerPort) + if err != nil { + errs = append(errs, err) + } else { + result.SnapshotControllerMetrics = metrics + } + } + if g.grabFromClusterAutoscaler { + metrics, err := g.GrabFromClusterAutoscaler() + if err != nil { + errs = append(errs, err) + } else { + result.ClusterAutoscalerMetrics = metrics + } + } + if g.grabFromKubelets { + result.KubeletMetrics = make(map[string]KubeletMetrics) + nodes, err := g.client.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{}) + if err != nil { + errs = append(errs, err) + } else { + for _, node := range nodes.Items { + kubeletPort := node.Status.DaemonEndpoints.KubeletEndpoint.Port + metrics, err := g.grabFromKubeletInternal(node.Name, int(kubeletPort)) + if err != nil { + errs = append(errs, err) + } + result.KubeletMetrics[node.Name] = metrics + } + } + } + if len(errs) > 0 { + return result, fmt.Errorf("Errors while grabbing metrics: %v", errs) + } + return result, nil +} + +// getMetricsFromPod retrieves metrics data from an insecure port. 
+func (g *Grabber) getMetricsFromPod(client clientset.Interface, podName string, namespace string, port int) (string, error) { + rawOutput, err := client.CoreV1().RESTClient().Get(). + Namespace(namespace). + Resource("pods"). + SubResource("proxy"). + Name(fmt.Sprintf("%s:%d", podName, port)). + Suffix("metrics"). + Do(context.TODO()).Raw() + if err != nil { + return "", err + } + return string(rawOutput), nil +} + +// getSecureMetricsFromPod retrieves metrics from a pod that uses TLS +// and checks client credentials. Conceptually this function is +// similar to "kubectl port-forward" + "kubectl get --raw +// https://localhost:/metrics". It uses the same credentials +// as kubelet. +func (g *Grabber) getSecureMetricsFromPod(podName string, namespace string, port int) (string, error) { + dialer := e2epod.NewDialer(g.client, g.config) + metricConfig := rest.CopyConfig(g.config) + addr := e2epod.Addr{ + Namespace: namespace, + PodName: podName, + Port: port, + } + metricConfig.Dial = func(ctx context.Context, network, address string) (net.Conn, error) { + return dialer.DialContainerPort(ctx, addr) + } + // This should make it possible verify the server, but while it + // got past the server name check, certificate validation + // still failed. + metricConfig.Host = addr.String() + metricConfig.ServerName = "localhost" + // Verifying the pod certificate with the same root CA + // as for the API server led to an error about "unknown root + // certificate". Disabling certificate checking on the client + // side gets around that and should be good enough for + // E2E testing. + metricConfig.Insecure = true + metricConfig.CAFile = "" + metricConfig.CAData = nil + + // clientset.NewForConfig is used because + // metricClient.RESTClient() is directly usable, in contrast + // to the client constructed by rest.RESTClientFor(). + metricClient, err := clientset.NewForConfig(metricConfig) + if err != nil { + return "", err + } + + rawOutput, err := metricClient.RESTClient().Get(). + AbsPath("metrics"). + Do(context.TODO()).Raw() + if err != nil { + return "", err + } + return string(rawOutput), nil +} diff --git a/vendor/k8s.io/kubernetes/test/e2e/framework/metrics/pod.go b/vendor/k8s.io/kubernetes/test/e2e/framework/metrics/pod.go new file mode 100644 index 0000000000..7d31670ec2 --- /dev/null +++ b/vendor/k8s.io/kubernetes/test/e2e/framework/metrics/pod.go @@ -0,0 +1,29 @@ +/* +Copyright 2019 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package metrics + +import ( + "time" +) + +// LatencyMetric is a struct for dashboard metrics. 
+type LatencyMetric struct { + Perc50 time.Duration `json:"Perc50"` + Perc90 time.Duration `json:"Perc90"` + Perc99 time.Duration `json:"Perc99"` + Perc100 time.Duration `json:"Perc100"` +} diff --git a/vendor/k8s.io/kubernetes/test/e2e/framework/metrics/scheduler_metrics.go b/vendor/k8s.io/kubernetes/test/e2e/framework/metrics/scheduler_metrics.go new file mode 100644 index 0000000000..ac2982e25a --- /dev/null +++ b/vendor/k8s.io/kubernetes/test/e2e/framework/metrics/scheduler_metrics.go @@ -0,0 +1,40 @@ +/* +Copyright 2015 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package metrics + +import "k8s.io/component-base/metrics/testutil" + +// SchedulerMetrics is metrics for scheduler +type SchedulerMetrics testutil.Metrics + +// Equal returns true if all metrics are the same as the arguments. +func (m *SchedulerMetrics) Equal(o SchedulerMetrics) bool { + return (*testutil.Metrics)(m).Equal(testutil.Metrics(o)) +} + +func newSchedulerMetrics() SchedulerMetrics { + result := testutil.NewMetrics() + return SchedulerMetrics(result) +} + +func parseSchedulerMetrics(data string) (SchedulerMetrics, error) { + result := newSchedulerMetrics() + if err := testutil.ParseMetrics(data, (*testutil.Metrics)(&result)); err != nil { + return SchedulerMetrics{}, err + } + return result, nil +} diff --git a/vendor/k8s.io/kubernetes/test/e2e/framework/metrics/snapshot_controller_metrics.go b/vendor/k8s.io/kubernetes/test/e2e/framework/metrics/snapshot_controller_metrics.go new file mode 100644 index 0000000000..41e381b810 --- /dev/null +++ b/vendor/k8s.io/kubernetes/test/e2e/framework/metrics/snapshot_controller_metrics.go @@ -0,0 +1,40 @@ +/* +Copyright 2021 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package metrics + +import "k8s.io/component-base/metrics/testutil" + +// SnapshotControllerMetrics is metrics for controller manager +type SnapshotControllerMetrics testutil.Metrics + +// Equal returns true if all metrics are the same as the arguments. 
+func (m *SnapshotControllerMetrics) Equal(o SnapshotControllerMetrics) bool { + return (*testutil.Metrics)(m).Equal(testutil.Metrics(o)) +} + +func newSnapshotControllerMetrics() SnapshotControllerMetrics { + result := testutil.NewMetrics() + return SnapshotControllerMetrics(result) +} + +func parseSnapshotControllerMetrics(data string) (SnapshotControllerMetrics, error) { + result := newSnapshotControllerMetrics() + if err := testutil.ParseMetrics(data, (*testutil.Metrics)(&result)); err != nil { + return SnapshotControllerMetrics{}, err + } + return result, nil +} diff --git a/vendor/k8s.io/kubernetes/test/e2e/perftype/perftype.go b/vendor/k8s.io/kubernetes/test/e2e/perftype/perftype.go new file mode 100644 index 0000000000..0a27090877 --- /dev/null +++ b/vendor/k8s.io/kubernetes/test/e2e/perftype/perftype.go @@ -0,0 +1,53 @@ +/* +Copyright 2016 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package perftype + +// TODO(random-liu): Replace this with prometheus' data model. + +// The following performance data structures are generalized and well-formatted. +// They can be pretty printed in json format and be analyzed by other performance +// analyzing tools, such as Perfdash (k8s.io/contrib/perfdash). + +// DataItem is the data point. +type DataItem struct { + // Data is a map from bucket to real data point (e.g. "Perc90" -> 23.5). Notice + // that all data items with the same label combination should have the same buckets. + Data map[string]float64 `json:"data"` + // Unit is the data unit. Notice that all data items with the same label combination + // should have the same unit. + Unit string `json:"unit"` + // Labels is the labels of the data item. + Labels map[string]string `json:"labels,omitempty"` +} + +// PerfData contains all data items generated in current test. +type PerfData struct { + // Version is the version of the metrics. The metrics consumer could use the version + // to detect metrics version change and decide what version to support. + Version string `json:"version"` + DataItems []DataItem `json:"dataItems"` + // Labels is the labels of the dataset. + Labels map[string]string `json:"labels,omitempty"` +} + +// PerfResultTag is the prefix of generated perfdata. Analyzing tools can find the perf result +// with this tag. +const PerfResultTag = "[Result:Performance]" + +// PerfResultEnd is the end of generated perfdata. Analyzing tools can find the end of the perf +// result with this tag. 
+const PerfResultEnd = "[Finish:Performance]" diff --git a/vendor/modules.txt b/vendor/modules.txt index 8428448ae0..c2b1721708 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -359,6 +359,8 @@ github.com/prometheus/client_golang/prometheus github.com/prometheus/client_golang/prometheus/collectors github.com/prometheus/client_golang/prometheus/internal github.com/prometheus/client_golang/prometheus/promhttp +github.com/prometheus/client_golang/prometheus/testutil +github.com/prometheus/client_golang/prometheus/testutil/promlint # github.com/prometheus/client_model v0.3.0 ## explicit; go 1.9 github.com/prometheus/client_model/go @@ -1108,6 +1110,7 @@ k8s.io/component-base/metrics k8s.io/component-base/metrics/legacyregistry k8s.io/component-base/metrics/prometheus/feature k8s.io/component-base/metrics/prometheusextension +k8s.io/component-base/metrics/testutil k8s.io/component-base/tracing k8s.io/component-base/tracing/api/v1 k8s.io/component-base/version @@ -1144,6 +1147,9 @@ k8s.io/kube-openapi/pkg/validation/spec ## explicit; go 1.19 k8s.io/kubectl/pkg/scale k8s.io/kubectl/pkg/util/podutils +# k8s.io/kubelet v0.26.0 => k8s.io/kubelet v0.26.0 +## explicit; go 1.19 +k8s.io/kubelet/pkg/apis/stats/v1alpha1 # k8s.io/kubernetes v1.26.0 ## explicit; go 1.19 k8s.io/kubernetes/pkg/api/legacyscheme @@ -1191,9 +1197,12 @@ k8s.io/kubernetes/pkg/volume/util/types k8s.io/kubernetes/pkg/volume/util/volumepathhandler k8s.io/kubernetes/test/e2e/framework k8s.io/kubernetes/test/e2e/framework/config +k8s.io/kubernetes/test/e2e/framework/debug +k8s.io/kubernetes/test/e2e/framework/debug/init k8s.io/kubernetes/test/e2e/framework/deployment k8s.io/kubernetes/test/e2e/framework/events k8s.io/kubernetes/test/e2e/framework/kubectl +k8s.io/kubernetes/test/e2e/framework/metrics k8s.io/kubernetes/test/e2e/framework/node k8s.io/kubernetes/test/e2e/framework/pod k8s.io/kubernetes/test/e2e/framework/pod/output @@ -1202,6 +1211,7 @@ k8s.io/kubernetes/test/e2e/framework/skipper k8s.io/kubernetes/test/e2e/framework/ssh k8s.io/kubernetes/test/e2e/framework/testfiles k8s.io/kubernetes/test/e2e/framework/volume +k8s.io/kubernetes/test/e2e/perftype k8s.io/kubernetes/test/e2e/storage/podlogs k8s.io/kubernetes/test/e2e/storage/utils k8s.io/kubernetes/test/e2e/testing-manifests
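
For reference, the metrics sub-package vendored above is driven the same way the vendored grab.go drives it. The following is a minimal, illustrative sketch only (it is not part of this patch): `f` is assumed to be an initialized *framework.Framework, and the boolean flag values are placeholders chosen to grab only control-plane metrics.

    grabber, err := metrics.NewMetricsGrabber(
        f.ClientSet,      // main cluster client
        nil,              // no external (kubemark) client
        f.ClientConfig(), // rest config, required for scheduler/controller-manager metrics
        false,            // kubelets
        true,             // scheduler
        true,             // controllers
        true,             // apiServer
        false,            // clusterAutoscaler
        false,            // snapshotController
    )
    if err != nil {
        framework.Logf("failed to create metrics grabber, skipping gathering: %v", err)
        return
    }
    received, err := grabber.Grab()
    if err != nil {
        framework.Logf("failed to grab some metrics: %v", err)
    }
    // ComponentCollection shares Collection's underlying type, so the conversion is legal.
    framework.Logf("%s", (*metrics.ComponentCollection)(&received).PrintHumanReadable())

Under this sketch, components whose debug handlers are unavailable simply report errors derived from MetricsGrabbingDisabledError rather than failing the test, which is why the result is logged even when Grab returns an error.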