Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix deployment count metric #8247

Merged
merged 3 commits into from
Dec 23, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,6 @@ def __init__(self, name, init_config, instances):
'allowed_labels': ['storageclass', 'phase'],
},
'kube_service_spec_type': {'metric_name': 'service.count', 'allowed_labels': ['namespace', 'type']},
# is a count by namespace and phase <Active|Terminating>
'kube_namespace_status_phase': {'metric_name': 'namespace.count', 'allowed_labels': ['phase']},
'kube_replicaset_owner': {
'metric_name': 'replicaset.count',
Expand Down Expand Up @@ -125,12 +124,11 @@ def __init__(self, name, init_config, instances):
'kube_node_spec_unschedulable': self.kube_node_spec_unschedulable,
'kube_resourcequota': self.kube_resourcequota,
'kube_limitrange': self.kube_limitrange,
'kube_persistentvolume_status_phase': self.count_objects_by_tags,
'kube_persistentvolume_status_phase': self.sum_values_by_tags,
'kube_service_spec_type': self.count_objects_by_tags,
'kube_namespace_status_phase': self.count_objects_by_tags,
'kube_namespace_status_phase': self.sum_values_by_tags,
'kube_replicaset_owner': self.count_objects_by_tags,
'kube_job_owner': self.count_objects_by_tags,
# to get overall count is to filter by Available
'kube_deployment_status_observed_generation': self.count_objects_by_tags,
}

Expand Down Expand Up @@ -869,6 +867,25 @@ def kube_limitrange(self, metric, scraper_config):
else:
self.log.error("Metric type %s unsupported for metric %s", metric.type, metric.name)

def sum_values_by_tags(self, metric, scraper_config):
    """Sum sample values per tag set and submit each total as a gauge.

    The entry of ``self.object_count_params`` keyed by ``metric.name``
    provides the gauge's metric name (prefixed with the scraper namespace)
    and the labels allowed to become tags. Samples that end up with the same
    sorted tag set (allowed-label tags plus the scraper's custom tags) have
    their values added together, and one gauge is emitted per tag set.

    :param metric: metric whose ``samples`` are aggregated.
    :param scraper_config: scraper configuration; ``namespace`` and
        ``custom_tags`` are read here, and the whole mapping is forwarded
        to the tag helpers.
    """
    params = self.object_count_params[metric.name]
    gauge_name = "{}.{}".format(scraper_config['namespace'], params['metric_name'])
    totals = Counter()

    for sample in metric.samples:
        sample_tags = []
        for label in params['allowed_labels']:
            resolved = self._label_to_tag(label, sample[self.SAMPLE_LABELS], scraper_config)
            if resolved is None:
                # Fall back to an explicit "unknown" tag value when the
                # helper yields no tag for this label.
                resolved = self._format_tag(label, "unknown", scraper_config)
            sample_tags.append(resolved)
        sample_tags += scraper_config['custom_tags']
        # Sort so that tag order does not split one group into several keys.
        sample_tags.sort()
        totals[tuple(sample_tags)] += sample[self.SAMPLE_VALUE]

    for tag_key, total in iteritems(totals):
        self.gauge(gauge_name, total, tags=list(tag_key))

def count_objects_by_tags(self, metric, scraper_config):
""" Count objects by allowed tags and submit counts as gauges. """
config = self.object_count_params[metric.name]
Expand All @@ -883,7 +900,7 @@ def count_objects_by_tags(self, metric, scraper_config):
tag = self._format_tag(l, "unknown", scraper_config)
tags.append(tag)
tags += scraper_config['custom_tags']
object_counter[tuple(sorted(tags))] += sample[self.SAMPLE_VALUE]
object_counter[tuple(sorted(tags))] += 1

for tags, count in iteritems(object_counter):
self.gauge(metric_name, count, tags=list(tags))
Expand Down
4 changes: 2 additions & 2 deletions kubernetes_state/tests/fixtures/prometheus.txt
Original file line number Diff line number Diff line change
Expand Up @@ -100,9 +100,9 @@ kube_deployment_spec_strategy_rollingupdate_max_unavailable{deployment="tiller-d
# HELP kube_deployment_status_observed_generation The generation observed by the deployment controller.
# TYPE kube_deployment_status_observed_generation gauge
kube_deployment_status_observed_generation{deployment="failingtest",namespace="default"} 1
kube_deployment_status_observed_generation{deployment="jaundiced-numbat-kube-state-metrics",namespace="default"} 1
kube_deployment_status_observed_generation{deployment="jaundiced-numbat-kube-state-metrics",namespace="default"} 4
kube_deployment_status_observed_generation{deployment="kube-dns",namespace="kube-system"} 1
kube_deployment_status_observed_generation{deployment="tiller-deploy",namespace="kube-system"} 1
kube_deployment_status_observed_generation{deployment="tiller-deploy",namespace="kube-system"} 5
# HELP kube_deployment_status_replicas The number of replicas per deployment.
# TYPE kube_deployment_status_replicas gauge
kube_deployment_status_replicas{deployment="failingtest",namespace="default"} 1
Expand Down
82 changes: 64 additions & 18 deletions kubernetes_state/tests/test_kubernetes_state.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,6 @@
NAMESPACE + '.container.gpu.request',
NAMESPACE + '.container.gpu.limit',
# replicasets
NAMESPACE + '.replicaset.count',
NAMESPACE + '.replicaset.replicas',
NAMESPACE + '.replicaset.fully_labeled_replicas',
NAMESPACE + '.replicaset.replicas_ready',
Expand All @@ -100,10 +99,7 @@
NAMESPACE + '.resourcequota.limits.memory.limit',
# limitrange
NAMESPACE + '.limitrange.cpu.default_request',
# services
NAMESPACE + '.service.count',
# jobs
NAMESPACE + '.job.count',
NAMESPACE + '.job.failed',
NAMESPACE + '.job.succeeded',
# vpa
Expand All @@ -112,8 +108,6 @@
NAMESPACE + '.vpa.uncapped_target',
NAMESPACE + '.vpa.upperbound',
NAMESPACE + '.vpa.update_mode',
# namespaces
NAMESPACE + '.namespace.count',
]

TAGS = {
Expand Down Expand Up @@ -148,9 +142,6 @@
'namespace:kube-system',
],
NAMESPACE + '.pod.count': ['uid:b6fb4273-2dd6-4edb-9a23-7642bb121806', 'created_by_kind:daemonset'],
NAMESPACE + '.replicaset.count': ['owner_kind:deployment', 'owner_name:metrics-server-v0.3.6'],
NAMESPACE + '.namespace.count': ['phase:active', 'phase:terminating'],
NAMESPACE + '.job.count': ['owner_kind:cronjob', 'owner_name:a-cronjob'],
NAMESPACE
+ '.container.status_report.count.waiting': [
'reason:containercreating',
Expand All @@ -163,14 +154,6 @@
],
NAMESPACE + '.container.status_report.count.terminated': ['pod:pod2'],
NAMESPACE + '.persistentvolumeclaim.request_storage': ['storageclass:manual'],
NAMESPACE
+ '.service.count': [
'namespace:kube-system',
'namespace:default',
'type:clusterip',
'type:nodeport',
'type:loadbalancer',
],
NAMESPACE + '.job.failed': ['job:hello', 'job_name:hello2'],
NAMESPACE + '.job.succeeded': ['job:hello', 'job_name:hello2'],
NAMESPACE + '.hpa.condition': ['namespace:default', 'hpa:myhpa', 'condition:true', 'status:abletoscale'],
Expand Down Expand Up @@ -373,7 +356,70 @@ def test_update_kube_state_metrics(aggregator, instance, check):
value=0,
)

# deployment counts
# services count
aggregator.assert_metric(
NAMESPACE + '.service.count',
tags=['namespace:default', 'type:clusterip', 'optional:tag1'],
value=3,
)
aggregator.assert_metric(
NAMESPACE + '.service.count',
tags=['namespace:default', 'type:loadbalancer', 'optional:tag1'],
value=2,
)
aggregator.assert_metric(
NAMESPACE + '.service.count',
tags=['namespace:kube-system', 'type:clusterip', 'optional:tag1'],
value=4,
)
aggregator.assert_metric(
NAMESPACE + '.service.count',
tags=['namespace:kube-system', 'type:nodeport', 'optional:tag1'],
value=1,
)

# namespaces count
aggregator.assert_metric(
NAMESPACE + '.namespace.count',
tags=['phase:active', 'optional:tag1'],
value=4,
)
aggregator.assert_metric(
NAMESPACE + '.namespace.count',
tags=['phase:terminating', 'optional:tag1'],
value=0,
)

# replicasets count
aggregator.assert_metric(
NAMESPACE + '.replicaset.count',
tags=['namespace:kube-system', 'owner_kind:deployment', 'owner_name:l7-default-backend', 'optional:tag1'],
value=1,
)
aggregator.assert_metric(
NAMESPACE + '.replicaset.count',
tags=['namespace:kube-system', 'owner_kind:deployment', 'owner_name:metrics-server-v0.3.6', 'optional:tag1'],
value=1,
)
aggregator.assert_metric(
NAMESPACE + '.replicaset.count',
tags=['namespace:kube-system', 'owner_kind:deployment', 'owner_name:kube-dns-autoscaler', 'optional:tag1'],
value=1,
)

# jobs count
aggregator.assert_metric(
NAMESPACE + '.job.count',
tags=['namespace:default', 'owner_kind:cronjob', 'owner_name:a-cronjob', 'optional:tag1'],
value=1,
)
aggregator.assert_metric(
NAMESPACE + '.job.count',
tags=['namespace:default', 'owner_kind:<none>', 'owner_name:<none>', 'optional:tag1'],
value=1,
)

# deployments count
aggregator.assert_metric(
NAMESPACE + '.deployment.count',
tags=['namespace:default', 'optional:tag1'],
Expand Down