Skip to content

Commit

Permalink
Export various stats about services in the metrics exported by this c…
Browse files Browse the repository at this point in the history
…ontroller.
  • Loading branch information
mmamczur committed Feb 13, 2023
1 parent ed35b2e commit f476884
Show file tree
Hide file tree
Showing 4 changed files with 1,001 additions and 0 deletions.
7 changes: 7 additions & 0 deletions cmd/glbc/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import (
"k8s.io/ingress-gce/pkg/l4lb"
"k8s.io/ingress-gce/pkg/psc"
"k8s.io/ingress-gce/pkg/serviceattachment"
"k8s.io/ingress-gce/pkg/servicemetrics"
"k8s.io/ingress-gce/pkg/svcneg"
"k8s.io/klog/v2"

Expand Down Expand Up @@ -293,6 +294,12 @@ func runControllers(ctx *ingctx.ControllerContext) {
klog.V(0).Infof("PSC Controller started")
}

if flags.F.EnableServiceMetrics {
metricsController := servicemetrics.NewController(ctx, flags.F.MetricsExportInterval, stopCh)
go metricsController.Run()
klog.V(0).Infof("Service Metrics Controller started")
}

var zoneGetter negtypes.ZoneGetter
zoneGetter = lbc.Translator
// In NonGCP mode, use the zone specified in gce.conf directly.
Expand Down
2 changes: 2 additions & 0 deletions pkg/flags/flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ var (
EnablePinhole bool
EnableL4ILBDualStack bool
EnableMultipleIGs bool
EnableServiceMetrics bool
MaxIGSize int
}{
GCERateLimitScale: 1.0,
Expand Down Expand Up @@ -244,6 +245,7 @@ L7 load balancing. CSV values accepted. Example: -node-port-ranges=80,8080,400-5
flag.BoolVar(&F.RunIngressController, "run-ingress-controller", true, `Optional, whether or not to run IngressController as part of glbc. If set to false, ingress resources will not be processed. Only the L4 Service controller will be run, if that flag is set to true.`)
flag.BoolVar(&F.RunL4Controller, "run-l4-controller", false, `Optional, whether or not to run L4 Service Controller as part of glbc. If set to true, services of Type:LoadBalancer with Internal annotation will be processed by this controller.`)
flag.BoolVar(&F.RunL4NetLBController, "run-l4-netlb-controller", false, `Optional, f enabled then the L4NetLbController will be run.`)
flag.BoolVar(&F.EnableServiceMetrics, "enable-service-metrics", false, `Optional, if enabled then the service metrics controller will be run.`)
flag.BoolVar(&F.EnableBackendConfigHealthCheck, "enable-backendconfig-healthcheck", false, "Enable configuration of HealthChecks from the BackendConfig")
flag.BoolVar(&F.EnablePSC, "enable-psc", false, "Enable PSC controller")
flag.BoolVar(&F.EnableIngressGAFields, "enable-ingress-ga-fields", false, "Enable using Ingress Class GA features")
Expand Down
304 changes: 304 additions & 0 deletions pkg/servicemetrics/servicemetrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,304 @@
package servicemetrics

import (
"fmt"
"strconv"
"time"

"github.com/prometheus/client_golang/prometheus"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/util/wait"
listers "k8s.io/client-go/listers/core/v1"
"k8s.io/client-go/tools/cache"
"k8s.io/ingress-gce/pkg/annotations"
"k8s.io/ingress-gce/pkg/context"
"k8s.io/ingress-gce/pkg/utils"
"k8s.io/ingress-gce/pkg/utils/common"
"k8s.io/klog/v2"
"k8s.io/legacy-cloud-providers/gce"
)

// Gauge vectors exported by this package. Each vector holds a count of
// Services, partitioned by the label names listed in its definition.
var (
// serviceL4ProtocolStatsCount is labelled by service type, external and
// internal traffic policy, session affinity config, protocol and a
// number-of-ports bucket.
serviceL4ProtocolStatsCount = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "service_l4_protocol_stats",
Help: "Number of services broken down by various stats",
},
[]string{"type", "external_traffic_policy", "internal_traffic_policy", "session_affinity_config", "protocol", "number_of_ports"},
)
// serviceIPStackStatsCount is labelled by service type, external and
// internal traffic policy, IP families, IP family policy and whether a
// static IPv4 / IPv6 address is used.
serviceIPStackStatsCount = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "service_ip_stack_stats",
Help: "Number of services broken down by various stats",
},
[]string{"type", "external_traffic_policy", "internal_traffic_policy", "ip_families", "ip_family_policy", "is_static_ip_v4", "is_static_ip_v6"},
)
// serviceGCPFeaturesStatsCount is labelled by service type, network tier,
// global access and custom subnet usage.
serviceGCPFeaturesStatsCount = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "service_gcp_features_stats",
Help: "Number of services broken down by various stats",
},
[]string{"type", "network_tier", "global_access", "custom_subnet"},
)
)

// init logs the registration at verbosity 3 and then registers the three
// service stats gauge vectors with Prometheus. MustRegister panics if any of
// them cannot be registered.
func init() {
	klog.V(3).Infof("Registering Service stats usage metrics %v", serviceL4ProtocolStatsCount)
	prometheus.MustRegister(
		serviceL4ProtocolStatsCount,
		serviceIPStackStatsCount,
		serviceGCPFeaturesStatsCount,
	)
}

// Controller is the controller that exposes and populates metrics containing various stats about Services in the cluster.
type Controller struct {
// ctx is the controller context the Controller was created from.
ctx *context.ControllerContext
// stopCh stops the periodic export loop and unblocks Run when it is closed.
stopCh chan struct{}
// svcQueue is not initialised by NewController and is not read anywhere in
// the code visible in this file.
svcQueue utils.TaskQueue
// metricsInterval is both the delay before the first export and the period
// between subsequent exports.
metricsInterval time.Duration
// serviceInformer is the informer whose indexer is listed on every export.
serviceInformer cache.SharedIndexInformer
}

// NewController builds a Controller that reads Services from the context's
// service informer and exports metrics every exportInterval until stopCh is
// closed. The returned controller does nothing until Run is called.
func NewController(ctx *context.ControllerContext, exportInterval time.Duration, stopCh chan struct{}) *Controller {
	return &Controller{
		ctx:             ctx,
		stopCh:          stopCh,
		metricsInterval: exportInterval,
		serviceInformer: ctx.ServiceInformer,
	}
}

// Run starts the controller and blocks until the stop channel is closed.
//
// The first export happens one metricsInterval after Run is called, and then
// repeats every metricsInterval. The initial delay is interruptible: if the
// stop channel is closed while waiting, the background goroutine exits right
// away instead of sleeping out the remainder of the interval.
func (c *Controller) Run() {
	klog.Infof("Starting Service Metric Stats controller")
	go func() {
		// Wait for the initial delay, but give up immediately on stop so the
		// goroutine does not outlive the controller.
		initialDelay := time.NewTimer(c.metricsInterval)
		defer initialDelay.Stop()
		select {
		case <-c.stopCh:
			return
		case <-initialDelay.C:
		}
		wait.Until(c.export, c.metricsInterval, c.stopCh)
	}()
	<-c.stopCh
}

// serviceL4ProtocolMetricState defines metric state related to the L4 protocol
// related part of services. It is used as a map key when counting services,
// and every field is emitted as a label of the service_l4_protocol_stats
// metric.
type serviceL4ProtocolMetricState struct {
// Type is the value returned by getServiceType.
Type string
// ExternalTrafficPolicy is the external traffic policy, defaulted to Cluster when unset.
ExternalTrafficPolicy string
// InternalTrafficPolicy is the internal traffic policy, defaulted to Cluster when unset.
InternalTrafficPolicy string
// SessionAffinityConfig is the ClientIP affinity timeout bucket, or "" when
// ClientIP session affinity is not in use.
SessionAffinityConfig string
// NumberOfPorts is the bucket label for the number of service ports.
NumberOfPorts string
// Protocol is the common protocol of all ports, "mixed" if they differ, or
// "" when the service has no ports.
Protocol string
}

// serviceIPStackMetricState defines metric state related to the IP stack of services.
// It is used as a map key when counting services, and every field is emitted
// as a label of the service_ip_stack_stats metric.
type serviceIPStackMetricState struct {
// Type is the value returned by getServiceType.
Type string
// ExternalTrafficPolicy is the external traffic policy, defaulted to Cluster when unset.
ExternalTrafficPolicy string
// InternalTrafficPolicy is the internal traffic policy, defaulted to Cluster when unset.
InternalTrafficPolicy string
// IPFamilies is the label derived from Spec.IPFamilies.
IPFamilies string
// IPFamilyPolicy is the IP family policy, defaulted to SingleStack when unset.
IPFamilyPolicy string
// IsStaticIPv4 is true when Spec.LoadBalancerIP is non-empty.
IsStaticIPv4 bool
// IsStaticIPv6 is always set to false by metricsForService.
IsStaticIPv6 bool
}

// serviceGCPFeaturesMetricState defines metric state related to the GCP
// specific features of services. It is used as a map key when counting
// services, and every field is emitted as a label of the
// service_gcp_features_stats metric.
type serviceGCPFeaturesMetricState struct {
// Type is the value returned by getServiceType.
Type string
// NetworkTier is the string form of the tier returned by utils.GetNetworkTier.
NetworkTier string
// GlobalAccess is the result of gce.GetLoadBalancerAnnotationAllowGlobalAccess.
GlobalAccess bool
// CustomSubnet is true when gce.GetLoadBalancerAnnotationSubnet returns a
// non-empty value.
CustomSubnet bool
}

// export lists every Service known to the informer's indexer, groups the
// services into the three metric states and publishes the resulting counts.
// A listing failure is logged and the export round is skipped.
func (c *Controller) export() {
	lister := listers.NewServiceLister(c.serviceInformer.GetIndexer())
	allServices, err := lister.List(labels.Everything())
	if err != nil {
		klog.Errorf("failed to list services err=%v", err)
		return
	}

	updatePrometheusMetrics(groupServices(allServices))
}

// groupServices computes the three metric states of every service and returns,
// for each kind of state, how many services share each distinct value.
func groupServices(services []*v1.Service) (map[serviceL4ProtocolMetricState]int64, map[serviceIPStackMetricState]int64, map[serviceGCPFeaturesMetricState]int64) {
	l4Counts := map[serviceL4ProtocolMetricState]int64{}
	ipStackCounts := map[serviceIPStackMetricState]int64{}
	gcpCounts := map[serviceGCPFeaturesMetricState]int64{}

	for i := range services {
		l4Key, ipStackKey, gcpKey := metricsForService(services[i])
		l4Counts[*l4Key]++
		ipStackCounts[*ipStackKey]++
		gcpCounts[*gcpKey]++
	}
	return l4Counts, ipStackCounts, gcpCounts
}

// updatePrometheusMetrics replaces the contents of the three service stats
// gauge vectors with the supplied counts.
//
// Each vector is reset before it is repopulated. Without the reset, a label
// combination that matched services in an earlier round but matches none now
// would keep reporting its last value forever, because it is simply absent
// from the new maps and therefore never overwritten.
func updatePrometheusMetrics(l4ProtocolState map[serviceL4ProtocolMetricState]int64, ipStackState map[serviceIPStackMetricState]int64, gcpFeaturesState map[serviceGCPFeaturesMetricState]int64) {
	serviceL4ProtocolStatsCount.Reset()
	for serviceStat, count := range l4ProtocolState {
		serviceL4ProtocolStatsCount.With(prometheus.Labels{
			"type":                    serviceStat.Type,
			"external_traffic_policy": serviceStat.ExternalTrafficPolicy,
			"internal_traffic_policy": serviceStat.InternalTrafficPolicy,
			"session_affinity_config": serviceStat.SessionAffinityConfig,
			"protocol":                serviceStat.Protocol,
			"number_of_ports":         serviceStat.NumberOfPorts,
		}).Set(float64(count))
	}

	serviceIPStackStatsCount.Reset()
	for serviceStat, count := range ipStackState {
		serviceIPStackStatsCount.With(prometheus.Labels{
			"type":                    serviceStat.Type,
			"external_traffic_policy": serviceStat.ExternalTrafficPolicy,
			"internal_traffic_policy": serviceStat.InternalTrafficPolicy,
			"ip_families":             serviceStat.IPFamilies,
			"ip_family_policy":        serviceStat.IPFamilyPolicy,
			"is_static_ip_v4":         strconv.FormatBool(serviceStat.IsStaticIPv4),
			"is_static_ip_v6":         strconv.FormatBool(serviceStat.IsStaticIPv6),
		}).Set(float64(count))
	}

	serviceGCPFeaturesStatsCount.Reset()
	for serviceStat, count := range gcpFeaturesState {
		serviceGCPFeaturesStatsCount.With(prometheus.Labels{
			"type":          serviceStat.Type,
			"network_tier":  serviceStat.NetworkTier,
			"global_access": strconv.FormatBool(serviceStat.GlobalAccess),
			"custom_subnet": strconv.FormatBool(serviceStat.CustomSubnet),
		}).Set(float64(count))
	}
}

// metricsForService derives the L4 protocol, IP stack and GCP features metric
// states for a single service. The service type and both traffic policies are
// computed once and shared between the states that carry them.
func metricsForService(service *v1.Service) (*serviceL4ProtocolMetricState, *serviceIPStackMetricState, *serviceGCPFeaturesMetricState) {
	svcType := getServiceType(service)
	intPolicy := getInternalTrafficPolicy(service)
	extPolicy := getExternalTrafficPolicy(service)

	l4 := serviceL4ProtocolMetricState{
		Type:                  svcType,
		ExternalTrafficPolicy: extPolicy,
		InternalTrafficPolicy: intPolicy,
		SessionAffinityConfig: getSessionAffinityConfig(service),
		NumberOfPorts:         getPortsBucket(service.Spec.Ports),
		Protocol:              getProtocol(service.Spec.Ports),
	}

	ipStack := serviceIPStackMetricState{
		Type:                  svcType,
		ExternalTrafficPolicy: extPolicy,
		InternalTrafficPolicy: intPolicy,
		IPFamilies:            getIPFamilies(service.Spec.IPFamilies),
		IPFamilyPolicy:        getIPFamilyPolicy(service.Spec.IPFamilyPolicy),
		// A requested LoadBalancerIP is what counts as a static IPv4 address.
		IsStaticIPv4: service.Spec.LoadBalancerIP != "",
		// Static IPv6 is always reported as false here.
		IsStaticIPv6: false,
	}

	// Only the tier itself is needed; the second return value is discarded.
	tier, _ := utils.GetNetworkTier(service)
	gcp := serviceGCPFeaturesMetricState{
		Type:         svcType,
		NetworkTier:  string(tier),
		GlobalAccess: gce.GetLoadBalancerAnnotationAllowGlobalAccess(service),
		CustomSubnet: gce.GetLoadBalancerAnnotationSubnet(service) != "",
	}

	return &l4, &ipStack, &gcp
}

// getExternalTrafficPolicy returns the service's external traffic policy as a
// string, substituting Cluster when the field is empty.
func getExternalTrafficPolicy(service *v1.Service) string {
	if policy := service.Spec.ExternalTrafficPolicy; policy != "" {
		return string(policy)
	}
	return string(v1.ServiceExternalTrafficPolicyTypeCluster)
}

// getInternalTrafficPolicy returns the service's internal traffic policy as a
// string, substituting Cluster when the field is nil.
func getInternalTrafficPolicy(service *v1.Service) string {
	if policy := service.Spec.InternalTrafficPolicy; policy != nil {
		return string(*policy)
	}
	return string(v1.ServiceInternalTrafficPolicyCluster)
}

// getPortsBucket maps the number of ports to a bucket label: the exact count
// for zero or one port, "2-5", "6-100", and "100+" for anything above 100.
func getPortsBucket(ports []v1.ServicePort) string {
	count := len(ports)
	switch {
	case count <= 1:
		return fmt.Sprint(count)
	case count <= 5:
		return "2-5"
	case count <= 100:
		return "6-100"
	default:
		return "100+"
	}
}

// protocolOrDefault returns the port's protocol as a string, substituting TCP
// when the protocol is empty.
func protocolOrDefault(port v1.ServicePort) string {
	if proto := port.Protocol; proto != "" {
		return string(proto)
	}
	return string(v1.ProtocolTCP)
}

// getProtocol summarises the protocols used by a set of ports: "" when there
// are no ports, the shared protocol when all ports agree (an empty protocol
// counts as TCP), and "mixed" otherwise.
func getProtocol(ports []v1.ServicePort) string {
	if len(ports) == 0 {
		return ""
	}
	first := protocolOrDefault(ports[0])
	// The first port trivially matches itself, so only the rest need checking.
	for _, other := range ports[1:] {
		if protocolOrDefault(other) != first {
			return "mixed"
		}
	}
	return first
}

// getIPFamilies renders a service's IP families as a metric label value.
//
// It returns "" when no families are set, "<first>-<second>" when exactly two
// are set, and the first family otherwise. The empty case must be handled
// explicitly: indexing families[0] on an empty slice panics, which would take
// down the export loop for any service that has no IP families populated.
func getIPFamilies(families []v1.IPFamily) string {
	switch len(families) {
	case 0:
		return ""
	case 2:
		return fmt.Sprintf("%s-%s", string(families[0]), string(families[1]))
	default:
		return string(families[0])
	}
}

// getIPFamilyPolicy returns the IP family policy as a string, substituting
// SingleStack when the policy is nil.
func getIPFamilyPolicy(policyType *v1.IPFamilyPolicyType) string {
	if policyType != nil {
		return string(*policyType)
	}
	return string(v1.IPFamilyPolicySingleStack)
}

// getServiceType returns the label used for the service's type.
//
// Non-LoadBalancer services report their Spec.Type unchanged. LoadBalancer
// services are classified as internal or external via the annotations
// helpers, and then as the V2 or legacy flavour depending on whether the
// corresponding V2 finalizer is present. A LoadBalancer service that matches
// neither helper yields "".
func getServiceType(service *v1.Service) string {
	if service.Spec.Type != v1.ServiceTypeLoadBalancer {
		return string(service.Spec.Type)
	}

	if isILB, _ := annotations.WantsL4ILB(service); isILB {
		if common.HasGivenFinalizer(service.ObjectMeta, common.ILBFinalizerV2) {
			return "SubsettingILB"
		}
		return "LegacyILB"
	}

	if isNetLB, _ := annotations.WantsL4NetLB(service); isNetLB {
		if common.HasGivenFinalizer(service.ObjectMeta, common.NetLBFinalizerV2) {
			return "RBSXLB"
		}
		return "LegacyXLB"
	}

	return ""
}

// getSessionAffinityConfig buckets the ClientIP session affinity timeout.
//
// It returns "" when the service does not use ClientIP affinity, "10800" when
// no timeout is configured or the timeout is exactly 10800 seconds, "0-10799"
// for smaller timeouts and "10800+" for larger ones.
func getSessionAffinityConfig(service *v1.Service) string {
	// Boundary between the "below", "equal" and "above" buckets.
	const pivotSeconds = 10800

	if service.Spec.SessionAffinity != v1.ServiceAffinityClientIP {
		return ""
	}

	cfg := service.Spec.SessionAffinityConfig
	if cfg == nil || cfg.ClientIP == nil || cfg.ClientIP.TimeoutSeconds == nil {
		return "10800"
	}

	switch seconds := *cfg.ClientIP.TimeoutSeconds; {
	case seconds < pivotSeconds:
		return "0-10799"
	case seconds == pivotSeconds:
		return "10800"
	default:
		return "10800+"
	}
}
Loading

0 comments on commit f476884

Please sign in to comment.