From cbdf4af678fdda960ecdbaae7cb0630b0174ae3d Mon Sep 17 00:00:00 2001 From: Xavier MARCELET Date: Mon, 8 May 2023 04:53:29 +0200 Subject: [PATCH] continue object enumeration on error, fixes #85 --- collectors/applications.go | 201 +++++++++++++++++++----------------- collectors/organizations.go | 123 ++++++++++++---------- collectors/spaces.go | 164 +++++++++++++++-------------- fetcher/fetcher.go | 2 +- fetcher/fetcher_handlers.go | 10 +- 5 files changed, 275 insertions(+), 225 deletions(-) diff --git a/collectors/applications.go b/collectors/applications.go index 93a7c47d..18165860 100644 --- a/collectors/applications.go +++ b/collectors/applications.go @@ -161,7 +161,6 @@ func (c ApplicationsCollector) Collect(objs *models.CFObjects, ch chan<- prometh } else { err := c.reportApplicationsMetrics(objs, ch) if err != nil { - log.Error(err) errorMetric = float64(1) c.applicationsScrapeErrorsTotalMetric.Inc() } @@ -198,7 +197,109 @@ func (c ApplicationsCollector) Describe(ch chan<- *prometheus.Desc) { // 2. symmetrically in some corner cases, buildpack is null but // detected_buildpack is available. 
Use detected_buildpack // for compatibility with v0 +func (c ApplicationsCollector) reportApp(application models.Application, objs *models.CFObjects) error { + processes, ok := objs.AppProcesses[application.GUID] + if !ok { + return fmt.Errorf("could not find processes for application '%s'", application.GUID) + } + process := processes[0] + for _, cProc := range processes { + if cProc.Type == "web" { + process = cProc + } + } + spaceRel, ok := application.Relationships[constant.RelationshipTypeSpace] + if !ok { + return fmt.Errorf("could not find space relation in application '%s'", application.GUID) + } + space, ok := objs.Spaces[spaceRel.GUID] + if !ok { + return fmt.Errorf("could not find space with guid '%s'", spaceRel.GUID) + } + orgRel, ok := space.Relationships[constant.RelationshipTypeOrganization] + if !ok { + return fmt.Errorf("could not find org relation in space '%s'", space.GUID) + } + organization, ok := objs.Orgs[orgRel.GUID] + if !ok { + return fmt.Errorf("could not find org with guid '%s'", orgRel.GUID) + } + + appSum, ok := objs.AppSummaries[application.GUID] + if !ok { + return fmt.Errorf("could not find app summary with guid '%s'", application.GUID) + } + + // 1. + detectedBuildpack := appSum.DetectedBuildpack + if len(detectedBuildpack) == 0 { + detectedBuildpack = appSum.Buildpack + } + + // 2. 
+ buildpack := appSum.Buildpack + if len(buildpack) == 0 { + buildpack = appSum.DetectedBuildpack + } + + c.applicationInfoMetric.WithLabelValues( + application.GUID, + application.Name, + detectedBuildpack, + buildpack, + organization.GUID, + organization.Name, + space.GUID, + space.Name, + appSum.StackID, + string(application.State), + ).Set(float64(1)) + + c.applicationInstancesMetric.WithLabelValues( + application.GUID, + application.Name, + organization.GUID, + organization.Name, + space.GUID, + space.Name, + string(application.State), + ).Set(float64(process.Instances.Value)) + + c.applicationInstancesRunningMetric.WithLabelValues( + application.GUID, + application.Name, + organization.GUID, + organization.Name, + space.GUID, + space.Name, + string(application.State), + ).Set(float64(appSum.RunningInstances)) + + c.applicationMemoryMbMetric.WithLabelValues( + application.GUID, + application.Name, + organization.GUID, + organization.Name, + space.GUID, + space.Name, + ).Set(float64(process.MemoryInMB.Value)) + + c.applicationDiskQuotaMbMetric.WithLabelValues( + application.GUID, + application.Name, + organization.GUID, + organization.Name, + space.GUID, + space.Name, + ).Set(float64(process.DiskInMB.Value)) + return nil +} + +// reportApplicationsMetrics +// 1. 
continue processing application list upon error func (c ApplicationsCollector) reportApplicationsMetrics(objs *models.CFObjects, ch chan<- prometheus.Metric) error { + var res error + c.applicationInfoMetric.Reset() c.applicationInstancesMetric.Reset() c.applicationInstancesRunningMetric.Reset() @@ -206,100 +307,12 @@ func (c ApplicationsCollector) reportApplicationsMetrics(objs *models.CFObjects, c.applicationDiskQuotaMbMetric.Reset() for _, application := range objs.Apps { - processes, ok := objs.AppProcesses[application.GUID] - if !ok { - return fmt.Errorf("could not find processes for application '%s'", application.GUID) - } - process := processes[0] - for _, cProc := range processes { - if cProc.Type == "web" { - process = cProc - } - } - - spaceRel, ok := application.Relationships[constant.RelationshipTypeSpace] - if !ok { - return fmt.Errorf("could not find space relation in application '%s'", application.GUID) - } - space, ok := objs.Spaces[spaceRel.GUID] - if !ok { - return fmt.Errorf("could not find space with guid '%s'", spaceRel.GUID) - } - orgRel, ok := space.Relationships[constant.RelationshipTypeOrganization] - if !ok { - return fmt.Errorf("could not find org relation in space '%s'", space.GUID) - } - organization, ok := objs.Orgs[orgRel.GUID] - if !ok { - return fmt.Errorf("could not find org with guid '%s'", orgRel.GUID) - } - appSum, ok := objs.AppSummaries[application.GUID] - if !ok { - return fmt.Errorf("could not find app summary with guid '%s'", application.GUID) - } - + err := c.reportApp(application, objs) // 1. - detectedBuildpack := appSum.DetectedBuildpack - if len(detectedBuildpack) == 0 { - detectedBuildpack = appSum.Buildpack - } - - // 2. 
- buildpack := appSum.Buildpack - if len(buildpack) == 0 { - buildpack = appSum.DetectedBuildpack + if err != nil { + log.Warn(err) + res = err } - - c.applicationInfoMetric.WithLabelValues( - application.GUID, - application.Name, - detectedBuildpack, - buildpack, - organization.GUID, - organization.Name, - space.GUID, - space.Name, - appSum.StackID, - string(application.State), - ).Set(float64(1)) - - c.applicationInstancesMetric.WithLabelValues( - application.GUID, - application.Name, - organization.GUID, - organization.Name, - space.GUID, - space.Name, - string(application.State), - ).Set(float64(process.Instances.Value)) - - c.applicationInstancesRunningMetric.WithLabelValues( - application.GUID, - application.Name, - organization.GUID, - organization.Name, - space.GUID, - space.Name, - string(application.State), - ).Set(float64(appSum.RunningInstances)) - - c.applicationMemoryMbMetric.WithLabelValues( - application.GUID, - application.Name, - organization.GUID, - organization.Name, - space.GUID, - space.Name, - ).Set(float64(process.MemoryInMB.Value)) - - c.applicationDiskQuotaMbMetric.WithLabelValues( - application.GUID, - application.Name, - organization.GUID, - organization.Name, - space.GUID, - space.Name, - ).Set(float64(process.DiskInMB.Value)) } c.applicationInfoMetric.Collect(ch) @@ -307,5 +320,5 @@ func (c ApplicationsCollector) reportApplicationsMetrics(objs *models.CFObjects, c.applicationInstancesRunningMetric.Collect(ch) c.applicationMemoryMbMetric.Collect(ch) c.applicationDiskQuotaMbMetric.Collect(ch) - return nil + return res } diff --git a/collectors/organizations.go b/collectors/organizations.go index 71ec2a15..caa2a743 100644 --- a/collectors/organizations.go +++ b/collectors/organizations.go @@ -4,6 +4,7 @@ import ( "fmt" "time" + "code.cloudfoundry.org/cli/resources" "github.com/bosh-prometheus/cf_exporter/models" "github.com/prometheus/client_golang/prometheus" log "github.com/sirupsen/logrus" @@ -238,7 +239,6 @@ func (c 
OrganizationsCollector) Collect(objs *models.CFObjects, ch chan<- promet } else { err := c.reportOrganizationsMetrics(objs, ch) if err != nil { - log.Error(err) errorMetric = float64(1) c.organizationsScrapeErrorsTotalMetric.Inc() } @@ -274,7 +274,69 @@ func (c OrganizationsCollector) Describe(ch chan<- *prometheus.Desc) { c.lastOrganizationsScrapeDurationSecondsMetric.Describe(ch) } +func (c OrganizationsCollector) reportOrg(org resources.Organization, objs *models.CFObjects) error { + quotaName := "" + if org.QuotaGUID != "" { + quota, ok := objs.OrgQuotas[org.QuotaGUID] + if !ok { + return fmt.Errorf("could not find org quota with guid '%s'", org.QuotaGUID) + } + quotaName = quota.Name + c.organizationNonBasicServicesAllowedMetric.WithLabelValues( + org.GUID, + org.Name, + ).Set(BoolToFloat(quota.Services.PaidServicePlans)) + c.organizationInstanceMemoryMbLimitMetric.WithLabelValues( + org.GUID, + org.Name, + ).Set(NullIntToFloat(quota.Apps.InstanceMemory)) + c.organizationTotalAppInstancesQuotaMetric.WithLabelValues( + org.GUID, + org.Name, + ).Set(NullIntToFloat(quota.Apps.TotalAppInstances)) + c.organizationTotalAppTasksQuotaMetric.WithLabelValues( + org.GUID, + org.Name, + ).Set(NullIntToFloat(quota.Apps.PerAppTasks)) + c.organizationTotalMemoryMbQuotaMetric.WithLabelValues( + org.GUID, + org.Name, + ).Set(NullIntToFloat(quota.Apps.TotalMemory)) + c.organizationTotalPrivateDomainsQuotaMetric.WithLabelValues( + org.GUID, + org.Name, + ).Set(NullIntToFloat(quota.Domains.TotalDomains)) + c.organizationTotalReservedRoutePortsQuotaMetric.WithLabelValues( + org.GUID, + org.Name, + ).Set(NullIntToFloat(quota.Routes.TotalReservedPorts)) + c.organizationTotalRoutesQuotaMetric.WithLabelValues( + org.GUID, + org.Name, + ).Set(NullIntToFloat(quota.Routes.TotalRoutes)) + c.organizationTotalServiceKeysQuotaMetric.WithLabelValues( + org.GUID, + org.Name, + ).Set(NullIntToFloat(quota.Services.TotalServiceKeys)) + c.organizationTotalServicesQuotaMetric.WithLabelValues( + 
org.GUID, + org.Name, + ).Set(NullIntToFloat(quota.Services.TotalServiceInstances)) + } + c.organizationInfoMetric.WithLabelValues( + org.GUID, + org.Name, + quotaName, + ).Set(float64(1)) + + return nil +} + +// reportOrganizationsMetrics +// 1. continue processing organization list upon error func (c OrganizationsCollector) reportOrganizationsMetrics(objs *models.CFObjects, ch chan<- prometheus.Metric) error { + var res error + c.organizationInfoMetric.Reset() c.organizationNonBasicServicesAllowedMetric.Reset() c.organizationInstanceMemoryMbLimitMetric.Reset() @@ -288,59 +350,12 @@ func (c OrganizationsCollector) reportOrganizationsMetrics(objs *models.CFObject c.organizationTotalServicesQuotaMetric.Reset() for _, cOrg := range objs.Orgs { - quotaName := "" - if cOrg.QuotaGUID != "" { - quota, ok := objs.OrgQuotas[cOrg.QuotaGUID] - if !ok { - return fmt.Errorf("could not find org quota with guid '%s'", cOrg.QuotaGUID) - } - quotaName = quota.Name - c.organizationNonBasicServicesAllowedMetric.WithLabelValues( - cOrg.GUID, - cOrg.Name, - ).Set(BoolToFloat(quota.Services.PaidServicePlans)) - c.organizationInstanceMemoryMbLimitMetric.WithLabelValues( - cOrg.GUID, - cOrg.Name, - ).Set(NullIntToFloat(quota.Apps.InstanceMemory)) - c.organizationTotalAppInstancesQuotaMetric.WithLabelValues( - cOrg.GUID, - cOrg.Name, - ).Set(NullIntToFloat(quota.Apps.TotalAppInstances)) - c.organizationTotalAppTasksQuotaMetric.WithLabelValues( - cOrg.GUID, - cOrg.Name, - ).Set(NullIntToFloat(quota.Apps.PerAppTasks)) - c.organizationTotalMemoryMbQuotaMetric.WithLabelValues( - cOrg.GUID, - cOrg.Name, - ).Set(NullIntToFloat(quota.Apps.TotalMemory)) - c.organizationTotalPrivateDomainsQuotaMetric.WithLabelValues( - cOrg.GUID, - cOrg.Name, - ).Set(NullIntToFloat(quota.Domains.TotalDomains)) - c.organizationTotalReservedRoutePortsQuotaMetric.WithLabelValues( - cOrg.GUID, - cOrg.Name, - ).Set(NullIntToFloat(quota.Routes.TotalReservedPorts)) - c.organizationTotalRoutesQuotaMetric.WithLabelValues( - 
cOrg.GUID, - cOrg.Name, - ).Set(NullIntToFloat(quota.Routes.TotalRoutes)) - c.organizationTotalServiceKeysQuotaMetric.WithLabelValues( - cOrg.GUID, - cOrg.Name, - ).Set(NullIntToFloat(quota.Services.TotalServiceKeys)) - c.organizationTotalServicesQuotaMetric.WithLabelValues( - cOrg.GUID, - cOrg.Name, - ).Set(NullIntToFloat(quota.Services.TotalServiceInstances)) + err := c.reportOrg(cOrg, objs) + // 1. + if err != nil { + log.Warn(err) + res = err } - c.organizationInfoMetric.WithLabelValues( - cOrg.GUID, - cOrg.Name, - quotaName, - ).Set(float64(1)) } c.organizationInfoMetric.Collect(ch) @@ -354,5 +369,5 @@ func (c OrganizationsCollector) reportOrganizationsMetrics(objs *models.CFObject c.organizationTotalRoutesQuotaMetric.Collect(ch) c.organizationTotalServiceKeysQuotaMetric.Collect(ch) c.organizationTotalServicesQuotaMetric.Collect(ch) - return nil + return res } diff --git a/collectors/spaces.go b/collectors/spaces.go index 6430c781..616ca9a5 100644 --- a/collectors/spaces.go +++ b/collectors/spaces.go @@ -5,6 +5,7 @@ import ( "time" "code.cloudfoundry.org/cli/api/cloudcontroller/ccv3/constant" + "code.cloudfoundry.org/cli/resources" "github.com/bosh-prometheus/cf_exporter/models" "github.com/prometheus/client_golang/prometheus" log "github.com/sirupsen/logrus" @@ -226,7 +227,6 @@ func (c SpacesCollector) Collect(objs *models.CFObjects, ch chan<- prometheus.Me } else { err := c.reportSpacesMetrics(objs, ch) if err != nil { - log.Error(err) errorMetric = float64(1) c.spacesScrapeErrorsTotalMetric.Inc() } @@ -261,10 +261,92 @@ func (c SpacesCollector) Describe(ch chan<- *prometheus.Desc) { c.lastSpacesScrapeDurationSecondsMetric.Describe(ch) } -// reportSpacesMetrics +// reportSpace // 1. 
rely on GUID value instead of map status because it // may exists in relationship but with empty value +func (c SpacesCollector) reportSpace(space resources.Space, objs *models.CFObjects) error { + relOrg, ok := space.Relationships[constant.RelationshipTypeOrganization] + if !ok { + return fmt.Errorf("could not find org relationship in space '%s'", space.GUID) + } + quotaName := "" + // 1. + relQuota := space.Relationships[constant.RelationshipTypeQuota] + if relQuota.GUID != "" { + quota, okQ := objs.SpaceQuotas[relQuota.GUID] + if !okQ { + return fmt.Errorf("could not find space quota '%s' from space '%s'", relQuota.GUID, space.GUID) + } + quotaName = quota.Name + c.spaceNonBasicServicesAllowedMetric.WithLabelValues( + space.GUID, + space.Name, + relOrg.GUID, + ).Set(BoolToFloat(quota.Services.PaidServicePlans)) + + c.spaceInstanceMemoryMbLimitMetric.WithLabelValues( + space.GUID, + space.Name, + relOrg.GUID, + ).Set(NullIntToFloat(quota.Apps.InstanceMemory)) + + c.spaceTotalAppInstancesQuotaMetric.WithLabelValues( + space.GUID, + space.Name, + relOrg.GUID, + ).Set(NullIntToFloat(quota.Apps.TotalAppInstances)) + + c.spaceTotalAppTasksQuotaMetric.WithLabelValues( + space.GUID, + space.Name, + relOrg.GUID, + ).Set(NullIntToFloat(quota.Apps.PerAppTasks)) + + c.spaceTotalMemoryMbQuotaMetric.WithLabelValues( + space.GUID, + space.Name, + relOrg.GUID, + ).Set(NullIntToFloat(quota.Apps.TotalMemory)) + + c.spaceTotalReservedRoutePortsQuotaMetric.WithLabelValues( + space.GUID, + space.Name, + relOrg.GUID, + ).Set(NullIntToFloat(quota.Routes.TotalReservedPorts)) + + c.spaceTotalRoutesQuotaMetric.WithLabelValues( + space.GUID, + space.Name, + relOrg.GUID, + ).Set(NullIntToFloat(quota.Routes.TotalRoutes)) + + c.spaceTotalServiceKeysQuotaMetric.WithLabelValues( + space.GUID, + space.Name, + relOrg.GUID, + ).Set(NullIntToFloat(quota.Services.TotalServiceKeys)) + + c.spaceTotalServicesQuotaMetric.WithLabelValues( + space.GUID, + space.Name, + relOrg.GUID, + 
).Set(NullIntToFloat(quota.Services.TotalServiceInstances)) + } + + c.spaceInfoMetric.WithLabelValues( + space.GUID, + space.Name, + relOrg.GUID, + quotaName, + ).Set(float64(1)) + return nil +} + +// reportSpacesMetrics +// 1. continue processing space list upon error func (c SpacesCollector) reportSpacesMetrics(objs *models.CFObjects, ch chan<- prometheus.Metric) error { + var res error + c.spaceInfoMetric.Reset() c.spaceNonBasicServicesAllowedMetric.Reset() c.spaceInstanceMemoryMbLimitMetric.Reset() @@ -277,80 +359,12 @@ func (c SpacesCollector) reportSpacesMetrics(objs *models.CFObjects, ch chan<- p c.spaceTotalServicesQuotaMetric.Reset() for _, cSpace := range objs.Spaces { - relOrg, ok := cSpace.Relationships[constant.RelationshipTypeOrganization] - if !ok { - return fmt.Errorf("could not find org relationship in space '%s'", cSpace.GUID) - } - quotaName := "" + err := c.reportSpace(cSpace, objs) // 1. - relQuota := cSpace.Relationships[constant.RelationshipTypeQuota] - if relQuota.GUID != "" { - quota, okQ := objs.SpaceQuotas[relQuota.GUID] - if !okQ { - return fmt.Errorf("could not find space quota '%s' from space '%s'", relQuota.GUID, cSpace.GUID) - } - quotaName = quota.Name - c.spaceNonBasicServicesAllowedMetric.WithLabelValues( - cSpace.GUID, - cSpace.Name, - relOrg.GUID, - ).Set(BoolToFloat(quota.Services.PaidServicePlans)) - - c.spaceInstanceMemoryMbLimitMetric.WithLabelValues( - cSpace.GUID, - cSpace.Name, - relOrg.GUID, - ).Set(NullIntToFloat(quota.Apps.InstanceMemory)) - - c.spaceTotalAppInstancesQuotaMetric.WithLabelValues( - cSpace.GUID, - cSpace.Name, - relOrg.GUID, - ).Set(NullIntToFloat(quota.Apps.TotalAppInstances)) - - c.spaceTotalAppTasksQuotaMetric.WithLabelValues( - cSpace.GUID, - cSpace.Name, - relOrg.GUID, - ).Set(NullIntToFloat(quota.Apps.PerAppTasks)) - - c.spaceTotalMemoryMbQuotaMetric.WithLabelValues( - cSpace.GUID, - cSpace.Name, - relOrg.GUID, - ).Set(NullIntToFloat(quota.Apps.TotalMemory)) - - 
c.spaceTotalReservedRoutePortsQuotaMetric.WithLabelValues( - cSpace.GUID, - cSpace.Name, - relOrg.GUID, - ).Set(NullIntToFloat(quota.Routes.TotalReservedPorts)) - - c.spaceTotalRoutesQuotaMetric.WithLabelValues( - cSpace.GUID, - cSpace.Name, - relOrg.GUID, - ).Set(NullIntToFloat(quota.Routes.TotalRoutes)) - - c.spaceTotalServiceKeysQuotaMetric.WithLabelValues( - cSpace.GUID, - cSpace.Name, - relOrg.GUID, - ).Set(NullIntToFloat(quota.Services.TotalServiceKeys)) - - c.spaceTotalServicesQuotaMetric.WithLabelValues( - cSpace.GUID, - cSpace.Name, - relOrg.GUID, - ).Set(NullIntToFloat(quota.Services.TotalServiceInstances)) + if err != nil { + log.Warn(err) + res = err } - - c.spaceInfoMetric.WithLabelValues( - cSpace.GUID, - cSpace.Name, - relOrg.GUID, - quotaName, - ).Set(float64(1)) } c.spaceInfoMetric.Collect(ch) @@ -363,5 +377,5 @@ func (c SpacesCollector) reportSpacesMetrics(objs *models.CFObjects, ch chan<- p c.spaceTotalRoutesQuotaMetric.Collect(ch) c.spaceTotalServiceKeysQuotaMetric.Collect(ch) c.spaceTotalServicesQuotaMetric.Collect(ch) - return nil + return res } diff --git a/fetcher/fetcher.go b/fetcher/fetcher.go index 22b6bac3..a82aac38 100644 --- a/fetcher/fetcher.go +++ b/fetcher/fetcher.go @@ -82,7 +82,7 @@ func (c *Fetcher) fetch() *models.CFObjects { session, err := NewSessionExt(c.config) if err != nil { - log.Errorf("unable to initialize cloud foundry clients: %s", err) + log.WithError(err).Error("unable to initialize cloud foundry clients") result.Error = err return result } diff --git a/fetcher/fetcher_handlers.go b/fetcher/fetcher_handlers.go index 244a667a..b4123a8f 100644 --- a/fetcher/fetcher_handlers.go +++ b/fetcher/fetcher_handlers.go @@ -8,6 +8,7 @@ import ( "code.cloudfoundry.org/cli/resources" "github.com/bosh-prometheus/cf_exporter/filters" "github.com/bosh-prometheus/cf_exporter/models" + log "github.com/sirupsen/logrus" ) func loadIndex[T any](store map[string]T, objects []T, key func(T) string) { @@ -38,6 +39,9 @@ func (c *Fetcher) 
fetchOrgQuotas(session *SessionExt, entry *models.CFObjects) e return err } +// fetchSpaces +// 1. silent fail because space may have been deleted between listing and +// summary fetching attempt. See bosh-prometheus/cf_exporter#85 func (c *Fetcher) fetchSpaces(session *SessionExt, entry *models.CFObjects) error { spaces, _, _, err := session.V3().GetSpaces(LargeQuery) if err != nil { @@ -58,10 +62,14 @@ func (c *Fetcher) fetchSpaces(session *SessionExt, entry *models.CFObjects) erro entry.AppSummaries[app.GUID] = app } c.Unlock() + } else { + log.WithError(err).Warnf("could not fetch space '%s' summary", space.GUID) } - return err + // 1 + return nil }, filters.Applications) } + return nil }