Skip to content

Commit

Permalink
Healthz overhaul (dapr#7518)
Browse files Browse the repository at this point in the history
* Healthz overhaul

Currently, the healthz endpoints on Dapr services do not represent the
actual health of the service, as they mostly just spin up an HTTP
server and immediately return a 200 OK. This results in services
receiving traffic before they are ready to actually serve requests. This
issue manifests in integration tests where, for example, no metrics are
available on the metrics endpoint even though the service reports as
healthy, and the test therefore fails.

This PR overhauls the healthz package to allow registering health
targets during the service `New` process chain. Each module marks its
target as healthy and the healthz server reports the overall health of
the service. Although not perfect, service healthz endpoints should now
be more representative of the actual health of the program.

Signed-off-by: joshvanl <me@joshvanl.dev>

* Fix healthz server in injector

Signed-off-by: joshvanl <me@joshvanl.dev>

* Adds daprd healthz mtls check

Signed-off-by: joshvanl <me@joshvanl.dev>

* Fix healthz/daprd error string match for windows

Signed-off-by: joshvanl <me@joshvanl.dev>

* Fix reconciler unit test

Signed-off-by: joshvanl <me@joshvanl.dev>

* Name operator healthz target set

Signed-off-by: joshvanl <me@joshvanl.dev>

* Use correct health var name for sub update informer

Signed-off-by: joshvanl <me@joshvanl.dev>

* Fix rebased unit tests

Signed-off-by: joshvanl <me@joshvanl.dev>

* Linting

Signed-off-by: joshvanl <me@joshvanl.dev>

* Return json marshal of placement state table for getter

Signed-off-by: joshvanl <me@joshvanl.dev>

* Linting

Signed-off-by: joshvanl <me@joshvanl.dev>

* feat: add new healthz to scheduler

Signed-off-by: mikeee <hey@mike.ee>

* chore: remove existing health handler

Signed-off-by: mikeee <hey@mike.ee>

* fix: add missing healthz

Signed-off-by: mikeee <hey@mike.ee>

* fix: init healthz server

Signed-off-by: mikeee <hey@mike.ee>

* Pass Healthz to the Scheduler Options in integration test

Signed-off-by: joshvanl <me@joshvanl.dev>

---------

Signed-off-by: joshvanl <me@joshvanl.dev>
Signed-off-by: mikeee <hey@mike.ee>
Co-authored-by: Dapr Bot <56698301+dapr-bot@users.noreply.github.com>
Co-authored-by: mikeee <hey@mike.ee>
  • Loading branch information
3 people authored Jun 27, 2024
1 parent bedb5c4 commit eb49e56
Show file tree
Hide file tree
Showing 72 changed files with 1,243 additions and 733 deletions.
21 changes: 16 additions & 5 deletions cmd/daprd/app/app.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ import (
stateLoader "github.com/dapr/dapr/pkg/components/state"
wfbeLoader "github.com/dapr/dapr/pkg/components/wfbackend"
workflowsLoader "github.com/dapr/dapr/pkg/components/workflows"
"github.com/dapr/dapr/pkg/healthz"
"github.com/dapr/dapr/pkg/metrics"
"github.com/dapr/dapr/pkg/modes"
"github.com/dapr/dapr/pkg/runtime/registry"
"github.com/dapr/dapr/pkg/security"
Expand Down Expand Up @@ -114,6 +116,7 @@ func Run() {
WithWorkflowBackends(wfbeLoader.DefaultRegistry)

ctx := signals.Context()
healthz := healthz.New()
secProvider, err := security.New(ctx, security.Options{
SentryAddress: opts.SentryAddress,
ControlPlaneTrustDomain: opts.ControlPlaneTrustDomain,
Expand All @@ -122,6 +125,7 @@ func Run() {
AppID: opts.AppID,
MTLSEnabled: opts.EnableMTLS,
Mode: modes.DaprMode(opts.Mode),
Healthz: healthz,
})
if err != nil {
log.Fatal(err)
Expand Down Expand Up @@ -172,11 +176,18 @@ func Run() {
AppChannelAddress: opts.AppChannelAddress,
EnableAPILogging: opts.EnableAPILogging,
Config: opts.Config,
Metrics: opts.Metrics,
AppSSL: opts.AppSSL,
ComponentsPath: opts.ComponentsPath,
Registry: reg,
Security: sec,
Metrics: metrics.Options{
Enabled: opts.Metrics.Enabled(),
Log: log,
Port: opts.Metrics.Port(),
Namespace: metrics.DefaultMetricNamespace,
Healthz: healthz,
},
AppSSL: opts.AppSSL,
ComponentsPath: opts.ComponentsPath,
Registry: reg,
Security: sec,
Healthz: healthz,
})
if rerr != nil {
return rerr
Expand Down
4 changes: 2 additions & 2 deletions cmd/daprd/options/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ type Options struct {
AppHealthCheckPath string
AppChannelAddress string
Logger logger.Options
Metrics *metrics.Options
Metrics *metrics.FlagOptions
}

func New(origArgs []string) (*Options, error) {
Expand Down Expand Up @@ -176,7 +176,7 @@ func New(origArgs []string) (*Options, error) {
opts.Logger = logger.DefaultOptions()
opts.Logger.AttachCmdFlags(fs.StringVar, fs.BoolVar)

opts.Metrics = metrics.DefaultMetricOptions()
opts.Metrics = metrics.DefaultFlagOptions()
opts.Metrics.AttachCmdFlags(fs.StringVar, fs.BoolVar)

// Ignore errors; flagset is set for ExitOnError
Expand Down
49 changes: 25 additions & 24 deletions cmd/injector/app/app.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ package app
import (
"context"
"encoding/base64"
"fmt"
"os"

metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
Expand All @@ -25,7 +24,8 @@ import (
"github.com/dapr/dapr/cmd/injector/options"
"github.com/dapr/dapr/pkg/buildinfo"
scheme "github.com/dapr/dapr/pkg/client/clientset/versioned"
"github.com/dapr/dapr/pkg/health"
"github.com/dapr/dapr/pkg/healthz"
healthzserver "github.com/dapr/dapr/pkg/healthz/server"
"github.com/dapr/dapr/pkg/injector/service"
"github.com/dapr/dapr/pkg/metrics"
"github.com/dapr/dapr/pkg/modes"
Expand All @@ -50,7 +50,14 @@ func Run() {
log.Infof("Starting Dapr Sidecar Injector -- version %s -- commit %s", buildinfo.Version(), buildinfo.Commit())
log.Infof("Log level set to: %s", opts.Logger.OutputLevel)

metricsExporter := metrics.NewExporterWithOptions(log, metrics.DefaultMetricNamespace, opts.Metrics)
healthz := healthz.New()
metricsExporter := metrics.New(metrics.Options{
Log: log,
Enabled: opts.Metrics.Enabled(),
Namespace: metrics.DefaultMetricNamespace,
Port: opts.Metrics.Port(),
Healthz: healthz,
})

err = utils.SetEnvVariables(map[string]string{
utils.KubeConfigVar: opts.Kubeconfig,
Expand Down Expand Up @@ -95,6 +102,7 @@ func Run() {
AppID: "dapr-injector",
MTLSEnabled: true,
Mode: modes.KubernetesMode,
Healthz: healthz,
})
if err != nil {
log.Fatal(err)
Expand All @@ -109,16 +117,23 @@ func Run() {
KubeClient: kubeClient,
ControlPlaneNamespace: security.CurrentNamespace(),
ControlPlaneTrustDomain: cfg.ControlPlaneTrustDomain,
Healthz: healthz,
})
if err != nil {
log.Fatalf("Error creating injector: %v", err)
}

healthzServer := health.NewServer(health.Options{Log: log})
webConfHealthTarget := healthz.AddTarget()

caBundleCh := make(chan []byte)
mngr := concurrency.NewRunnerManager(
metricsExporter.Run,
metricsExporter.Start,
secProvider.Run,
healthzserver.New(healthzserver.Options{
Log: log,
Port: opts.HealthzPort,
Healthz: healthz,
}).Start,
func(ctx context.Context) error {
sec, rerr := secProvider.Handler(ctx)
if rerr != nil {
Expand All @@ -135,25 +150,9 @@ func Run() {
)
},
func(ctx context.Context) error {
readyErr := inj.Ready(ctx)
if readyErr != nil {
return readyErr
}
healthzServer.Ready()
<-ctx.Done()
return nil
},
func(ctx context.Context) error {
healhtzErr := healthzServer.Run(ctx, opts.HealthzListenAddress, opts.HealthzPort)
if healhtzErr != nil {
return fmt.Errorf("failed to start healthz server: %w", healhtzErr)
}
return nil
},
func(ctx context.Context) error {
sec, rErr := secProvider.Handler(ctx)
if rErr != nil {
return rErr
sec, rerr := secProvider.Handler(ctx)
if rerr != nil {
return rerr
}
sec.WatchTrustAnchors(ctx, caBundleCh)
return nil
Expand Down Expand Up @@ -185,6 +184,8 @@ func Run() {
return rErr
}

webConfHealthTarget.Ready()

select {
case caBundle = <-caBundleCh:
case <-ctx.Done():
Expand Down
4 changes: 2 additions & 2 deletions cmd/injector/options/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ type Options struct {
Port int
ListenAddress string
Logger logger.Options
Metrics *metrics.Options
Metrics *metrics.FlagOptions
}

func New(origArgs []string) *Options {
Expand Down Expand Up @@ -69,7 +69,7 @@ func New(origArgs []string) *Options {
opts.Logger = logger.DefaultOptions()
opts.Logger.AttachCmdFlags(fs.StringVar, fs.BoolVar)

opts.Metrics = metrics.DefaultMetricOptions()
opts.Metrics = metrics.DefaultFlagOptions()
opts.Metrics.AttachCmdFlags(fs.StringVar, fs.BoolVar)

// Ignore errors; flagset is set for ExitOnError
Expand Down
23 changes: 18 additions & 5 deletions cmd/operator/app/app.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ package app
import (
"github.com/dapr/dapr/cmd/operator/options"
"github.com/dapr/dapr/pkg/buildinfo"
"github.com/dapr/dapr/pkg/healthz"
"github.com/dapr/dapr/pkg/healthz/server"
"github.com/dapr/dapr/pkg/metrics"
"github.com/dapr/dapr/pkg/operator"
"github.com/dapr/dapr/pkg/operator/monitoring"
Expand All @@ -37,7 +39,14 @@ func Run() {
log.Infof("Starting Dapr Operator -- version %s -- commit %s", buildinfo.Version(), buildinfo.Commit())
log.Infof("Log level set to: %s", opts.Logger.OutputLevel)

metricsExporter := metrics.NewExporterWithOptions(log, metrics.DefaultMetricNamespace, opts.Metrics)
healthz := healthz.New()
metricsExporter := metrics.New(metrics.Options{
Log: log,
Enabled: opts.Metrics.Enabled(),
Namespace: metrics.DefaultMetricNamespace,
Port: opts.Metrics.Port(),
Healthz: healthz,
})

if err := monitoring.InitMetrics(); err != nil {
log.Fatal(err)
Expand All @@ -57,18 +66,22 @@ func Run() {
WatchdogCanPatchPodLabels: opts.WatchdogCanPatchPodLabels,
APIPort: opts.APIPort,
APIListenAddress: opts.APIListenAddress,
HealthzPort: opts.HealthzPort,
HealthzListenAddress: opts.HealthzListenAddress,
WebhookServerPort: opts.WebhookServerPort,
WebhookServerListenAddress: opts.WebhookServerListenAddress,
Healthz: healthz,
})
if err != nil {
log.Fatalf("error creating operator: %v", err)
}

err = concurrency.NewRunnerManager(
metricsExporter.Run,
op.Run,
metricsExporter.Start,
op.Start,
server.New(server.Options{
Log: log,
Port: opts.HealthzPort,
Healthz: healthz,
}).Start,
).Run(ctx)
if err != nil {
log.Fatalf("error running operator: %v", err)
Expand Down
4 changes: 2 additions & 2 deletions cmd/operator/options/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ type Options struct {
WatchdogCanPatchPodLabels bool
TrustAnchorsFile string
Logger logger.Options
Metrics *metrics.Options
Metrics *metrics.FlagOptions
APIPort int
APIListenAddress string
HealthzPort int
Expand Down Expand Up @@ -91,7 +91,7 @@ func New() *Options {
opts.Logger = logger.DefaultOptions()
opts.Logger.AttachCmdFlags(flag.StringVar, flag.BoolVar)

opts.Metrics = metrics.DefaultMetricOptions()
opts.Metrics = metrics.DefaultFlagOptions()
opts.Metrics.AttachCmdFlags(flag.StringVar, flag.BoolVar)

flag.Parse()
Expand Down
Loading

0 comments on commit eb49e56

Please sign in to comment.