Skip to content

Commit

Permalink
add healthz and livez endpoints
Browse files Browse the repository at this point in the history
  • Loading branch information
Cory Schwartz committed May 21, 2022
1 parent effee8c commit 444d0b1
Show file tree
Hide file tree
Showing 4 changed files with 91 additions and 5 deletions.
2 changes: 1 addition & 1 deletion cmd/lotus-gateway/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ var runCmd = &cli.Command{
}

gwapi := gateway.NewNode(api, lookbackCap, waitLookback)
h, err := gateway.Handler(gwapi, serverOptions...)
h, err := gateway.Handler(gwapi, api, serverOptions...)
if err != nil {
return xerrors.Errorf("failed to set up gateway HTTP handler")
}
Expand Down
11 changes: 7 additions & 4 deletions gateway/handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,17 @@ import (

"contrib.go.opencensus.io/exporter/prometheus"
"github.com/filecoin-project/go-jsonrpc"
"github.com/filecoin-project/lotus/api"
lapi "github.com/filecoin-project/lotus/api"
"github.com/filecoin-project/lotus/api/v0api"
"github.com/filecoin-project/lotus/api/v1api"
"github.com/filecoin-project/lotus/metrics/proxy"
"github.com/filecoin-project/lotus/node"
"github.com/gorilla/mux"
promclient "github.com/prometheus/client_golang/prometheus"
)

// Handler returns a gateway http.Handler, to be mounted as-is on the server.
func Handler(a api.Gateway, opts ...jsonrpc.ServerOption) (http.Handler, error) {
func Handler(gwapi lapi.Gateway, api lapi.FullNode, opts ...jsonrpc.ServerOption) (http.Handler, error) {
m := mux.NewRouter()

serveRpc := func(path string, hnd interface{}) {
Expand All @@ -23,10 +24,10 @@ func Handler(a api.Gateway, opts ...jsonrpc.ServerOption) (http.Handler, error)
m.Handle(path, rpcServer)
}

ma := proxy.MetricedGatewayAPI(a)
ma := proxy.MetricedGatewayAPI(gwapi)

serveRpc("/rpc/v1", ma)
serveRpc("/rpc/v0", api.Wrap(new(v1api.FullNodeStruct), new(v0api.WrapperV1Full), ma))
serveRpc("/rpc/v0", lapi.Wrap(new(v1api.FullNodeStruct), new(v0api.WrapperV1Full), ma))

registry := promclient.DefaultRegisterer.(*promclient.Registry)
exporter, err := prometheus.NewExporter(prometheus.Options{
Expand All @@ -37,6 +38,8 @@ func Handler(a api.Gateway, opts ...jsonrpc.ServerOption) (http.Handler, error)
return nil, err
}
m.Handle("/debug/metrics", exporter)
m.Handle("/health/livez", node.NewLiveHandler(api))
m.Handle("/health/readyz", node.NewReadyHandler(api))
m.PathPrefix("/").Handler(http.DefaultServeMux)

/*ah := &auth.Handler{
Expand Down
81 changes: 81 additions & 0 deletions node/health.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
package node

import (
	"context"
	"net/http"
	"sync/atomic"
	"time"

	lapi "github.com/filecoin-project/lotus/api"
	"github.com/libp2p/go-libp2p-core/network"
)

// HealthHandler is an http.Handler that exposes a boolean health state:
// it answers 200 OK while healthy and 503 Service Unavailable otherwise.
// The zero value is ready to use and reports unhealthy.
//
// The state is updated from background goroutines while ServeHTTP reads it
// from request goroutines, so it is stored as an int32 and only ever
// accessed through sync/atomic.
type HealthHandler struct {
	healthy int32 // 1 when healthy, 0 otherwise; access atomically
}

// SetHealthy records the current health state. It is safe to call
// concurrently with ServeHTTP and with other SetHealthy calls.
func (h *HealthHandler) SetHealthy(healthy bool) {
	var v int32
	if healthy {
		v = 1
	}
	atomic.StoreInt32(&h.healthy, v)
}

// ServeHTTP writes only a status code: 200 OK if the handler is currently
// healthy, 503 Service Unavailable if not. No body is written.
func (h *HealthHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
	if atomic.LoadInt32(&h.healthy) == 0 {
		w.WriteHeader(http.StatusServiceUnavailable)
		return
	}
	w.WriteHeader(http.StatusOK)
}

// NewLiveHandler returns a HealthHandler that reports whether the backend is
// alive. The backend is considered alive so long as there have been recent
// head changes. Being alive doesn't mean we are up to date, just moving.
//
// A background goroutine subscribes to api.ChainNotify. Every notification
// marks the handler healthy and re-arms a countdown of five one-minute ticks;
// when the countdown runs out without another notification the handler is
// marked unhealthy. If the subscription cannot be established, or the
// notification channel is closed, the handler is left unhealthy and the
// goroutine exits, since no further head changes can be observed.
func NewLiveHandler(api lapi.FullNode) *HealthHandler {
	ctx := context.Background()
	h := HealthHandler{}
	go func() {
		// Number of minute ticks without a head change before the
		// handler flips to unhealthy.
		const reset = 5
		countdown := 0

		headCh, err := api.ChainNotify(ctx)
		if err != nil {
			// Nothing to watch: the handler stays at its unhealthy zero
			// state. Returning here avoids parking forever on a nil
			// channel with a live ticker.
			return
		}

		minutely := time.NewTicker(time.Minute)
		defer minutely.Stop()

		for {
			select {
			case <-minutely.C:
				// Only count down while armed, so the counter never
				// drifts below zero before the first head change.
				if countdown > 0 {
					countdown--
					if countdown == 0 {
						h.SetHealthy(false)
					}
				}
			case _, ok := <-headCh:
				if !ok {
					// A closed channel would otherwise fire on every
					// iteration, spinning the loop and reporting
					// healthy indefinitely.
					h.SetHealthy(false)
					return
				}
				countdown = reset
				h.SetHealthy(true)
			}
		}
	}()
	return &h
}

// NewReadyHandler returns a HealthHandler that reports whether we are ready
// to handle traffic. The handler is healthy only when both checks pass:
//  1. api.NodeStatus succeeds and its SyncStatus.Behind is below
//     heightTolerance (5), i.e. the sync workers are caught up.
//  2. api.NetAutoNatStatus succeeds and reports a reachability other than
//     network.ReachabilityUnknown.
//
// A background goroutine evaluates the checks once immediately and then once
// per minute for the lifetime of the process.
func NewReadyHandler(api lapi.FullNode) *HealthHandler {
	ctx := context.Background()
	h := HealthHandler{}
	go func() {
		const heightTolerance = uint64(5)

		// check runs both probes and publishes the combined result. A
		// failed API call counts as "not ready" for that probe.
		check := func() {
			netstat, err := api.NetAutoNatStatus(ctx)
			nethealth := err == nil && netstat.Reachability != network.ReachabilityUnknown

			nodestat, err := api.NodeStatus(ctx, false)
			synchealth := err == nil && nodestat.SyncStatus.Behind < heightTolerance

			h.SetHealthy(nethealth && synchealth)
		}

		// Probe right away so the endpoint does not answer 503 for the
		// first minute regardless of the node's actual state.
		check()

		minutely := time.NewTicker(time.Minute)
		for range minutely.C {
			check()
		}
	}()
	return &h
}
2 changes: 2 additions & 0 deletions node/rpc.go
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,8 @@ func FullNodeHandler(a v1api.FullNode, permissioned bool, opts ...jsonrpc.Server
m.Handle("/debug/pprof-set/mutex", handleFractionOpt("MutexProfileFraction", func(x int) {
runtime.SetMutexProfileFraction(x)
}))
m.Handle("/health/livez", NewLiveHandler(a))
m.Handle("/health/readyz", NewReadyHandler(a))
m.PathPrefix("/").Handler(http.DefaultServeMux) // pprof

return m, nil
Expand Down

0 comments on commit 444d0b1

Please sign in to comment.