From 607381208bbfb94be1b4b49d30db3e9711087777 Mon Sep 17 00:00:00 2001 From: Pier-Hugues Pellerin Date: Tue, 14 Jul 2020 14:59:26 -0400 Subject: [PATCH 1/3] [Elastic Agent] Handle 429 response from the server and adjust backoff When enrolling, if the server is currently handling too many concurrent requests, it will return a 429 status code. The enroll subcommand will retry to enroll with an exponential backoff. (Init 15sec and max 10mins) This also adjusts the backoff logic in the ACK. Requires: https://github.com/elastic/kibana/pull/71552 --- .../pkg/agent/application/fleet_gateway.go | 4 ++-- x-pack/elastic-agent/pkg/agent/cmd/enroll.go | 15 +++++++++++++++ x-pack/elastic-agent/pkg/fleetapi/enroll_cmd.go | 7 +++++++ 3 files changed, 24 insertions(+), 2 deletions(-) diff --git a/x-pack/elastic-agent/pkg/agent/application/fleet_gateway.go b/x-pack/elastic-agent/pkg/agent/application/fleet_gateway.go index 2856cd83abf7..fb751925483e 100644 --- a/x-pack/elastic-agent/pkg/agent/application/fleet_gateway.go +++ b/x-pack/elastic-agent/pkg/agent/application/fleet_gateway.go @@ -23,8 +23,8 @@ var defaultGatewaySettings = &fleetGatewaySettings{ Duration: 1 * time.Second, // time between successful calls Jitter: 500 * time.Millisecond, // used as a jitter for duration Backoff: backoffSettings{ // time after a failed call - Init: 5 * time.Second, - Max: 60 * time.Second, + Init: 15 * time.Second, + Max: 10 * time.Minute, }, } diff --git a/x-pack/elastic-agent/pkg/agent/cmd/enroll.go b/x-pack/elastic-agent/pkg/agent/cmd/enroll.go index 99140951497d..73e40196e96b 100644 --- a/x-pack/elastic-agent/pkg/agent/cmd/enroll.go +++ b/x-pack/elastic-agent/pkg/agent/cmd/enroll.go @@ -12,6 +12,7 @@ import ( "github.com/spf13/cobra" + "github.com/elastic/beats/v7/libbeat/common/backoff" c "github.com/elastic/beats/v7/libbeat/common/cli" "github.com/elastic/beats/v7/x-pack/elastic-agent/pkg/agent/application" "github.com/elastic/beats/v7/x-pack/elastic-agent/pkg/agent/configuration" @@ -20,6 +21,7 
@@ import ( "github.com/elastic/beats/v7/x-pack/elastic-agent/pkg/cli" "github.com/elastic/beats/v7/x-pack/elastic-agent/pkg/config" "github.com/elastic/beats/v7/x-pack/elastic-agent/pkg/core/logger" + "github.com/elastic/beats/v7/x-pack/elastic-agent/pkg/fleetapi" ) var defaultDelay = 1 * time.Second @@ -116,6 +118,19 @@ func enroll(streams *cli.IOStreams, cmd *cobra.Command, flags *globalFlags, args } err = c.Execute() + signal := make(chan struct{}) + + backExp := backoff.NewExpBackoff(signal, 30*time.Second, 10*time.Minute) + + for err == fleetapi.ErrTooManyRequests { + fmt.Fprintln(streams.Out, "Too many requests on the remote server, will retry in a moment.") + backExp.Wait() + fmt.Fprintln(streams.Out, "Retrying to enroll...") + err = c.Execute() + } + + close(signal) + if err != nil { return errors.New(err, "fail to enroll") } diff --git a/x-pack/elastic-agent/pkg/fleetapi/enroll_cmd.go b/x-pack/elastic-agent/pkg/fleetapi/enroll_cmd.go index 0d2784ef7419..55955f3edd56 100644 --- a/x-pack/elastic-agent/pkg/fleetapi/enroll_cmd.go +++ b/x-pack/elastic-agent/pkg/fleetapi/enroll_cmd.go @@ -21,6 +21,9 @@ import ( // EnrollType is the type of enrollment to do with the elastic-agent. type EnrollType string +// ErrTooManyRequests is received when the remote server is overloaded. +var ErrTooManyRequests = errors.New("too many requests received (429)") + const ( // PermanentEnroll is default enrollment type, by default an Agent is permanently enroll to Agent. 
PermanentEnroll = EnrollType("PERMANENT") @@ -190,6 +193,10 @@ func (e *EnrollCmd) Execute(ctx context.Context, r *EnrollRequest) (*EnrollRespo } defer resp.Body.Close() + if resp.StatusCode == http.StatusTooManyRequests { + return nil, ErrTooManyRequests + } + if resp.StatusCode != http.StatusOK { return nil, extract(resp.Body) } From 315254ad0949d2dcfe1dd014d1a7ca512aba9321 Mon Sep 17 00:00:00 2001 From: Pier-Hugues Pellerin Date: Tue, 14 Jul 2020 15:53:56 -0400 Subject: [PATCH 2/3] changelog --- x-pack/elastic-agent/CHANGELOG.asciidoc | 1 + 1 file changed, 1 insertion(+) diff --git a/x-pack/elastic-agent/CHANGELOG.asciidoc b/x-pack/elastic-agent/CHANGELOG.asciidoc index a5ec2d514b49..e6b764fbc2af 100644 --- a/x-pack/elastic-agent/CHANGELOG.asciidoc +++ b/x-pack/elastic-agent/CHANGELOG.asciidoc @@ -86,3 +86,4 @@ - Configuration cleanup {pull}19848[19848] - Agent now sends its own logs to elasticsearch {pull}19811[19811] - Add --insecure option to enroll command {pull}19900[19900] +- Will retry to enroll if the server return a 429. 
{pull}19918[19918] From 549047753a839b193547d101f90478843577fb00 Mon Sep 17 00:00:00 2001 From: Pier-Hugues Pellerin Date: Tue, 14 Jul 2020 16:35:59 -0400 Subject: [PATCH 3/3] Change values --- x-pack/elastic-agent/pkg/agent/application/fleet_gateway.go | 2 +- x-pack/elastic-agent/pkg/agent/cmd/enroll.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/x-pack/elastic-agent/pkg/agent/application/fleet_gateway.go b/x-pack/elastic-agent/pkg/agent/application/fleet_gateway.go index fb751925483e..cd94380e6739 100644 --- a/x-pack/elastic-agent/pkg/agent/application/fleet_gateway.go +++ b/x-pack/elastic-agent/pkg/agent/application/fleet_gateway.go @@ -23,7 +23,7 @@ var defaultGatewaySettings = &fleetGatewaySettings{ Duration: 1 * time.Second, // time between successful calls Jitter: 500 * time.Millisecond, // used as a jitter for duration Backoff: backoffSettings{ // time after a failed call - Init: 15 * time.Second, + Init: 60 * time.Second, Max: 10 * time.Minute, }, } diff --git a/x-pack/elastic-agent/pkg/agent/cmd/enroll.go b/x-pack/elastic-agent/pkg/agent/cmd/enroll.go index 73e40196e96b..c7243926ced8 100644 --- a/x-pack/elastic-agent/pkg/agent/cmd/enroll.go +++ b/x-pack/elastic-agent/pkg/agent/cmd/enroll.go @@ -120,7 +120,7 @@ func enroll(streams *cli.IOStreams, cmd *cobra.Command, flags *globalFlags, args err = c.Execute() signal := make(chan struct{}) - backExp := backoff.NewExpBackoff(signal, 30*time.Second, 10*time.Minute) + backExp := backoff.NewExpBackoff(signal, 60*time.Second, 10*time.Minute) for err == fleetapi.ErrTooManyRequests { fmt.Fprintln(streams.Out, "Too many requests on the remote server, will retry in a moment.")