Skip to content

Commit

Permalink
Fix retrying logic which was causing persistenceagent to crash loop. (#…
Browse the repository at this point in the history
  • Loading branch information
neuromage authored and k8s-ci-robot committed Jan 5, 2019
1 parent ea72316 commit b97969f
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 13 deletions.
14 changes: 8 additions & 6 deletions backend/src/common/util/service.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,15 +31,17 @@ import (
func WaitForAPIAvailable(initializeTimeout time.Duration, basePath string, apiAddress string) error {
var operation = func() error {
response, err := http.Get(fmt.Sprintf("http://%s%s/healthz", apiAddress, basePath))
if err == nil {
return nil
if err != nil {
return err
}
// we wait only on 503 service unavailable. Stop retry otherwise.
if response.StatusCode != 503 {

// If we get a 503 service unavailable, it's a non-retriable error.
if response.StatusCode == 503 {
return backoff.Permanent(errors.Wrapf(
err, "Waiting for ml pipeline API server failed with non retriable error."))
}
return err

return nil
}

b := backoff.NewExponentialBackOff()
Expand All @@ -49,7 +51,7 @@ func WaitForAPIAvailable(initializeTimeout time.Duration, basePath string, apiAd
}

func GetKubernetesClientFromClientConfig(clientConfig clientcmd.ClientConfig) (
*kubernetes.Clientset, *rest.Config, string, error) {
*kubernetes.Clientset, *rest.Config, string, error) {
// Get the clientConfig
config, err := clientConfig.ClientConfig()
if err != nil {
Expand Down
17 changes: 10 additions & 7 deletions backend/test/test_utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,20 +44,23 @@ var runIntegrationTests = flag.Bool("runIntegrationTests", false, "Whether to al
func waitForReady(namespace string, initializeTimeout time.Duration) error {
var operation = func() error {
response, err := http.Get(fmt.Sprintf("http://ml-pipeline.%s.svc.cluster.local:8888/apis/v1beta1/healthz", namespace))
if err == nil {
return nil
if err != nil {
return err
}
// we wait only on 503 service unavailable. Stop retry otherwise.
if response.StatusCode != 503 {
return backoff.Permanent(errors.Wrapf(err, "Waiting for ml pipeline failed with non retriable error."))

// If we get a 503 service unavailable, it's a non-retriable error.
if response.StatusCode == 503 {
return backoff.Permanent(errors.Wrapf(
err, "Waiting for ml pipeline API server failed with non retriable error."))
}
return err

return nil
}

b := backoff.NewExponentialBackOff()
b.MaxElapsedTime = initializeTimeout
err := backoff.Retry(operation, b)
return errors.Wrapf(err, "Waiting for ml pipeline failed after all attempts.")
return errors.Wrapf(err, "Waiting for ml pipeline API server failed after all attempts.")
}

func getClientConfig(namespace string) clientcmd.ClientConfig {
Expand Down

0 comments on commit b97969f

Please sign in to comment.