Skip to content

Commit

Permalink
backoff, refactoring reconcile flow
Browse files Browse the repository at this point in the history
  • Loading branch information
MartinWeindel committed May 18, 2020
1 parent 4ffb653 commit cea9052
Show file tree
Hide file tree
Showing 5 changed files with 162 additions and 45 deletions.
22 changes: 21 additions & 1 deletion examples/10-crds.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,27 @@ spec:
status:
description: CertificateStatus is the status of the certificate request.
properties:
backoff:
description: BackOff contains the state to back off failed certificate
requests
properties:
observedGeneration:
description: ObservedGeneration is the observed generation the BackOffState
is assigned to
format: int64
type: integer
recheckAfter:
description: RetryAfter is the timestamp this cert request is not
retried before.
format: date-time
type: string
recheckInterval:
description: RetryInterval is interval to wait for retrying.
type: string
required:
- recheckAfter
- recheckInterval
type: object
commonName:
description: CommonName is the current CN.
type: string
Expand Down Expand Up @@ -273,7 +294,6 @@ spec:
description: State is the certificate state.
type: string
required:
- lastPendingTimestamp
- state
type: object
required:
Expand Down
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ require (
github.com/miekg/dns v1.1.27
github.com/onsi/ginkgo v1.10.1
github.com/onsi/gomega v1.7.0
github.com/pkg/errors v0.9.1
github.com/prometheus/client_golang v1.1.0
github.com/prometheus/common v0.7.0 // indirect
github.com/prometheus/procfs v0.0.5 // indirect
Expand Down
15 changes: 14 additions & 1 deletion pkg/apis/cert/v1alpha1/certificate.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ type Certificate struct {
// CertificateSpec is the spec of the certificate to request.
type CertificateSpec struct {
// CommonName is the CN for the certificate (max. 64 chars).
// +kubebuilder:validation:MaxLength=64
CommonName *string `json:"commonName,omitempty"`
// DNSNames are the optional additional domain names of the certificate.
// +optional
Expand All @@ -65,6 +66,16 @@ type IssuerRef struct {
Name string `json:"name"`
}

// BackOffState stores the state used to back off on repeated certificate request failures.
// Note that RetryAfter and RetryInterval are serialized under the JSON names
// "recheckAfter" and "recheckInterval".
type BackOffState struct {
// ObservedGeneration is the generation of the object this back-off state was recorded for.
ObservedGeneration int64 `json:"observedGeneration,omitempty"`
// RetryAfter is the earliest point in time at which the certificate request is retried.
RetryAfter metav1.Time `json:"recheckAfter"`
// RetryInterval is the interval to wait before retrying.
RetryInterval metav1.Duration `json:"recheckInterval"`
}

// CertificateStatus is the status of the certificate request.
type CertificateStatus struct {
// ObservedGeneration is the observed generation of the spec.
Expand All @@ -74,7 +85,7 @@ type CertificateStatus struct {
// Message is the status or error message.
Message *string `json:"message,omitempty"`
// LastPendingTimestamp contains the start timestamp of the last pending status.
LastPendingTimestamp *metav1.Time `json:"lastPendingTimestamp"`
LastPendingTimestamp *metav1.Time `json:"lastPendingTimestamp,omitempty"`
// CommonName is the current CN.
CommonName *string `json:"commonName,omitempty"`
// DNSNames are the current domain names.
Expand All @@ -83,6 +94,8 @@ type CertificateStatus struct {
IssuerRef *IssuerRefWithNamespace `json:"issuerRef,omitempty"`
// ExpirationDate shows the notAfter validity date.
ExpirationDate *string `json:"expirationDate,omitempty"`
// BackOff contains the state to back off failed certificate requests
BackOff *BackOffState `json:"backoff,omitempty"`
}

// IssuerRefWithNamespace is the full qualified issuer reference.
Expand Down
28 changes: 28 additions & 0 deletions pkg/apis/cert/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

141 changes: 98 additions & 43 deletions pkg/controller/issuer/certificate/reconciler.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,9 @@ import (
"unicode/utf8"

"github.com/go-acme/lego/v3/certificate"
"github.com/pkg/errors"
corev1 "k8s.io/api/core/v1"
apierrrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/selection"
Expand Down Expand Up @@ -161,53 +163,81 @@ func (r *certReconciler) Reconcile(logger logger.LogContext, obj resources.Objec
return reconcile.Succeeded(logger)
}

if cert.Status.BackOff != nil &&
obj.GetGeneration() == cert.Status.BackOff.ObservedGeneration &&
time.Now().Before(cert.Status.BackOff.RetryAfter.Time) {
interval := cert.Status.BackOff.RetryAfter.Time.Sub(time.Now())
if interval < 30*time.Second {
interval = 30 * time.Second
}
return reconcile.Recheck(logger, fmt.Errorf("backoff"), interval)
}

r.support.AddCertificate(logger, cert)

if r.challengePending(cert) {
return reconcile.Recheck(logger, fmt.Errorf("challenge pending for at least one domain of certificate"), 30*time.Second)
}

if result := r.pendingResults.Remove(obj.ObjectName()); result != nil {
if result.Err != nil {
return r.failed(logger, obj, api.StateError, fmt.Errorf("obtaining certificate failed with %s", result.Err.Error()))
}
return r.handleObtainOutput(logger, obj, result)
}

spec := &api.CertificateSpec{
CommonName: result.CommonName,
DNSNames: result.DNSNames,
CSR: result.CSR,
IssuerRef: &api.IssuerRef{Name: result.IssuerName},
}
specHash := r.buildSpecHash(spec)
secretRef, err := r.writeCertificateSecret(cert.ObjectMeta, result.Certificates, specHash, cert.Spec.SecretName)
var secret *corev1.Secret
if cert.Spec.SecretRef != nil {
var err error
secret, err = r.loadSecret(cert.Spec.SecretRef)
if err != nil {
return r.failed(logger, obj, api.StateError, fmt.Errorf("writing certificate secret failed with %s", err.Error()))
if !apierrrors.IsNotFound(err) {
return r.failed(logger, obj, api.StateError, err)
}
// ignore if SecretRef is specified but not existing
secret = nil
}
logger.Infof("certificate written in secret %s/%s", secretRef.Namespace, secretRef.Name)
}

var notAfter *time.Time
cert, err := legobridge.DecodeCertificate(result.Certificates.Certificate)
if err == nil {
notAfter = &cert.NotAfter
if secret != nil {
specHash := r.buildSpecHash(&cert.Spec)
storedHash := cert.Labels[LabelCertificateHashKey]
if specHash != storedHash {
return r.deleteSecretRefAndRepeat(logger, obj)
}

return r.updateSecretRefAndSucceeded(logger, obj, secretRef, specHash, notAfter)
return r.checkForRenewAndSucceeded(logger, obj, secret)
}

if cert.Spec.SecretRef == nil {
if !r.lastPendingRateLimiting(cert.Status.LastPendingTimestamp) {
return r.obtainCertificateAndPending(logger, obj, nil)
}
if r.lastPendingRateLimiting(cert.Status.LastPendingTimestamp) {
remainingSeconds := r.lastPendingRateLimitingSeconds(cert.Status.LastPendingTimestamp)
return reconcile.Delay(logger, fmt.Errorf("waiting for end of pending rate limiting in %d seconds", remainingSeconds))
}
specHash := r.buildSpecHash(&cert.Spec)
storedHash := cert.Labels[LabelCertificateHashKey]
if specHash != storedHash {
return r.deleteSecretRefAndRepeat(logger, obj)
return r.obtainCertificateAndPending(logger, obj, nil)
}

// handleObtainOutput processes the result of a finished certificate request.
//
// If result carries an error, the certificate object is set to the error
// state. Otherwise the obtained certificate is written via
// writeCertificateSecret and the outcome (secret reference, spec hash and,
// if the certificate could be decoded, its NotAfter date) is handed over to
// updateSecretRefAndSucceeded.
func (r *certReconciler) handleObtainOutput(logger logger.LogContext, obj resources.Object, result *legobridge.ObtainOutput) reconcile.Status {
	if result.Err != nil {
		return r.failed(logger, obj, api.StateError, errors.Wrapf(result.Err, "obtaining certificate failed"))
	}

	// NOTE(review): the result of the type assertion is not checked; obj is
	// presumably always a *api.Certificate here — confirm against callers.
	cert, _ := obj.Data().(*api.Certificate)

	// The hash is built from the values the certificate was actually
	// requested with, as reported back in the obtain result.
	spec := &api.CertificateSpec{
		CommonName: result.CommonName,
		DNSNames:   result.DNSNames,
		CSR:        result.CSR,
		IssuerRef:  &api.IssuerRef{Name: result.IssuerName},
	}
	specHash := r.buildSpecHash(spec)

	secretRef, err := r.writeCertificateSecret(cert.ObjectMeta, result.Certificates, specHash, cert.Spec.SecretName)
	if err != nil {
		return r.failed(logger, obj, api.StateError, errors.Wrapf(err, "writing certificate secret failed"))
	}
	logger.Infof("certificate written in secret %s/%s", secretRef.Namespace, secretRef.Name)

	// Best effort: if the certificate cannot be decoded, notAfter stays nil.
	var notAfter *time.Time
	x509cert, err := legobridge.DecodeCertificate(result.Certificates.Certificate)
	if err == nil {
		notAfter = &x509cert.NotAfter
	}

	return r.updateSecretRefAndSucceeded(logger, obj, secretRef, specHash, notAfter)
}

func (r *certReconciler) Deleted(logger logger.LogContext, key resources.ClusterObjectKey) reconcile.Status {
Expand Down Expand Up @@ -324,7 +354,7 @@ func (r *certReconciler) obtainCertificateAndPending(logger logger.LogContext, o
case *legobridge.ConcurrentObtainError:
return r.delay(logger, obj, api.StatePending, err)
default:
return r.failed(logger, obj, api.StateError, fmt.Errorf("preparing obtaining certificates with %s", err.Error()))
return r.failed(logger, obj, api.StateError, errors.Wrapf(err, "preparing obtaining certificates failed"))
}
}
r.pendingRequests.Add(objectName)
Expand Down Expand Up @@ -446,13 +476,9 @@ func (r *certReconciler) deleteSecret(secretRef *corev1.SecretReference) error {
return r.certSecretResources.DeleteByName(secret)
}

func (r *certReconciler) checkForRenewAndSucceeded(logger logger.LogContext, obj resources.Object) reconcile.Status {
func (r *certReconciler) checkForRenewAndSucceeded(logger logger.LogContext, obj resources.Object, secret *corev1.Secret) reconcile.Status {
crt := obj.Data().(*api.Certificate)

secret, err := r.loadSecret(crt.Spec.SecretRef)
if err != nil {
return r.failed(logger, obj, api.StateError, err)
}
cert, err := legobridge.DecodeCertificateFromSecretData(secret.Data)
if err != nil {
return r.failed(logger, obj, api.StateError, err)
Expand Down Expand Up @@ -619,14 +645,39 @@ func (r *certReconciler) deleteSecretRefAndRepeat(logger logger.LogContext, obj
return r.repeat(logger, obj2)
}

func (r *certReconciler) prepareUpdateStatus(obj resources.Object, state string, msg *string) (*resources.ModificationState, *api.CertificateStatus) {
func (r *certReconciler) prepareUpdateStatus(obj resources.Object, state string, msg *string, increaseBackOff bool) (*resources.ModificationState, *api.CertificateStatus) {
crt := obj.Data().(*api.Certificate)
status := &crt.Status

mod := resources.NewModificationState(obj)
mod.AssureStringPtrPtr(&status.Message, msg)
mod.AssureStringValue(&status.State, state)
mod.AssureInt64Value(&status.ObservedGeneration, obj.GetGeneration())
switch state {
case api.StateReady:
mod.Modify(status.BackOff != nil)
status.BackOff = nil
mod.Modify(status.LastPendingTimestamp != nil)
status.LastPendingTimestamp = nil
case api.StatePending:
// nothing to do
default:
if increaseBackOff {
interval := r.rateLimiting
if status.BackOff != nil && status.ObservedGeneration == status.BackOff.ObservedGeneration {
interval += status.BackOff.RetryInterval.Duration
if interval > 8*time.Hour {
interval = 8 * time.Hour
}
}
status.BackOff = &api.BackOffState{
ObservedGeneration: status.ObservedGeneration,
RetryAfter: metav1.Time{Time: time.Now().Add(interval)},
RetryInterval: metav1.Duration{Duration: interval},
}
mod.Modify(true)
}
}

cn := crt.Spec.CommonName
dnsNames := crt.Spec.DNSNames
Expand Down Expand Up @@ -663,39 +714,43 @@ func (r *certReconciler) updateStatus(mod *resources.ModificationState) {
}

// failed reports err for obj with the given state. It is a thin wrapper that
// forwards all arguments unchanged to status and returns its result.
func (r *certReconciler) failed(logger logger.LogContext, obj resources.Object, state string, err error) reconcile.Status {
	result := r.status(logger, obj, state, err)
	return result
}

func (r *certReconciler) status(logger logger.LogContext, obj resources.Object, state string, err error) reconcile.Status {
msg := err.Error()

mod, _ := r.prepareUpdateStatus(obj, state, &msg)
rerr, isRecoverable := err.(*recoverableError)
mod, _ := r.prepareUpdateStatus(obj, state, &msg, !isRecoverable)
r.updateStatus(mod)

if rerr, ok := err.(*recoverableError); ok {
if isRecoverable {
if rerr.Interval != 0 {
return reconcile.Recheck(logger, err, rerr.Interval)
}
return reconcile.Delay(logger, err)
}

return reconcile.Failed(logger, err)
}

// delay records state and the message of err for obj and marks the error as
// recoverable without an explicit interval, by wrapping it in a
// recoverableError before passing it to status.
func (r *certReconciler) delay(logger logger.LogContext, obj resources.Object, state string, err error) reconcile.Status {
	return r.status(logger, obj, state, &recoverableError{Msg: err.Error()})
}

// recheck records state and the message of err for obj and marks the error as
// recoverable with the given recheck interval, by wrapping it in a
// recoverableError before passing it to status.
func (r *certReconciler) recheck(logger logger.LogContext, obj resources.Object, state string, err error, interval time.Duration) reconcile.Status {
	return r.status(logger, obj, state, &recoverableError{Msg: err.Error(), Interval: interval})
}

// succeeded sets the status of obj to the ready state with no message,
// requests no back-off increase, and reports success.
func (r *certReconciler) succeeded(logger logger.LogContext, obj resources.Object) reconcile.Status {
	mod, _ := r.prepareUpdateStatus(obj, api.StateReady, nil, false)
	r.updateStatus(mod)

	return reconcile.Succeeded(logger)
}

func (r *certReconciler) pending(logger logger.LogContext, obj resources.Object) reconcile.Status {
msg := "certificate requested, preparing/waiting for successful DNS01 challenge"
mod, status := r.prepareUpdateStatus(obj, api.StatePending, &msg)
mod, status := r.prepareUpdateStatus(obj, api.StatePending, &msg, false)
status.LastPendingTimestamp = &metav1.Time{Time: time.Now()}
mod.Modified = true
r.updateStatus(mod)
Expand All @@ -704,7 +759,7 @@ func (r *certReconciler) pending(logger logger.LogContext, obj resources.Object)
}

func (r *certReconciler) repeat(logger logger.LogContext, obj resources.Object) reconcile.Status {
mod, _ := r.prepareUpdateStatus(obj, "", nil)
mod, _ := r.prepareUpdateStatus(obj, "", nil, false)
r.updateStatus(mod)

return reconcile.Repeat(logger)
Expand Down

0 comments on commit cea9052

Please sign in to comment.