Skip to content

Commit

Permalink
Allow halting (and restarting) pods on certificate errors
Browse files Browse the repository at this point in the history
General practice in Kubernetes is to restart pods relying on
certificates when those certificates are changed. Because Kubernetes
isn't aware of cross-cluster certificate changes, this doesn't happen
automatically when the broker certificate (or trust chain) changes.
Such a change then produces certificate errors and ultimately results
in a broken setup.

To avoid this, add a setting that makes gateway and Lighthouse agent
pods halt on certificate errors, so that they get restarted.

Signed-off-by: Stephen Kitt <skitt@redhat.com>
  • Loading branch information
skitt authored and tpantelis committed Oct 25, 2023
1 parent 74aaef5 commit c75f06f
Show file tree
Hide file tree
Showing 9 changed files with 25 additions and 0 deletions.
1 change: 1 addition & 0 deletions api/v1alpha1/servicediscovery_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ type ServiceDiscoverySpec struct {
Debug bool `json:"debug"`
GlobalnetEnabled bool `json:"globalnetEnabled,omitempty"`
BrokerK8sInsecure bool `json:"brokerK8sInsecure,omitempty"`
HaltOnCertificateError bool `json:"haltOnCertificateError,omitempty"`
CoreDNSCustomConfig *CoreDNSCustomConfig `json:"coreDNSCustomConfig,omitempty"`
// +listType=set
CustomDomains []string `json:"customDomains,omitempty"`
Expand Down
5 changes: 5 additions & 0 deletions api/v1alpha1/submariner_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,11 @@ type SubmarinerSpec struct {

BrokerK8sInsecure bool `json:"brokerK8sInsecure,omitempty"`

// Halt on certificate error (so the pod gets restarted).
// +operator-sdk:csv:customresourcedefinitions:type=spec,displayName="Halt (and restart) on certificate error"
// +operator-sdk:csv:customresourcedefinitions:type=spec,xDescriptors={"urn:alm:descriptor:com.tectonic.ui:booleanSwitch"}
HaltOnCertificateError bool `json:"haltOnCertificateError"`

// Name of the custom CoreDNS configmap to configure forwarding to Lighthouse.
// It should be in <namespace>/<name> format where <namespace> is optional and defaults to kube-system.
// +operator-sdk:csv:customresourcedefinitions:type=spec,displayName="CoreDNS Custom Config"
Expand Down
2 changes: 2 additions & 0 deletions config/crd/bases/submariner.io_servicediscoveries.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,8 @@ spec:
type: boolean
globalnetEnabled:
type: boolean
haltOnCertificateError:
type: boolean
imageOverrides:
additionalProperties:
type: string
Expand Down
3 changes: 3 additions & 0 deletions config/crd/bases/submariner.io_submariners.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,9 @@ spec:
description: The Global CIDR super-net range for allocating GlobalCIDRs
to each cluster.
type: string
haltOnCertificateError:
description: Halt on certificate error (so the pod gets restarted).
type: boolean
imageOverrides:
additionalProperties:
type: string
Expand Down
5 changes: 5 additions & 0 deletions config/manifests/bases/submariner.clusterserviceversion.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,11 @@ spec:
x-descriptors:
- urn:alm:descriptor:com.tectonic.ui:text
- urn:alm:descriptor:com.tectonic.ui:advanced
- description: Halt on certificate error (so the pod gets restarted).
displayName: Halt (and restart) on certificate error
path: haltOnCertificateError
x-descriptors:
- urn:alm:descriptor:com.tectonic.ui:booleanSwitch
- description: Override component images.
displayName: Image Overrides
path: imageOverrides
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,7 @@ func newLighthouseAgent(cr *submarinerv1alpha1.ServiceDiscovery, name string) *a
{Name: "SUBMARINER_CLUSTERID", Value: cr.Spec.ClusterID},
{Name: "SUBMARINER_DEBUG", Value: strconv.FormatBool(cr.Spec.Debug)},
{Name: "SUBMARINER_GLOBALNET_ENABLED", Value: strconv.FormatBool(cr.Spec.GlobalnetEnabled)},
{Name: "SUBMARINER_HALT_ON_CERT_ERROR", Value: strconv.FormatBool(cr.Spec.HaltOnCertificateError)},
{Name: broker.EnvironmentVariable("ApiServer"), Value: cr.Spec.BrokerK8sApiServer},
{Name: broker.EnvironmentVariable("ApiServerToken"), Value: cr.Spec.BrokerK8sApiServerToken},
{Name: broker.EnvironmentVariable("RemoteNamespace"), Value: cr.Spec.BrokerK8sRemoteNamespace},
Expand Down
1 change: 1 addition & 0 deletions controllers/submariner/gateway_resources.go
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,7 @@ func newGatewayPodTemplate(cr *v1alpha1.Submariner, name string, podSelectorLabe
{Name: "SUBMARINER_HEALTHCHECKINTERVAL", Value: strconv.FormatUint(healthCheckInterval, 10)},
{Name: "SUBMARINER_HEALTHCHECKMAXPACKETLOSSCOUNT", Value: strconv.FormatUint(healthCheckMaxPacketLossCount, 10)},
{Name: "SUBMARINER_METRICSPORT", Value: gatewayMetricsServerPort},
{Name: "SUBMARINER_HALT_ON_CERT_ERROR", Value: strconv.FormatBool(cr.Spec.HaltOnCertificateError)},
{Name: "NODE_NAME", ValueFrom: &corev1.EnvVarSource{
FieldRef: &corev1.ObjectFieldSelector{
FieldPath: "spec.nodeName",
Expand Down
1 change: 1 addition & 0 deletions controllers/submariner/servicediscovery_resources.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ func (r *Reconciler) serviceDiscoveryReconciler(ctx context.Context, submariner
BrokerK8sApiServerToken: submariner.Spec.BrokerK8sApiServerToken,
BrokerK8sApiServer: submariner.Spec.BrokerK8sApiServer,
BrokerK8sInsecure: submariner.Spec.BrokerK8sInsecure,
HaltOnCertificateError: submariner.Spec.HaltOnCertificateError,
Debug: submariner.Spec.Debug,
ClusterID: submariner.Spec.ClusterID,
Namespace: submariner.Spec.Namespace,
Expand Down
6 changes: 6 additions & 0 deletions pkg/embeddedyamls/yamls.go
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,9 @@ spec:
description: The Global CIDR super-net range for allocating GlobalCIDRs
to each cluster.
type: string
haltOnCertificateError:
description: Halt on certificate error (so the pod gets restarted).
type: boolean
imageOverrides:
additionalProperties:
type: string
Expand Down Expand Up @@ -297,6 +300,7 @@ spec:
- clusterCIDR
- clusterID
- debug
- haltOnCertificateError
- namespace
- natEnabled
- serviceCIDR
Expand Down Expand Up @@ -1113,6 +1117,8 @@ spec:
type: boolean
globalnetEnabled:
type: boolean
haltOnCertificateError:
type: boolean
imageOverrides:
additionalProperties:
type: string
Expand Down

0 comments on commit c75f06f

Please sign in to comment.