diff --git a/operator/CHANGELOG.md b/operator/CHANGELOG.md index dbfebcbca8612..bfdf90603f6bf 100644 --- a/operator/CHANGELOG.md +++ b/operator/CHANGELOG.md @@ -1,5 +1,6 @@ ## Main +- [6195](https://github.com/grafana/loki/pull/6195) **periklis**: Add ruler config support - [6198](https://github.com/grafana/loki/pull/6198) **periklis**: Add support for custom S3 CA - [6199](https://github.com/grafana/loki/pull/6199) **Red-GV**: Update GCP secret volume path - [6125](https://github.com/grafana/loki/pull/6125) **sasagarw**: Add method to get authenticated from GCP diff --git a/operator/PROJECT b/operator/PROJECT index 208dd422dbc0c..9b5697e1cd5b2 100644 --- a/operator/PROJECT +++ b/operator/PROJECT @@ -40,4 +40,13 @@ resources: webhooks: validation: true webhookVersion: v1 +- api: + crdVersion: v1 + namespaced: true + controller: true + domain: grafana.com + group: loki + kind: RulerConfig + path: github.com/grafana/loki/operator/api/v1beta1 + version: v1beta1 version: "3" diff --git a/operator/api/v1beta1/lokistack_types.go b/operator/api/v1beta1/lokistack_types.go index 77246a4704a02..45d02730c5660 100644 --- a/operator/api/v1beta1/lokistack_types.go +++ b/operator/api/v1beta1/lokistack_types.go @@ -635,6 +635,11 @@ const ( ReasonMissingObjectStorageCAConfigMap LokiStackConditionReason = "MissingObjectStorageCAConfigMap" // ReasonInvalidObjectStorageCAConfigMap when the format of the CA configmap is invalid. ReasonInvalidObjectStorageCAConfigMap LokiStackConditionReason = "InvalidObjectStorageCAConfigMap" + // ReasonMissingRulerSecret when the required secret to authorize remote write connections + // for the ruler is missing. + ReasonMissingRulerSecret LokiStackConditionReason = "MissingRulerSecret" + // ReasonInvalidRulerSecret when the format of the ruler remote write authorization secret is invalid. 
+ ReasonInvalidRulerSecret LokiStackConditionReason = "InvalidRulerSecret" // ReasonInvalidReplicationConfiguration when the configurated replication factor is not valid // with the select cluster size. ReasonInvalidReplicationConfiguration LokiStackConditionReason = "InvalidReplicationConfiguration" diff --git a/operator/api/v1beta1/rulerconfig_types.go b/operator/api/v1beta1/rulerconfig_types.go new file mode 100644 index 0000000000000..1de1eb69475d6 --- /dev/null +++ b/operator/api/v1beta1/rulerconfig_types.go @@ -0,0 +1,422 @@ +package v1beta1 + +import ( + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// AlertManagerDiscoverySpec defines the configuration to use DNS resolution for AlertManager hosts. +type AlertManagerDiscoverySpec struct { + // Use DNS SRV records to discover Alertmanager hosts. + // + // +optional + // +kubebuilder:validation:Optional + // +operator-sdk:csv:customresourcedefinitions:type=spec,displayName="Enable SRV" + EnableSRV bool `json:"enableSRV"` + + // How long to wait between refreshing DNS resolutions of Alertmanager hosts. + // + // +optional + // +kubebuilder:validation:Optional + // +kubebuilder:default:="1m" + // +operator-sdk:csv:customresourcedefinitions:type=spec,displayName="Refresh Interval" + RefreshInterval PrometheusDuration `json:"refreshInterval,omitempty"` +} + +// AlertManagerNotificationQueueSpec defines the configuration for AlertManager notification settings. +type AlertManagerNotificationQueueSpec struct { + // Capacity of the queue for notifications to be sent to the Alertmanager. + // + // +optional + // +kubebuilder:validation:Optional + // +kubebuilder:default:=10000 + // +operator-sdk:csv:customresourcedefinitions:type=spec,xDescriptors="urn:alm:descriptor:com.tectonic.ui:number",displayName="Notification Queue Capacity" + Capacity int32 `json:"capacity,omitempty"` + + // HTTP timeout duration when sending notifications to the Alertmanager. 
+ // + // +optional + // +kubebuilder:validation:Optional + // +kubebuilder:default:="10s" + // +operator-sdk:csv:customresourcedefinitions:type=spec,displayName="Timeout" + Timeout PrometheusDuration `json:"timeout,omitempty"` + + // Max time to tolerate outage for restoring "for" state of alert. + // + // +optional + // +kubebuilder:validation:Optional + // +kubebuilder:default:="1h" + // +operator-sdk:csv:customresourcedefinitions:type=spec,displayName="Outage Tolerance" + ForOutageTolerance PrometheusDuration `json:"forOutageTolerance,omitempty"` + + // Minimum duration between alert and restored "for" state. This is maintained + // only for alerts with configured "for" time greater than the grace period. + // + // +optional + // +kubebuilder:validation:Optional + // +kubebuilder:default:="10m" + // +operator-sdk:csv:customresourcedefinitions:type=spec,displayName="Firing Grace Period" + ForGracePeriod PrometheusDuration `json:"forGracePeriod,omitempty"` + + // Minimum amount of time to wait before resending an alert to Alertmanager. + // + // +optional + // +kubebuilder:validation:Optional + // +kubebuilder:default:="1m" + // +operator-sdk:csv:customresourcedefinitions:type=spec,displayName="Resend Delay" + ResendDelay PrometheusDuration `json:"resendDelay,omitempty"` +} + +// AlertManagerSpec defines the configuration for ruler's alertmanager connectivity. +type AlertManagerSpec struct { + // URL for alerts return path. + // + // +optional + // +kubebuilder:validation:Optional + // +operator-sdk:csv:customresourcedefinitions:type=spec,displayName="Alert External URL" + ExternalURL string `json:"externalUrl,omitempty"` + + // Additional labels to add to all alerts. + // + // +optional + // +kubebuilder:validation:Optional + // +operator-sdk:csv:customresourcedefinitions:type=spec,displayName="Extra Alert Labels" + ExternalLabels map[string]string `json:"externalLabels,omitempty"` + + // If enabled, then requests to Alertmanager use the v2 API. 
+ // + // +optional + // +kubebuilder:validation:Optional + // +operator-sdk:csv:customresourcedefinitions:type=spec,xDescriptors="urn:alm:descriptor:com.tectonic.ui:booleanSwitch",displayName="Enable AlertManager V2 API" + EnableV2 bool `json:"enableV2"` + + // List of AlertManager URLs to send notifications to. Each Alertmanager URL is treated as + // a separate group in the configuration. Multiple Alertmanagers in HA per group can be + // supported by using DNS resolution (See EnableDNSDiscovery). + // + // +required + // +kubebuilder:validation:Required + // +operator-sdk:csv:customresourcedefinitions:type=spec,displayName="AlertManager Endpoints" + Endpoints []string `json:"endpoints"` + + // Defines the configuration for DNS-based discovery of AlertManager hosts. + // + // +optional + // +kubebuilder:validation:Optional + // +operator-sdk:csv:customresourcedefinitions:type=spec,xDescriptors="urn:alm:descriptor:com.tectonic.ui:advanced",displayName="DNS Discovery" + DiscoverySpec *AlertManagerDiscoverySpec `json:"discovery,omitempty"` + + // Defines the configuration for the notification queue to AlertManager hosts. + // + // +optional + // +kubebuilder:validation:Optional + // +operator-sdk:csv:customresourcedefinitions:type=spec,xDescriptors="urn:alm:descriptor:com.tectonic.ui:advanced",displayName="Notification Queue" + NotificationQueueSpec *AlertManagerNotificationQueueSpec `json:"notificationQueue,omitempty"` +} + +// RemoteWriteAuthType defines the type of authorization to use to access the remote write endpoint. +// +// +kubebuilder:validation:Enum=basic;bearer +type RemoteWriteAuthType string + +const ( + // BasicAuthorization defines the remote write client to use HTTP basic authorization. + BasicAuthorization RemoteWriteAuthType = "basic" + // BearerAuthorization defines the remote write client to use HTTP bearer authorization. 
+ BearerAuthorization RemoteWriteAuthType = "bearer" +) + +// RemoteWriteClientSpec defines the configuration of the remote write client. +type RemoteWriteClientSpec struct { + // Name of the remote write config, which if specified must be unique among remote write configs. + // + // +required + // +kubebuilder:validation:Required + // +operator-sdk:csv:customresourcedefinitions:type=spec,displayName="Name" + Name string `json:"name"` + + // The URL of the endpoint to send samples to. + // + // +required + // +kubebuilder:validation:Required + // +operator-sdk:csv:customresourcedefinitions:type=spec,displayName="Endpoint" + URL string `json:"url"` + + // Timeout for requests to the remote write endpoint. + // + // +optional + // +kubebuilder:validation:Optional + // +kubebuilder:default:="30s" + // +operator-sdk:csv:customresourcedefinitions:type=spec,displayName="Remote Write Timeout" + Timeout PrometheusDuration `json:"timeout,omitempty"` + + // Type of authorization to use to access the remote write endpoint. + // + // +required + // +kubebuilder:validation:Required + // +operator-sdk:csv:customresourcedefinitions:type=spec,xDescriptors={"urn:alm:descriptor:com.tectonic.ui:select:basic","urn:alm:descriptor:com.tectonic.ui:select:bearer"},displayName="Authorization Type" + AuthorizationType RemoteWriteAuthType `json:"authorization"` + + // Name of a secret in the namespace configured for authorization secrets. + // + // +required + // +kubebuilder:validation:Required + // +operator-sdk:csv:customresourcedefinitions:type=spec,xDescriptors="urn:alm:descriptor:io.kubernetes:Secret",displayName="Authorization Secret Name" + AuthorizationSecretName string `json:"authorizationSecretName"` + + // Additional HTTP headers to be sent along with each remote write request. + // + // +optional + // +kubebuilder:validation:Optional + AdditionalHeaders map[string]string `json:"additionalHeaders,omitempty"` + + // List of remote write relabel configurations. 
+ // + // +optional + // +kubebuilder:validation:Optional + // +operator-sdk:csv:customresourcedefinitions:type=spec,displayName="Metric Relabel Configuration" + RelabelConfigs []RelabelConfig `json:"relabelConfigs,omitempty"` + + // Optional proxy URL. + // + // +optional + // +kubebuilder:validation:Optional + // +operator-sdk:csv:customresourcedefinitions:type=spec,displayName="HTTP Proxy URL" + ProxyURL string `json:"proxyUrl,omitempty"` + + // Configure whether HTTP requests follow HTTP 3xx redirects. + // + // +optional + // +kubebuilder:validation:Optional + // +kubebuilder:default:=true + // +operator-sdk:csv:customresourcedefinitions:type=spec,xDescriptors="urn:alm:descriptor:com.tectonic.ui:booleanSwitch",displayName="Follow HTTP Redirects" + FollowRedirects bool `json:"followRedirects"` +} + +// RelabelActionType defines the enumeration type for RelabelConfig actions. +// +// +kubebuilder:validation:Enum=drop;hashmod;keep;labeldrop;labelkeep;labelmap;replace +type RelabelActionType string + +// RelabelConfig allows dynamic rewriting of the label set, being applied to samples before ingestion. +// It defines the `metric_relabel_configs` section of the Prometheus configuration. +// More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs +type RelabelConfig struct { + // The source labels select values from existing labels. Their content is concatenated + // using the configured separator and matched against the configured regular expression + // for the replace, keep, and drop actions. + // + // +required + // +kubebuilder:validation:Required + // +operator-sdk:csv:customresourcedefinitions:type=spec,displayName="Source Labels" + SourceLabels []string `json:"sourceLabels"` + + // Separator placed between concatenated source label values. default is ';'. 
+ // + // +optional + // +kubebuilder:validation:Optional + // +kubebuilder:default:=";" + // +operator-sdk:csv:customresourcedefinitions:type=spec,displayName="Separator" + Separator string `json:"separator,omitempty"` + + // Label to which the resulting value is written in a replace action. + // It is mandatory for replace actions. Regex capture groups are available. + // + // +optional + // +kubebuilder:validation:Optional + // +operator-sdk:csv:customresourcedefinitions:type=spec,displayName="Target Label" + TargetLabel string `json:"targetLabel,omitempty"` + + // Regular expression against which the extracted value is matched. Default is '(.*)' + // + // +optional + // +kubebuilder:validation:Optional + // +kubebuilder:default:="(.*)" + // +operator-sdk:csv:customresourcedefinitions:type=spec,displayName="Regex" + Regex string `json:"regex,omitempty"` + + // Modulus to take of the hash of the source label values. + // + // +optional + // +kubebuilder:validation:Optional + // +operator-sdk:csv:customresourcedefinitions:type=spec,displayName="Modulus" + Modulus uint64 `json:"modulus,omitempty"` + + // Replacement value against which a regex replace is performed if the + // regular expression matches. Regex capture groups are available. Default is '$1' + // + // +optional + // +kubebuilder:validation:Optional + // +kubebuilder:default:="$1" + // +operator-sdk:csv:customresourcedefinitions:type=spec,displayName="Replacement" + Replacement string `json:"replacement,omitempty"` + + // Action to perform based on regex matching. 
Default is 'replace' + // + // +optional + // +kubebuilder:validation:Optional + // +kubebuilder:default:="replace" + // +operator-sdk:csv:customresourcedefinitions:type=spec,displayName="Action" + // +operator-sdk:csv:customresourcedefinitions:type=spec,xDescriptors={"urn:alm:descriptor:com.tectonic.ui:select:drop","urn:alm:descriptor:com.tectonic.ui:select:hashmod","urn:alm:descriptor:com.tectonic.ui:select:keep","urn:alm:descriptor:com.tectonic.ui:select:labeldrop","urn:alm:descriptor:com.tectonic.ui:select:labelkeep","urn:alm:descriptor:com.tectonic.ui:select:labelmap","urn:alm:descriptor:com.tectonic.ui:select:replace"},displayName="Action" + Action RelabelActionType `json:"action,omitempty"` +} + +// RemoteWriteClientQueueSpec defines the configuration of the remote write client queue. +type RemoteWriteClientQueueSpec struct { + // Number of samples to buffer per shard before we block reading of more + // + // +optional + // +kubebuilder:validation:Optional + // +kubebuilder:default:=2500 + // +operator-sdk:csv:customresourcedefinitions:type=spec,xDescriptors="urn:alm:descriptor:com.tectonic.ui:number",displayName="Queue Capacity" + Capacity int32 `json:"capacity,omitempty"` + + // Maximum number of shards, i.e. amount of concurrency. + // + // +optional + // +kubebuilder:validation:Optional + // +kubebuilder:default:=200 + // +operator-sdk:csv:customresourcedefinitions:type=spec,xDescriptors="urn:alm:descriptor:com.tectonic.ui:number",displayName="Maximum Shards" + MaxShards int32 `json:"maxShards,omitempty"` + + // Minimum number of shards, i.e. amount of concurrency. + // + // +optional + // +kubebuilder:validation:Optional + // +kubebuilder:default:=200 + // +operator-sdk:csv:customresourcedefinitions:type=spec,xDescriptors="urn:alm:descriptor:com.tectonic.ui:number",displayName="Minimum Shards" + MinShards int32 `json:"minShards,omitempty"` + + // Maximum number of samples per send. 
+ // + // +optional + // +kubebuilder:validation:Optional + // +kubebuilder:default:=500 + // +operator-sdk:csv:customresourcedefinitions:type=spec,xDescriptors="urn:alm:descriptor:com.tectonic.ui:number",displayName="Maximum Samples per Send" + MaxSamplesPerSend int32 `json:"maxSamplesPerSend,omitempty"` + + // Maximum time a sample will wait in buffer. + // + // +optional + // +kubebuilder:validation:Optional + // +kubebuilder:default:="5s" + // +operator-sdk:csv:customresourcedefinitions:type=spec,displayName="Batch Send Deadline" + BatchSendDeadline PrometheusDuration `json:"batchSendDeadline,omitempty"` + + // Initial retry delay. Gets doubled for every retry. + // + // +optional + // +kubebuilder:validation:Optional + // +kubebuilder:default:="30ms" + // +operator-sdk:csv:customresourcedefinitions:type=spec,displayName="Min BackOff Period" + MinBackOffPeriod PrometheusDuration `json:"minBackOffPeriod,omitempty"` + + // Maximum retry delay. + // + // +optional + // +kubebuilder:validation:Optional + // +kubebuilder:default:="100ms" + // +operator-sdk:csv:customresourcedefinitions:type=spec,displayName="Max BackOff Period" + MaxBackOffPeriod PrometheusDuration `json:"maxBackOffPeriod,omitempty"` +} + +// RemoteWriteSpec defines the configuration for ruler's remote_write connectivity. +type RemoteWriteSpec struct { + // Enable remote-write functionality. + // + // +optional + // +kubebuilder:validation:Optional + // +operator-sdk:csv:customresourcedefinitions:type=spec,xDescriptors="urn:alm:descriptor:com.tectonic.ui:booleanSwitch",displayName="Enabled" + Enabled bool `json:"enabled,omitempty"` + + // Minimum period to wait between refreshing remote-write reconfigurations. 
+ // + // +optional + // +kubebuilder:validation:Optional + // +kubebuilder:default:="10s" + // +operator-sdk:csv:customresourcedefinitions:type=spec,displayName="Min Refresh Period" + RefreshPeriod PrometheusDuration `json:"refreshPeriod,omitempty"` + + // Defines the configuration for remote write client. + // + // +optional + // +kubebuilder:validation:Optional + // +operator-sdk:csv:customresourcedefinitions:type=spec,xDescriptors="urn:alm:descriptor:com.tectonic.ui:advanced",displayName="Client" + ClientSpec *RemoteWriteClientSpec `json:"client,omitempty"` + + // Defines the configuration for remote write client queue. + // + // +optional + // +kubebuilder:validation:Optional + // +operator-sdk:csv:customresourcedefinitions:type=spec,xDescriptors="urn:alm:descriptor:com.tectonic.ui:advanced",displayName="Client Queue" + QueueSpec *RemoteWriteClientQueueSpec `json:"queue,omitempty"` +} + +// RulerConfigSpec defines the desired state of Ruler +type RulerConfigSpec struct { + // Interval on how frequently to evaluate rules. + // + // +optional + // +kubebuilder:validation:Optional + // +kubebuilder:default:="1m" + // +operator-sdk:csv:customresourcedefinitions:type=spec,displayName="Evaluation Interval" + EvalutionInterval PrometheusDuration `json:"evaluationInterval,omitempty"` + + // Interval on how frequently to poll for new rule definitions. + // + // +optional + // +kubebuilder:validation:Optional + // +kubebuilder:default:="1m" + // +operator-sdk:csv:customresourcedefinitions:type=spec,displayName="Poll Interval" + PollInterval PrometheusDuration `json:"pollInterval,omitempty"` + + // Defines alert manager configuration to notify on firing alerts. 
+ // + // +optional + // +kubebuilder:validation:Optional + // +operator-sdk:csv:customresourcedefinitions:type=spec,xDescriptors="urn:alm:descriptor:com.tectonic.ui:advanced",displayName="Alert Manager Configuration" + AlertManagerSpec *AlertManagerSpec `json:"alertmanager,omitempty"` + + // Defines a remote write endpoint to write recording rule metrics. + // + // +optional + // +kubebuilder:validation:Optional + // +operator-sdk:csv:customresourcedefinitions:type=spec,xDescriptors="urn:alm:descriptor:com.tectonic.ui:advanced",displayName="Remote Write Configuration" + RemoteWriteSpec *RemoteWriteSpec `json:"remoteWrite,omitempty"` +} + +// RulerConfigStatus defines the observed state of RulerConfig +type RulerConfigStatus struct { + // Conditions of the RulerConfig health. + // + // +optional + // +kubebuilder:validation:Optional + // +operator-sdk:csv:customresourcedefinitions:type=status,xDescriptors="urn:alm:descriptor:io.kubernetes.conditions" + Conditions []metav1.Condition `json:"conditions,omitempty"` +} + +//+kubebuilder:object:root=true +//+kubebuilder:subresource:status + +// RulerConfig is the Schema for the rulerconfigs API +// +// +operator-sdk:csv:customresourcedefinitions:displayName="RulerConfig",resources={{LokiStack,v1beta1}} +type RulerConfig struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + + Spec RulerConfigSpec `json:"spec,omitempty"` + Status RulerConfigStatus `json:"status,omitempty"` +} + +//+kubebuilder:object:root=true + +// RulerConfigList contains a list of RulerConfig +type RulerConfigList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata,omitempty"` + Items []RulerConfig `json:"items"` +} + +func init() { + SchemeBuilder.Register(&RulerConfig{}, &RulerConfigList{}) +} diff --git a/operator/api/v1beta1/zz_generated.deepcopy.go b/operator/api/v1beta1/zz_generated.deepcopy.go index 438158ac21081..22c5ccd1c1ec0 100644 --- 
a/operator/api/v1beta1/zz_generated.deepcopy.go +++ b/operator/api/v1beta1/zz_generated.deepcopy.go @@ -11,6 +11,73 @@ import ( "k8s.io/apimachinery/pkg/runtime" ) +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *AlertManagerDiscoverySpec) DeepCopyInto(out *AlertManagerDiscoverySpec) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new AlertManagerDiscoverySpec. +func (in *AlertManagerDiscoverySpec) DeepCopy() *AlertManagerDiscoverySpec { + if in == nil { + return nil + } + out := new(AlertManagerDiscoverySpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *AlertManagerNotificationQueueSpec) DeepCopyInto(out *AlertManagerNotificationQueueSpec) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new AlertManagerNotificationQueueSpec. +func (in *AlertManagerNotificationQueueSpec) DeepCopy() *AlertManagerNotificationQueueSpec { + if in == nil { + return nil + } + out := new(AlertManagerNotificationQueueSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *AlertManagerSpec) DeepCopyInto(out *AlertManagerSpec) { + *out = *in + if in.ExternalLabels != nil { + in, out := &in.ExternalLabels, &out.ExternalLabels + *out = make(map[string]string, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } + if in.Endpoints != nil { + in, out := &in.Endpoints, &out.Endpoints + *out = make([]string, len(*in)) + copy(*out, *in) + } + if in.DiscoverySpec != nil { + in, out := &in.DiscoverySpec, &out.DiscoverySpec + *out = new(AlertManagerDiscoverySpec) + **out = **in + } + if in.NotificationQueueSpec != nil { + in, out := &in.NotificationQueueSpec, &out.NotificationQueueSpec + *out = new(AlertManagerNotificationQueueSpec) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new AlertManagerSpec. +func (in *AlertManagerSpec) DeepCopy() *AlertManagerSpec { + if in == nil { + return nil + } + out := new(AlertManagerSpec) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *AlertingRule) DeepCopyInto(out *AlertingRule) { *out = *in @@ -909,6 +976,95 @@ func (in *RecordingRuleStatus) DeepCopy() *RecordingRuleStatus { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *RelabelConfig) DeepCopyInto(out *RelabelConfig) { + *out = *in + if in.SourceLabels != nil { + in, out := &in.SourceLabels, &out.SourceLabels + *out = make([]string, len(*in)) + copy(*out, *in) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RelabelConfig. +func (in *RelabelConfig) DeepCopy() *RelabelConfig { + if in == nil { + return nil + } + out := new(RelabelConfig) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *RemoteWriteClientQueueSpec) DeepCopyInto(out *RemoteWriteClientQueueSpec) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RemoteWriteClientQueueSpec. +func (in *RemoteWriteClientQueueSpec) DeepCopy() *RemoteWriteClientQueueSpec { + if in == nil { + return nil + } + out := new(RemoteWriteClientQueueSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *RemoteWriteClientSpec) DeepCopyInto(out *RemoteWriteClientSpec) { + *out = *in + if in.AdditionalHeaders != nil { + in, out := &in.AdditionalHeaders, &out.AdditionalHeaders + *out = make(map[string]string, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } + if in.RelabelConfigs != nil { + in, out := &in.RelabelConfigs, &out.RelabelConfigs + *out = make([]RelabelConfig, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RemoteWriteClientSpec. +func (in *RemoteWriteClientSpec) DeepCopy() *RemoteWriteClientSpec { + if in == nil { + return nil + } + out := new(RemoteWriteClientSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *RemoteWriteSpec) DeepCopyInto(out *RemoteWriteSpec) { + *out = *in + if in.ClientSpec != nil { + in, out := &in.ClientSpec, &out.ClientSpec + *out = new(RemoteWriteClientSpec) + (*in).DeepCopyInto(*out) + } + if in.QueueSpec != nil { + in, out := &in.QueueSpec, &out.QueueSpec + *out = new(RemoteWriteClientQueueSpec) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RemoteWriteSpec. 
+func (in *RemoteWriteSpec) DeepCopy() *RemoteWriteSpec { + if in == nil { + return nil + } + out := new(RemoteWriteSpec) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *RoleBindingsSpec) DeepCopyInto(out *RoleBindingsSpec) { *out = *in @@ -964,6 +1120,112 @@ func (in *RoleSpec) DeepCopy() *RoleSpec { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *RulerConfig) DeepCopyInto(out *RulerConfig) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + in.Spec.DeepCopyInto(&out.Spec) + in.Status.DeepCopyInto(&out.Status) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RulerConfig. +func (in *RulerConfig) DeepCopy() *RulerConfig { + if in == nil { + return nil + } + out := new(RulerConfig) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *RulerConfig) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *RulerConfigList) DeepCopyInto(out *RulerConfigList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]RulerConfig, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RulerConfigList. 
+func (in *RulerConfigList) DeepCopy() *RulerConfigList { + if in == nil { + return nil + } + out := new(RulerConfigList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *RulerConfigList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *RulerConfigSpec) DeepCopyInto(out *RulerConfigSpec) { + *out = *in + if in.AlertManagerSpec != nil { + in, out := &in.AlertManagerSpec, &out.AlertManagerSpec + *out = new(AlertManagerSpec) + (*in).DeepCopyInto(*out) + } + if in.RemoteWriteSpec != nil { + in, out := &in.RemoteWriteSpec, &out.RemoteWriteSpec + *out = new(RemoteWriteSpec) + (*in).DeepCopyInto(*out) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RulerConfigSpec. +func (in *RulerConfigSpec) DeepCopy() *RulerConfigSpec { + if in == nil { + return nil + } + out := new(RulerConfigSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *RulerConfigStatus) DeepCopyInto(out *RulerConfigStatus) { + *out = *in + if in.Conditions != nil { + in, out := &in.Conditions, &out.Conditions + *out = make([]v1.Condition, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RulerConfigStatus. +func (in *RulerConfigStatus) DeepCopy() *RulerConfigStatus { + if in == nil { + return nil + } + out := new(RulerConfigStatus) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *RulesSpec) DeepCopyInto(out *RulesSpec) { *out = *in diff --git a/operator/bundle/manifests/loki-operator.clusterserviceversion.yaml b/operator/bundle/manifests/loki-operator.clusterserviceversion.yaml index fe768f9bc439d..0f87de6222973 100644 --- a/operator/bundle/manifests/loki-operator.clusterserviceversion.yaml +++ b/operator/bundle/manifests/loki-operator.clusterserviceversion.yaml @@ -85,6 +85,66 @@ metadata: ], "tenantID": "test-tenant" } + }, + { + "apiVersion": "loki.grafana.com/v1beta1", + "kind": "RulerConfig", + "metadata": { + "name": "rulerconfig-sample" + }, + "spec": { + "alertmanager": { + "discovery": { + "enabled": true, + "refreshInterval": "1m" + }, + "enableV2": true, + "endpoints": [ + "http://alertmanager-host1.mycompany.org", + "http://alertmanager-host2.mycompany.org" + ], + "externalLabels": { + "environment": "production", + "region": "us-east-2" + }, + "externalUrl": "http://www.mycompany.org/alerts", + "notificationQueue": { + "capacity": 1000, + "forGracePeriod": "10m", + "forOutageTolerance": "1h", + "resendDelay": "1m", + "timeout": "30s" + } + }, + "evaluationInterval": "1m", + "pollInterval": "1m", + "remoteWrite": { + "client": { + "authorization": "basic", + "authorizationSecretName": "my-secret-resource", + "followRedirects": true, + "name": "remote-write-log-metrics", + "proxyURL": "http://proxy-host.mycompany.org", + "relabelConfigs": [ + { + "action": "replace", + "regex": "ALERTS.*", + "replacement": "$1", + "separator": ";", + "source_labels": [ + "labelc", + "labeld" + ], + "targetLabel": "labelnew" + } + ], + "timeout": "30s", + "url": "http://remote-write-host.mycompany.org" + }, + "enabled": true, + "refreshPeriod": "10s" + } + } } ] capabilities: Full Lifecycle @@ -636,6 +696,209 @@ spec: x-descriptors: - urn:alm:descriptor:io.kubernetes.conditions version: v1beta1 + - description: RulerConfig is the Schema for the rulerconfigs API + displayName: RulerConfig + kind: RulerConfig + name: 
rulerconfigs.loki.grafana.com + resources: + - kind: LokiStack + name: "" + version: v1beta1 + specDescriptors: + - description: Defines alert manager configuration to notify on firing alerts. + displayName: Alert Manager Configuration + path: alertmanager + x-descriptors: + - urn:alm:descriptor:com.tectonic.ui:advanced + - description: Defines the configuration for DNS-based discovery of AlertManager + hosts. + displayName: DNS Discovery + path: alertmanager.discovery + x-descriptors: + - urn:alm:descriptor:com.tectonic.ui:advanced + - description: Use DNS SRV records to discover Alertmanager hosts. + displayName: Enable SRV + path: alertmanager.discovery.enableSRV + - description: How long to wait between refreshing DNS resolutions of Alertmanager + hosts. + displayName: Refresh Interval + path: alertmanager.discovery.refreshInterval + - description: If enabled, then requests to Alertmanager use the v2 API. + displayName: Enable AlertManager V2 API + path: alertmanager.enableV2 + x-descriptors: + - urn:alm:descriptor:com.tectonic.ui:booleanSwitch + - description: List of AlertManager URLs to send notifications to. Each Alertmanager + URL is treated as a separate group in the configuration. Multiple Alertmanagers + in HA per group can be supported by using DNS resolution (See EnableDNSDiscovery). + displayName: AlertManager Endpoints + path: alertmanager.endpoints + - description: Additional labels to add to all alerts. + displayName: Extra Alert Labels + path: alertmanager.externalLabels + - description: URL for alerts return path. + displayName: Alert External URL + path: alertmanager.externalUrl + - description: Defines the configuration for the notification queue to AlertManager + hosts. + displayName: Notification Queue + path: alertmanager.notificationQueue + x-descriptors: + - urn:alm:descriptor:com.tectonic.ui:advanced + - description: Capacity of the queue for notifications to be sent to the Alertmanager. 
+ displayName: Notification Queue Capacity + path: alertmanager.notificationQueue.capacity + x-descriptors: + - urn:alm:descriptor:com.tectonic.ui:number + - description: Minimum duration between alert and restored "for" state. This + is maintained only for alerts with configured "for" time greater than the + grace period. + displayName: Firing Grace Period + path: alertmanager.notificationQueue.forGracePeriod + - description: Max time to tolerate outage for restoring "for" state of alert. + displayName: Outage Tolerance + path: alertmanager.notificationQueue.forOutageTolerance + - description: Minimum amount of time to wait before resending an alert to Alertmanager. + displayName: Resend Delay + path: alertmanager.notificationQueue.resendDelay + - description: HTTP timeout duration when sending notifications to the Alertmanager. + displayName: Timeout + path: alertmanager.notificationQueue.timeout + - description: Interval on how frequently to evaluate rules. + displayName: Evaluation Interval + path: evaluationInterval + - description: Interval on how frequently to poll for new rule definitions. + displayName: Poll Interval + path: pollInterval + - description: Defines a remote write endpoint to write recording rule metrics. + displayName: Remote Write Configuration + path: remoteWrite + x-descriptors: + - urn:alm:descriptor:com.tectonic.ui:advanced + - description: Defines the configuration for remote write client. + displayName: Client + path: remoteWrite.client + x-descriptors: + - urn:alm:descriptor:com.tectonic.ui:advanced + - description: Type of authorization to use to access the remote write endpoint + displayName: Authorization Type + path: remoteWrite.client.authorization + x-descriptors: + - urn:alm:descriptor:com.tectonic.ui:select:basic + - urn:alm:descriptor:com.tectonic.ui:select:header + - description: Name of a secret in the namespace configured for authorization + secrets.
+ displayName: Authorization Secret Name + path: remoteWrite.client.authorizationSecretName + x-descriptors: + - urn:alm:descriptor:io.kubernetes:Secret + - description: Configure whether HTTP requests follow HTTP 3xx redirects. + displayName: Follow HTTP Redirects + path: remoteWrite.client.followRedirects + x-descriptors: + - urn:alm:descriptor:com.tectonic.ui:booleanSwitch + - description: Name of the remote write config, which if specified must be unique + among remote write configs. + displayName: Name + path: remoteWrite.client.name + - description: Optional proxy URL. + displayName: HTTP Proxy URL + path: remoteWrite.client.proxyUrl + - description: List of remote write relabel configurations. + displayName: Metric Relabel Configuration + path: remoteWrite.client.relabelConfigs + - description: Action to perform based on regex matching. Default is 'replace' + displayName: Action + path: remoteWrite.client.relabelConfigs[0].action + x-descriptors: + - urn:alm:descriptor:com.tectonic.ui:select:drop + - urn:alm:descriptor:com.tectonic.ui:select:hashmod + - urn:alm:descriptor:com.tectonic.ui:select:keep + - urn:alm:descriptor:com.tectonic.ui:select:labeldrop + - urn:alm:descriptor:com.tectonic.ui:select:labelkeep + - urn:alm:descriptor:com.tectonic.ui:select:labelmap + - urn:alm:descriptor:com.tectonic.ui:select:replace + - description: Modulus to take of the hash of the source label values. + displayName: Modulus + path: remoteWrite.client.relabelConfigs[0].modulus + - description: Regular expression against which the extracted value is matched. + Default is '(.*)' + displayName: Regex + path: remoteWrite.client.relabelConfigs[0].regex + - description: Replacement value against which a regex replace is performed + if the regular expression matches. Regex capture groups are available. Default + is '$1' + displayName: Replacement + path: remoteWrite.client.relabelConfigs[0].replacement + - description: Separator placed between concatenated source label values. 
default + is ';'. + displayName: Separator + path: remoteWrite.client.relabelConfigs[0].separator + - description: The source labels select values from existing labels. Their content + is concatenated using the configured separator and matched against the configured + regular expression for the replace, keep, and drop actions. + displayName: Source Labels + path: remoteWrite.client.relabelConfigs[0].sourceLabels + - description: Label to which the resulting value is written in a replace action. + It is mandatory for replace actions. Regex capture groups are available. + displayName: Target Label + path: remoteWrite.client.relabelConfigs[0].targetLabel + - description: Timeout for requests to the remote write endpoint. + displayName: Remote Write Timeout + path: remoteWrite.client.timeout + - description: The URL of the endpoint to send samples to. + displayName: Endpoint + path: remoteWrite.client.url + - description: Enable remote-write functionality. + displayName: Enabled + path: remoteWrite.enabled + x-descriptors: + - urn:alm:descriptor:com.tectonic.ui:booleanSwitch + - description: Defines the configuration for remote write client queue. + displayName: Client Queue + path: remoteWrite.queue + x-descriptors: + - urn:alm:descriptor:com.tectonic.ui:advanced + - description: Maximum time a sample will wait in buffer. + displayName: Batch Send Deadline + path: remoteWrite.queue.batchSendDeadline + - description: Number of samples to buffer per shard before we block reading + of more + displayName: Queue Capacity + path: remoteWrite.queue.capacity + x-descriptors: + - urn:alm:descriptor:com.tectonic.ui:number + - description: Maximum retry delay. + displayName: Max BackOff Period + path: remoteWrite.queue.maxBackOffPeriod + - description: Maximum number of samples per send. + displayName: Maximum Samples per Send + path: remoteWrite.queue.maxSamplesPerSend + x-descriptors: + - urn:alm:descriptor:com.tectonic.ui:number + - description: Maximum number of shards, i.e.
amount of concurrency. + displayName: Maximum Shards + path: remoteWrite.queue.maxShards + x-descriptors: + - urn:alm:descriptor:com.tectonic.ui:number + - description: Initial retry delay. Gets doubled for every retry. + displayName: Min BackOff Period + path: remoteWrite.queue.minBackOffPeriod + - description: Minimum number of shards, i.e. amount of concurrency. + displayName: Minimum Shards + path: remoteWrite.queue.minShards + x-descriptors: + - urn:alm:descriptor:com.tectonic.ui:number + - description: Minimum period to wait between refreshing remote-write reconfigurations. + displayName: Min Refresh Period + path: remoteWrite.refreshPeriod + statusDescriptors: + - description: Conditions of the RulerConfig health. + displayName: Conditions + path: conditions + x-descriptors: + - urn:alm:descriptor:io.kubernetes.conditions + version: v1beta1 description: | The Loki Operator for OCP provides a means for configuring and managing a Loki stack for cluster logging. ## Prerequisites and Requirements @@ -785,6 +1048,32 @@ spec: - get - patch - update + - apiGroups: + - loki.grafana.com + resources: + - rulerconfigs + verbs: + - create + - delete + - get + - list + - patch + - update + - watch + - apiGroups: + - loki.grafana.com + resources: + - rulerconfigs/finalizers + verbs: + - update + - apiGroups: + - loki.grafana.com + resources: + - rulerconfigs/status + verbs: + - get + - patch + - update - apiGroups: - monitoring.coreos.com resources: diff --git a/operator/bundle/manifests/loki.grafana.com_rulerconfigs.yaml b/operator/bundle/manifests/loki.grafana.com_rulerconfigs.yaml new file mode 100644 index 0000000000000..d3c83118f739e --- /dev/null +++ b/operator/bundle/manifests/loki.grafana.com_rulerconfigs.yaml @@ -0,0 +1,375 @@ +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.8.0 + creationTimestamp: null + labels: + app.kubernetes.io/instance: loki-operator-v0.0.1 + 
app.kubernetes.io/managed-by: operator-lifecycle-manager + app.kubernetes.io/name: loki-operator + app.kubernetes.io/part-of: cluster-logging + app.kubernetes.io/version: 0.0.1 + name: rulerconfigs.loki.grafana.com +spec: + group: loki.grafana.com + names: + kind: RulerConfig + listKind: RulerConfigList + plural: rulerconfigs + singular: rulerconfig + scope: Namespaced + versions: + - name: v1beta1 + schema: + openAPIV3Schema: + description: RulerConfig is the Schema for the rulerconfigs API + properties: + apiVersion: + description: 'APIVersion defines the versioned schema of this representation + of an object. Servers should convert recognized schemas to the latest + internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources' + type: string + kind: + description: 'Kind is a string value representing the REST resource this + object represents. Servers may infer this from the endpoint the client + submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' + type: string + metadata: + type: object + spec: + description: RulerConfigSpec defines the desired state of Ruler + properties: + alertmanager: + description: Defines alert manager configuration to notify on firing + alerts. + properties: + discovery: + description: Defines the configuration for DNS-based discovery + of AlertManager hosts. + properties: + enableSRV: + description: Use DNS SRV records to discover Alertmanager + hosts. + type: boolean + refreshInterval: + default: 1m + description: How long to wait between refreshing DNS resolutions + of Alertmanager hosts. + pattern: ((([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?|0) + type: string + type: object + enableV2: + description: If enabled, then requests to Alertmanager use the + v2 API. 
+ type: boolean + endpoints: + description: List of AlertManager URLs to send notifications to. + Each Alertmanager URL is treated as a separate group in the + configuration. Multiple Alertmanagers in HA per group can be + supported by using DNS resolution (See EnableDNSDiscovery). + items: + type: string + type: array + externalLabels: + additionalProperties: + type: string + description: Additional labels to add to all alerts. + type: object + externalUrl: + description: URL for alerts return path. + type: string + notificationQueue: + description: Defines the configuration for the notification queue + to AlertManager hosts. + properties: + capacity: + default: 10000 + description: Capacity of the queue for notifications to be + sent to the Alertmanager. + format: int32 + type: integer + forGracePeriod: + default: 10m + description: Minimum duration between alert and restored "for" + state. This is maintained only for alerts with configured + "for" time greater than the grace period. + pattern: ((([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?|0) + type: string + forOutageTolerance: + default: 1h + description: Max time to tolerate outage for restoring "for" + state of alert. + pattern: ((([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?|0) + type: string + resendDelay: + default: 1m + description: Minimum amount of time to wait before resending + an alert to Alertmanager. + pattern: ((([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?|0) + type: string + timeout: + default: 10s + description: HTTP timeout duration when sending notifications + to the Alertmanager. + pattern: ((([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?|0) + type: string + type: object + required: + - endpoints + type: object + evaluationInterval: + default: 1m + description: Interval on how frequently to evaluate rules. 
+ pattern: ((([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?|0) + type: string + pollInterval: + default: 1m + description: Interval on how frequently to poll for new rule definitions. + pattern: ((([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?|0) + type: string + remoteWrite: + description: Defines a remote write endpoint to write recording rule + metrics. + properties: + client: + description: Defines the configuration for remote write client. + properties: + additionalHeaders: + additionalProperties: + type: string + description: Additional HTTP headers to be sent along with + each remote write request. + type: object + authorization: + description: Type of authorization to use to access the remote + write endpoint + enum: + - basic + - header + type: string + authorizationSecretName: + description: Name of a secret in the namespace configured + for authorization secrets. + type: string + followRedirects: + default: true + description: Configure whether HTTP requests follow HTTP 3xx + redirects. + type: boolean + name: + description: Name of the remote write config, which if specified + must be unique among remote write configs. + type: string + proxyUrl: + description: Optional proxy URL. + type: string + relabelConfigs: + description: List of remote write relabel configurations. + items: + description: 'RelabelConfig allows dynamic rewriting of + the label set, being applied to samples before ingestion. + It defines `<metric_relabel_configs>`-section of Prometheus + configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs' + properties: + action: + default: replace + description: Action to perform based on regex matching. + Default is 'replace' + enum: + - drop + - hashmod + - keep + - labeldrop + - labelkeep + - labelmap + - replace + type: string + modulus: + description: Modulus to take of the hash of the source + label values.
+ format: int64 + type: integer + regex: + default: (.*) + description: Regular expression against which the extracted + value is matched. Default is '(.*)' + type: string + replacement: + default: $1 + description: Replacement value against which a regex + replace is performed if the regular expression matches. + Regex capture groups are available. Default is '$1' + type: string + separator: + default: ; + description: Separator placed between concatenated source + label values. default is ';'. + type: string + sourceLabels: + description: The source labels select values from existing + labels. Their content is concatenated using the configured + separator and matched against the configured regular + expression for the replace, keep, and drop actions. + items: + type: string + type: array + targetLabel: + description: Label to which the resulting value is written + in a replace action. It is mandatory for replace actions. + Regex capture groups are available. + type: string + required: + - sourceLabels + type: object + type: array + timeout: + default: 30s + description: Timeout for requests to the remote write endpoint. + pattern: ((([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?|0) + type: string + url: + description: The URL of the endpoint to send samples to. + type: string + required: + - authorization + - authorizationSecretName + - name + - url + type: object + enabled: + description: Enable remote-write functionality. + type: boolean + queue: + description: Defines the configuration for remote write client + queue. + properties: + batchSendDeadline: + default: 5s + description: Maximum time a sample will wait in buffer. 
+ pattern: ((([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?|0) + type: string + capacity: + default: 2500 + description: Number of samples to buffer per shard before + we block reading of more + format: int32 + type: integer + maxBackOffPeriod: + default: 100ms + description: Maximum retry delay. + pattern: ((([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?|0) + type: string + maxSamplesPerSend: + default: 500 + description: Maximum number of samples per send. + format: int32 + type: integer + maxShards: + default: 200 + description: Maximum number of shards, i.e. amount of concurrency. + format: int32 + type: integer + minBackOffPeriod: + default: 30ms + description: Initial retry delay. Gets doubled for every retry. + pattern: ((([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?|0) + type: string + minShards: + default: 200 + description: Minimum number of shards, i.e. amount of concurrency. + format: int32 + type: integer + type: object + refreshPeriod: + default: 10s + description: Minimum period to wait between refreshing remote-write + reconfigurations. + pattern: ((([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?|0) + type: string + type: object + type: object + status: + description: RulerConfigStatus defines the observed state of RulerConfig + properties: + conditions: + description: Conditions of the RulerConfig health. + items: + description: "Condition contains details for one aspect of the current + state of this API Resource. --- This struct is intended for direct + use as an array at the field path .status.conditions. For example, + type FooStatus struct{ // Represents the observations of a foo's + current state. 
// Known .status.conditions.type are: \"Available\", + \"Progressing\", and \"Degraded\" // +patchMergeKey=type // +patchStrategy=merge + // +listType=map // +listMapKey=type Conditions []metav1.Condition + `json:\"conditions,omitempty\" patchStrategy:\"merge\" patchMergeKey:\"type\" + protobuf:\"bytes,1,rep,name=conditions\"` \n // other fields }" + properties: + lastTransitionTime: + description: lastTransitionTime is the last time the condition + transitioned from one status to another. This should be when + the underlying condition changed. If that is not known, then + using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: message is a human readable message indicating + details about the transition. This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: observedGeneration represents the .metadata.generation + that the condition was set based upon. For instance, if .metadata.generation + is currently 12, but the .status.conditions[x].observedGeneration + is 9, the condition is out of date with respect to the current + state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: reason contains a programmatic identifier indicating + the reason for the condition's last transition. Producers + of specific condition types may define expected values and + meanings for this field, and whether the values are considered + a guaranteed API. The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. 
+ --- Many .condition.type values are consistent across resources + like Available, but because arbitrary conditions can be useful + (see .node.status.conditions), the ability to deconflict is + important. The regex it matches is (dns1123SubdomainFmt/)?(qualifiedNameFmt) + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + type: object + type: object + served: true + storage: true + subresources: + status: {} +status: + acceptedNames: + kind: "" + plural: "" + conditions: [] + storedVersions: [] diff --git a/operator/config/crd/bases/loki.grafana.com_rulerconfigs.yaml b/operator/config/crd/bases/loki.grafana.com_rulerconfigs.yaml new file mode 100644 index 0000000000000..6e49619402b3b --- /dev/null +++ b/operator/config/crd/bases/loki.grafana.com_rulerconfigs.yaml @@ -0,0 +1,370 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.8.0 + creationTimestamp: null + name: rulerconfigs.loki.grafana.com +spec: + group: loki.grafana.com + names: + kind: RulerConfig + listKind: RulerConfigList + plural: rulerconfigs + singular: rulerconfig + scope: Namespaced + versions: + - name: v1beta1 + schema: + openAPIV3Schema: + description: RulerConfig is the Schema for the rulerconfigs API + properties: + apiVersion: + description: 'APIVersion defines the versioned schema of this representation + of an object. Servers should convert recognized schemas to the latest + internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources' + type: string + kind: + description: 'Kind is a string value representing the REST resource this + object represents. 
Servers may infer this from the endpoint the client + submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' + type: string + metadata: + type: object + spec: + description: RulerConfigSpec defines the desired state of Ruler + properties: + alertmanager: + description: Defines alert manager configuration to notify on firing + alerts. + properties: + discovery: + description: Defines the configuration for DNS-based discovery + of AlertManager hosts. + properties: + enableSRV: + description: Use DNS SRV records to discover Alertmanager + hosts. + type: boolean + refreshInterval: + default: 1m + description: How long to wait between refreshing DNS resolutions + of Alertmanager hosts. + pattern: ((([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?|0) + type: string + type: object + enableV2: + description: If enabled, then requests to Alertmanager use the + v2 API. + type: boolean + endpoints: + description: List of AlertManager URLs to send notifications to. + Each Alertmanager URL is treated as a separate group in the + configuration. Multiple Alertmanagers in HA per group can be + supported by using DNS resolution (See EnableDNSDiscovery). + items: + type: string + type: array + externalLabels: + additionalProperties: + type: string + description: Additional labels to add to all alerts. + type: object + externalUrl: + description: URL for alerts return path. + type: string + notificationQueue: + description: Defines the configuration for the notification queue + to AlertManager hosts. + properties: + capacity: + default: 10000 + description: Capacity of the queue for notifications to be + sent to the Alertmanager. + format: int32 + type: integer + forGracePeriod: + default: 10m + description: Minimum duration between alert and restored "for" + state. 
This is maintained only for alerts with configured + "for" time greater than the grace period. + pattern: ((([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?|0) + type: string + forOutageTolerance: + default: 1h + description: Max time to tolerate outage for restoring "for" + state of alert. + pattern: ((([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?|0) + type: string + resendDelay: + default: 1m + description: Minimum amount of time to wait before resending + an alert to Alertmanager. + pattern: ((([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?|0) + type: string + timeout: + default: 10s + description: HTTP timeout duration when sending notifications + to the Alertmanager. + pattern: ((([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?|0) + type: string + type: object + required: + - endpoints + type: object + evaluationInterval: + default: 1m + description: Interval on how frequently to evaluate rules. + pattern: ((([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?|0) + type: string + pollInterval: + default: 1m + description: Interval on how frequently to poll for new rule definitions. + pattern: ((([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?|0) + type: string + remoteWrite: + description: Defines a remote write endpoint to write recording rule + metrics. + properties: + client: + description: Defines the configuration for remote write client. + properties: + additionalHeaders: + additionalProperties: + type: string + description: Additional HTTP headers to be sent along with + each remote write request. 
+ type: object + authorization: + description: Type of authorization to use to access the remote + write endpoint + enum: + - basic + - header + type: string + authorizationSecretName: + description: Name of a secret in the namespace configured + for authorization secrets. + type: string + followRedirects: + default: true + description: Configure whether HTTP requests follow HTTP 3xx + redirects. + type: boolean + name: + description: Name of the remote write config, which if specified + must be unique among remote write configs. + type: string + proxyUrl: + description: Optional proxy URL. + type: string + relabelConfigs: + description: List of remote write relabel configurations. + items: + description: 'RelabelConfig allows dynamic rewriting of + the label set, being applied to samples before ingestion. + It defines `<metric_relabel_configs>`-section of Prometheus + configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs' + properties: + action: + default: replace + description: Action to perform based on regex matching. + Default is 'replace' + enum: + - drop + - hashmod + - keep + - labeldrop + - labelkeep + - labelmap + - replace + type: string + modulus: + description: Modulus to take of the hash of the source + label values. + format: int64 + type: integer + regex: + default: (.*) + description: Regular expression against which the extracted + value is matched. Default is '(.*)' + type: string + replacement: + default: $1 + description: Replacement value against which a regex + replace is performed if the regular expression matches. + Regex capture groups are available. Default is '$1' + type: string + separator: + default: ; + description: Separator placed between concatenated source + label values. default is ';'. + type: string + sourceLabels: + description: The source labels select values from existing + labels.
Their content is concatenated using the configured + separator and matched against the configured regular + expression for the replace, keep, and drop actions. + items: + type: string + type: array + targetLabel: + description: Label to which the resulting value is written + in a replace action. It is mandatory for replace actions. + Regex capture groups are available. + type: string + required: + - sourceLabels + type: object + type: array + timeout: + default: 30s + description: Timeout for requests to the remote write endpoint. + pattern: ((([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?|0) + type: string + url: + description: The URL of the endpoint to send samples to. + type: string + required: + - authorization + - authorizationSecretName + - name + - url + type: object + enabled: + description: Enable remote-write functionality. + type: boolean + queue: + description: Defines the configuration for remote write client + queue. + properties: + batchSendDeadline: + default: 5s + description: Maximum time a sample will wait in buffer. + pattern: ((([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?|0) + type: string + capacity: + default: 2500 + description: Number of samples to buffer per shard before + we block reading of more + format: int32 + type: integer + maxBackOffPeriod: + default: 100ms + description: Maximum retry delay. + pattern: ((([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?|0) + type: string + maxSamplesPerSend: + default: 500 + description: Maximum number of samples per send. + format: int32 + type: integer + maxShards: + default: 200 + description: Maximum number of shards, i.e. amount of concurrency. + format: int32 + type: integer + minBackOffPeriod: + default: 30ms + description: Initial retry delay. Gets doubled for every retry. 
+ pattern: ((([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?|0) + type: string + minShards: + default: 200 + description: Minimum number of shards, i.e. amount of concurrency. + format: int32 + type: integer + type: object + refreshPeriod: + default: 10s + description: Minimum period to wait between refreshing remote-write + reconfigurations. + pattern: ((([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?|0) + type: string + type: object + type: object + status: + description: RulerConfigStatus defines the observed state of RulerConfig + properties: + conditions: + description: Conditions of the RulerConfig health. + items: + description: "Condition contains details for one aspect of the current + state of this API Resource. --- This struct is intended for direct + use as an array at the field path .status.conditions. For example, + type FooStatus struct{ // Represents the observations of a foo's + current state. // Known .status.conditions.type are: \"Available\", + \"Progressing\", and \"Degraded\" // +patchMergeKey=type // +patchStrategy=merge + // +listType=map // +listMapKey=type Conditions []metav1.Condition + `json:\"conditions,omitempty\" patchStrategy:\"merge\" patchMergeKey:\"type\" + protobuf:\"bytes,1,rep,name=conditions\"` \n // other fields }" + properties: + lastTransitionTime: + description: lastTransitionTime is the last time the condition + transitioned from one status to another. This should be when + the underlying condition changed. If that is not known, then + using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: message is a human readable message indicating + details about the transition. This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: observedGeneration represents the .metadata.generation + that the condition was set based upon. 
For instance, if .metadata.generation + is currently 12, but the .status.conditions[x].observedGeneration + is 9, the condition is out of date with respect to the current + state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: reason contains a programmatic identifier indicating + the reason for the condition's last transition. Producers + of specific condition types may define expected values and + meanings for this field, and whether the values are considered + a guaranteed API. The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. + --- Many .condition.type values are consistent across resources + like Available, but because arbitrary conditions can be useful + (see .node.status.conditions), the ability to deconflict is + important. 
The regex it matches is (dns1123SubdomainFmt/)?(qualifiedNameFmt) + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + type: object + type: object + served: true + storage: true + subresources: + status: {} +status: + acceptedNames: + kind: "" + plural: "" + conditions: [] + storedVersions: [] diff --git a/operator/config/crd/kustomization.yaml b/operator/config/crd/kustomization.yaml index 2241baa2c8661..e33c65ac2c1c8 100644 --- a/operator/config/crd/kustomization.yaml +++ b/operator/config/crd/kustomization.yaml @@ -5,6 +5,7 @@ resources: - bases/loki.grafana.com_lokistacks.yaml - bases/loki.grafana.com_alertingrules.yaml - bases/loki.grafana.com_recordingrules.yaml +- bases/loki.grafana.com_rulerconfigs.yaml # +kubebuilder:scaffold:crdkustomizeresource patchesStrategicMerge: @@ -13,6 +14,7 @@ patchesStrategicMerge: #- patches/webhook_in_lokistacks.yaml #- patches/webhook_in_alertingrules.yaml #- patches/webhook_in_recordingrules.yaml +#- patches/webhook_in_rulerconfigs.yaml # +kubebuilder:scaffold:crdkustomizewebhookpatch # [CERTMANAGER] To enable webhook, uncomment all the sections with [CERTMANAGER] prefix. @@ -20,6 +22,7 @@ patchesStrategicMerge: #- patches/cainjection_in_lokistacks.yaml #- patches/cainjection_in_alertingrules.yaml #- patches/cainjection_in_recordingrules.yaml +#- patches/cainjection_in_rulerconfigs.yaml # +kubebuilder:scaffold:crdkustomizecainjectionpatch # the following config is for teaching kustomize how to do kustomization for CRDs. 
diff --git a/operator/config/manifests/bases/loki-operator.clusterserviceversion.yaml b/operator/config/manifests/bases/loki-operator.clusterserviceversion.yaml index 841f68f505323..b1244d921fa18 100644 --- a/operator/config/manifests/bases/loki-operator.clusterserviceversion.yaml +++ b/operator/config/manifests/bases/loki-operator.clusterserviceversion.yaml @@ -550,6 +550,209 @@ spec: x-descriptors: - urn:alm:descriptor:io.kubernetes.conditions version: v1beta1 + - description: RulerConfig is the Schema for the rulerconfigs API + displayName: RulerConfig + kind: RulerConfig + name: rulerconfigs.loki.grafana.com + resources: + - kind: LokiStack + name: "" + version: v1beta1 + specDescriptors: + - description: Defines alert manager configuration to notify on firing alerts. + displayName: Alert Manager Configuration + path: alertmanager + x-descriptors: + - urn:alm:descriptor:com.tectonic.ui:advanced + - description: Defines the configuration for DNS-based discovery of AlertManager + hosts. + displayName: DNS Discovery + path: alertmanager.discovery + x-descriptors: + - urn:alm:descriptor:com.tectonic.ui:advanced + - description: Use DNS SRV records to discover Alertmanager hosts. + displayName: Enable SRV + path: alertmanager.discovery.enableSRV + - description: How long to wait between refreshing DNS resolutions of Alertmanager + hosts. + displayName: Refresh Interval + path: alertmanager.discovery.refreshInterval + - description: If enabled, then requests to Alertmanager use the v2 API. + displayName: Enable AlertManager V2 API + path: alertmanager.enableV2 + x-descriptors: + - urn:alm:descriptor:com.tectonic.ui:booleanSwitch + - description: List of AlertManager URLs to send notifications to. Each Alertmanager + URL is treated as a separate group in the configuration. Multiple Alertmanagers + in HA per group can be supported by using DNS resolution (See EnableDNSDiscovery). 
+ displayName: AlertManager Endpoints + path: alertmanager.endpoints + - description: Additional labels to add to all alerts. + displayName: Extra Alert Labels + path: alertmanager.externalLabels + - description: URL for alerts return path. + displayName: Alert External URL + path: alertmanager.externalUrl + - description: Defines the configuration for the notification queue to AlertManager + hosts. + displayName: Notification Queue + path: alertmanager.notificationQueue + x-descriptors: + - urn:alm:descriptor:com.tectonic.ui:advanced + - description: Capacity of the queue for notifications to be sent to the Alertmanager. + displayName: Notification Queue Capacity + path: alertmanager.notificationQueue.capacity + x-descriptors: + - urn:alm:descriptor:com.tectonic.ui:number + - description: Minimum duration between alert and restored "for" state. This + is maintained only for alerts with configured "for" time greater than the + grace period. + displayName: Firing Grace Period + path: alertmanager.notificationQueue.forGracePeriod + - description: Max time to tolerate outage for restoring "for" state of alert. + displayName: Outage Tolerance + path: alertmanager.notificationQueue.forOutageTolerance + - description: Minimum amount of time to wait before resending an alert to Alertmanager. + displayName: Resend Delay + path: alertmanager.notificationQueue.resendDelay + - description: HTTP timeout duration when sending notifications to the Alertmanager. + displayName: Timeout + path: alertmanager.notificationQueue.timeout + - description: Interval on how frequently to evaluate rules. + displayName: Evaluation Interval + path: evaluationInterval + - description: Interval on how frequently to poll for new rule definitions. + displayName: Poll Interval + path: pollInterval + - description: Defines a remote write endpoint to write recording rule metrics. 
+ displayName: Remote Write Configuration + path: remoteWrite + x-descriptors: + - urn:alm:descriptor:com.tectonic.ui:advanced + - description: Defines the configuration for remote write client. + displayName: Client + path: remoteWrite.client + x-descriptors: + - urn:alm:descriptor:com.tectonic.ui:advanced + - description: Type of authorzation to use to access the remote write endpoint + displayName: Authorization Type + path: remoteWrite.client.authorization + x-descriptors: + - urn:alm:descriptor:com.tectonic.ui:select:basic + - urn:alm:descriptor:com.tectonic.ui:select:header + - description: Name of a secret in the namespace configured for authorization + secrets. + displayName: Authorization Secret Name + path: remoteWrite.client.authorizationSecretName + x-descriptors: + - urn:alm:descriptor:io.kubernetes:Secret + - description: Configure whether HTTP requests follow HTTP 3xx redirects. + displayName: Follow HTTP Redirects + path: remoteWrite.client.followRedirects + x-descriptors: + - urn:alm:descriptor:com.tectonic.ui:booleanSwitch + - description: Name of the remote write config, which if specified must be unique + among remote write configs. + displayName: Name + path: remoteWrite.client.name + - description: Optional proxy URL. + displayName: HTTP Proxy URL + path: remoteWrite.client.proxyUrl + - description: List of remote write relabel configurations. + displayName: Metric Relabel Configuration + path: remoteWrite.client.relabelConfigs + - description: Action to perform based on regex matching. 
Default is 'replace' + displayName: Action + path: remoteWrite.client.relabelConfigs[0].action + x-descriptors: + - urn:alm:descriptor:com.tectonic.ui:select:drop + - urn:alm:descriptor:com.tectonic.ui:select:hashmod + - urn:alm:descriptor:com.tectonic.ui:select:keep + - urn:alm:descriptor:com.tectonic.ui:select:labeldrop + - urn:alm:descriptor:com.tectonic.ui:select:labelkeep + - urn:alm:descriptor:com.tectonic.ui:select:labelmap + - urn:alm:descriptor:com.tectonic.ui:select:replace + - description: Modulus to take of the hash of the source label values. + displayName: Modulus + path: remoteWrite.client.relabelConfigs[0].modulus + - description: Regular expression against which the extracted value is matched. + Default is '(.*)' + displayName: Regex + path: remoteWrite.client.relabelConfigs[0].regex + - description: Replacement value against which a regex replace is performed + if the regular expression matches. Regex capture groups are available. Default + is '$1' + displayName: Replacement + path: remoteWrite.client.relabelConfigs[0].replacement + - description: Separator placed between concatenated source label values. default + is ';'. + displayName: Separator + path: remoteWrite.client.relabelConfigs[0].separator + - description: The source labels select values from existing labels. Their content + is concatenated using the configured separator and matched against the configured + regular expression for the replace, keep, and drop actions. + displayName: Source Labels + path: remoteWrite.client.relabelConfigs[0].sourceLabels + - description: Label to which the resulting value is written in a replace action. + It is mandatory for replace actions. Regex capture groups are available. + displayName: Target Label + path: remoteWrite.client.relabelConfigs[0].targetLabel + - description: Timeout for requests to the remote write endpoint. 
+ displayName: Remote Write Timeout + path: remoteWrite.client.timeout + - description: The URL of the endpoint to send samples to. + displayName: Endpoint + path: remoteWrite.client.url + - description: Enable remote-write functionality. + displayName: Enabled + path: remoteWrite.enabled + x-descriptors: + - urn:alm:descriptor:com.tectonic.ui:booleanSwitch + - description: Defines the configuration for remote write client queue. + displayName: Client Queue + path: remoteWrite.queue + x-descriptors: + - urn:alm:descriptor:com.tectonic.ui:advanced + - description: Maximum time a sample will wait in buffer. + displayName: Batch Send Deadline + path: remoteWrite.queue.batchSendDeadline + - description: Number of samples to buffer per shard before we block reading + of more + displayName: Queue Capacity + path: remoteWrite.queue.capacity + x-descriptors: + - urn:alm:descriptor:com.tectonic.ui:number + - description: Maximum retry delay. + displayName: Max BackOff Period + path: remoteWrite.queue.maxBackOffPeriod + - description: Maximum number of samples per send. + displayName: Maximum Shards per Send + path: remoteWrite.queue.maxSamplesPerSend + x-descriptors: + - urn:alm:descriptor:com.tectonic.ui:number + - description: Maximum number of shards, i.e. amount of concurrency. + displayName: Maximum Shards + path: remoteWrite.queue.maxShards + x-descriptors: + - urn:alm:descriptor:com.tectonic.ui:number + - description: Initial retry delay. Gets doubled for every retry. + displayName: Min BackOff Period + path: remoteWrite.queue.minBackOffPeriod + - description: Minimum number of shards, i.e. amount of concurrency. + displayName: Minimum Shards + path: remoteWrite.queue.minShards + x-descriptors: + - urn:alm:descriptor:com.tectonic.ui:number + - description: Minimum period to wait between refreshing remote-write reconfigurations. 
+ displayName: Min Refresh Period + path: remoteWrite.refreshPeriod + statusDescriptors: + - description: Conditions of the RulerConfig health. + displayName: Conditions + path: conditions + x-descriptors: + - urn:alm:descriptor:io.kubernetes.conditions + version: v1beta1 description: | The Loki Operator for OCP provides a means for configuring and managing a Loki stack for cluster logging. ## Prerequisites and Requirements diff --git a/operator/config/rbac/role.yaml b/operator/config/rbac/role.yaml index 9ec496bfe1097..52ed54f6f1709 100644 --- a/operator/config/rbac/role.yaml +++ b/operator/config/rbac/role.yaml @@ -137,6 +137,32 @@ rules: - get - patch - update +- apiGroups: + - loki.grafana.com + resources: + - rulerconfigs + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - loki.grafana.com + resources: + - rulerconfigs/finalizers + verbs: + - update +- apiGroups: + - loki.grafana.com + resources: + - rulerconfigs/status + verbs: + - get + - patch + - update - apiGroups: - monitoring.coreos.com resources: diff --git a/operator/config/rbac/rulerconfig_editor_role.yaml b/operator/config/rbac/rulerconfig_editor_role.yaml new file mode 100644 index 0000000000000..46c17c8564ae8 --- /dev/null +++ b/operator/config/rbac/rulerconfig_editor_role.yaml @@ -0,0 +1,24 @@ +# permissions for end users to edit rulerconfigs. 
+apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: rulerconfig-editor-role +rules: +- apiGroups: + - loki.grafana.com + resources: + - rulerconfigs + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - loki.grafana.com + resources: + - rulerconfigs/status + verbs: + - get diff --git a/operator/config/rbac/rulerconfig_viewer_role.yaml b/operator/config/rbac/rulerconfig_viewer_role.yaml new file mode 100644 index 0000000000000..9512c494c7208 --- /dev/null +++ b/operator/config/rbac/rulerconfig_viewer_role.yaml @@ -0,0 +1,20 @@ +# permissions for end users to view rulerconfigs. +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: rulerconfig-viewer-role +rules: +- apiGroups: + - loki.grafana.com + resources: + - rulerconfigs + verbs: + - get + - list + - watch +- apiGroups: + - loki.grafana.com + resources: + - rulerconfigs/status + verbs: + - get diff --git a/operator/config/samples/kustomization.yaml b/operator/config/samples/kustomization.yaml index 34f348fcffb8b..bd14a02d9b59d 100644 --- a/operator/config/samples/kustomization.yaml +++ b/operator/config/samples/kustomization.yaml @@ -3,4 +3,5 @@ resources: - loki_v1beta1_lokistack.yaml - loki_v1beta1_alertingrule.yaml - loki_v1beta1_recordingrule.yaml +- loki_v1beta1_rulerconfig.yaml # +kubebuilder:scaffold:manifestskustomizesamples diff --git a/operator/config/samples/loki_v1beta1_rulerconfig.yaml b/operator/config/samples/loki_v1beta1_rulerconfig.yaml new file mode 100644 index 0000000000000..1e744a34f8e89 --- /dev/null +++ b/operator/config/samples/loki_v1beta1_rulerconfig.yaml @@ -0,0 +1,42 @@ +apiVersion: loki.grafana.com/v1beta1 +kind: RulerConfig +metadata: + name: rulerconfig-sample +spec: + evaluationInterval: 1m + pollInterval: 1m + alertmanager: + externalUrl: http://www.mycompany.org/alerts + externalLabels: + environment: production + region: us-east-2 + enableV2: true + endpoints: + - 
http://alertmanager-host1.mycompany.org + - http://alertmanager-host2.mycompany.org + discovery: + enableSRV: true + refreshInterval: 1m + notificationQueue: + capacity: 1000 + timeout: 30s + forOutageTolerance: 1h + forGracePeriod: 10m + resendDelay: 1m + remoteWrite: + enabled: true + refreshPeriod: 10s + client: + name: remote-write-log-metrics + url: http://remote-write-host.mycompany.org + timeout: 30s + authorization: basic + authorizationSecretName: my-secret-resource + proxyUrl: http://proxy-host.mycompany.org + relabelConfigs: + - sourceLabels: ["labelc","labeld"] + regex: ALERTS.* + action: replace + separator: "" + replacement: $1 + targetLabel: labelnew diff --git a/operator/controllers/internal/lokistack/ruler_config_discovery.go b/operator/controllers/internal/lokistack/ruler_config_discovery.go new file mode 100644 index 0000000000000..e4182c58bbcc1 --- /dev/null +++ b/operator/controllers/internal/lokistack/ruler_config_discovery.go @@ -0,0 +1,42 @@ +package lokistack + +import ( + "context" + "time" + + "github.com/ViaQ/logerr/v2/kverrors" + lokistackv1beta1 "github.com/grafana/loki/operator/api/v1beta1" + "github.com/grafana/loki/operator/internal/external/k8s" + apierrors "k8s.io/apimachinery/pkg/api/errors" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +// AnnotateForRulerConfig adds/updates the `loki.grafana.com/rulerConfigDiscoveredAt` annotation +// to the named Lokistack in the same namespace of the RulerConfig. If no LokiStack is found, then +// skip reconciliation. 
+func AnnotateForRulerConfig(ctx context.Context, k k8s.Client, name, namespace string) error { + var s lokistackv1beta1.LokiStack + key := client.ObjectKey{Name: name, Namespace: namespace} + + if err := k.Get(ctx, key, &s); err != nil { + if apierrors.IsNotFound(err) { + // No LokiStack with this name exists in the namespace: nothing to annotate. + return nil + } + + return kverrors.Wrap(err, "failed to get lokistack", "key", key) + } + + ss := s.DeepCopy() + if ss.Annotations == nil { + ss.Annotations = make(map[string]string) + } + + ss.Annotations["loki.grafana.com/rulerConfigDiscoveredAt"] = time.Now().UTC().Format(time.RFC3339) + + if err := k.Update(ctx, ss); err != nil { + return kverrors.Wrap(err, "failed to update lokistack `rulerConfigDiscoveredAt` annotation", "key", key) + } + + return nil +} diff --git a/operator/controllers/rulerconfig_controller.go b/operator/controllers/rulerconfig_controller.go new file mode 100644 index 0000000000000..fdb9b50788ba2 --- /dev/null +++ b/operator/controllers/rulerconfig_controller.go @@ -0,0 +1,57 @@ +package controllers + +import ( + "context" + "time" + + "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/runtime" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + + lokiv1beta1 "github.com/grafana/loki/operator/api/v1beta1" + "github.com/grafana/loki/operator/controllers/internal/lokistack" +) + +// RulerConfigReconciler reconciles a RulerConfig object. +type RulerConfigReconciler struct { + client.Client + Scheme *runtime.Scheme +} + +//+kubebuilder:rbac:groups=loki.grafana.com,resources=rulerconfigs,verbs=get;list;watch;create;update;patch;delete +//+kubebuilder:rbac:groups=loki.grafana.com,resources=rulerconfigs/status,verbs=get;update;patch +//+kubebuilder:rbac:groups=loki.grafana.com,resources=rulerconfigs/finalizers,verbs=update + +// Reconcile is part of the main kubernetes reconciliation loop which aims to +// move the current state of the cluster closer to the desired state. 
+// +// For more details, check Reconcile and its Result here: +// - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.11.0/pkg/reconcile +func (r *RulerConfigReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { + var rc lokiv1beta1.RulerConfig + key := client.ObjectKey{Name: req.Name, Namespace: req.Namespace} + if err := r.Get(ctx, key, &rc); err != nil { + if errors.IsNotFound(err) { + return ctrl.Result{}, nil + } + + return ctrl.Result{Requeue: true, RequeueAfter: time.Second}, err + } + + err := lokistack.AnnotateForRulerConfig(ctx, r.Client, req.Name, req.Namespace) + if err != nil { + return ctrl.Result{ + Requeue: true, + RequeueAfter: time.Second, + }, err + } + return ctrl.Result{}, nil +} + +// SetupWithManager registers the reconciler with mgr as the controller for RulerConfig resources. +func (r *RulerConfigReconciler) SetupWithManager(mgr ctrl.Manager) error { + return ctrl.NewControllerManagedBy(mgr). + For(&lokiv1beta1.RulerConfig{}). + Complete(r) +} diff --git a/operator/internal/handlers/internal/rules/config.go b/operator/internal/handlers/internal/rules/config.go new file mode 100644 index 0000000000000..14a9544a13bfd --- /dev/null +++ b/operator/internal/handlers/internal/rules/config.go @@ -0,0 +1,29 @@ +package rules + +import ( + "context" + + "github.com/ViaQ/logerr/v2/kverrors" + lokiv1beta1 "github.com/grafana/loki/operator/api/v1beta1" + "github.com/grafana/loki/operator/internal/external/k8s" + apierrors "k8s.io/apimachinery/pkg/api/errors" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +// GetRulerConfig returns the ruler config spec for a lokistack resource or an error. +// If the config is not found, it returns a nil spec and a nil error. 
+func GetRulerConfig(ctx context.Context, k k8s.Client, req ctrl.Request) (*lokiv1beta1.RulerConfigSpec, error) { + var rc lokiv1beta1.RulerConfig + + key := client.ObjectKey{Name: req.Name, Namespace: req.Namespace} + if err := k.Get(ctx, key, &rc); err != nil { + if apierrors.IsNotFound(err) { + return nil, nil + } + + return nil, kverrors.Wrap(err, "failed to get rulerconfig", "key", key) + } + + return &rc.Spec, nil +} diff --git a/operator/internal/handlers/internal/rules/secrets.go b/operator/internal/handlers/internal/rules/secrets.go new file mode 100644 index 0000000000000..9150d8c96803d --- /dev/null +++ b/operator/internal/handlers/internal/rules/secrets.go @@ -0,0 +1,37 @@ +package rules + +import ( + "github.com/ViaQ/logerr/v2/kverrors" + + lokiv1beta1 "github.com/grafana/loki/operator/api/v1beta1" + "github.com/grafana/loki/operator/internal/manifests" + + corev1 "k8s.io/api/core/v1" +) + +// ExtractRulerSecret reads a k8s secret into a ruler secret struct, returning an error if a key required by the authorization type t is missing or empty, or if t is unknown. +func ExtractRulerSecret(s *corev1.Secret, t lokiv1beta1.RemoteWriteAuthType) (*manifests.RulerSecret, error) { + switch t { + case lokiv1beta1.BasicAuthorization: + username := s.Data["username"] + if len(username) == 0 { + return nil, kverrors.New("missing basic auth username", "field", "username") + } + + password := s.Data["password"] + if len(password) == 0 { + return nil, kverrors.New("missing basic auth password", "field", "password") + } + + return &manifests.RulerSecret{Username: string(username), Password: string(password)}, nil + case lokiv1beta1.BearerAuthorization: + token := s.Data["bearer_token"] + if len(token) == 0 { + return nil, kverrors.New("missing bearer token", "field", "bearer_token") + } + + return &manifests.RulerSecret{BearerToken: string(token)}, nil + default: + return nil, kverrors.New("unknown ruler secret type", "type", t) + } +} diff --git a/operator/internal/handlers/internal/rules/secrets_test.go 
b/operator/internal/handlers/internal/rules/secrets_test.go new file mode 100644 index 0000000000000..2799d79093a94 --- /dev/null +++ b/operator/internal/handlers/internal/rules/secrets_test.go @@ -0,0 +1,86 @@ +package rules_test + +import ( + "testing" + + lokiv1beta1 "github.com/grafana/loki/operator/api/v1beta1" + "github.com/grafana/loki/operator/internal/handlers/internal/rules" + "github.com/grafana/loki/operator/internal/manifests" + "github.com/stretchr/testify/require" + corev1 "k8s.io/api/core/v1" +) + +func TestExtractRulerSecret(t *testing.T) { + type test struct { + name string + authType lokiv1beta1.RemoteWriteAuthType + secret *corev1.Secret + wantSecret *manifests.RulerSecret + wantErr bool + } + table := []test{ + { + name: "missing username", + authType: lokiv1beta1.BasicAuthorization, + secret: &corev1.Secret{}, + wantErr: true, + }, + { + name: "missing password", + authType: lokiv1beta1.BasicAuthorization, + secret: &corev1.Secret{ + Data: map[string][]byte{ + "username": []byte("dasd"), + }, + }, + wantErr: true, + }, + { + name: "missing bearer token", + authType: lokiv1beta1.BearerAuthorization, + secret: &corev1.Secret{}, + wantErr: true, + }, + { + name: "valid basic auth", + authType: lokiv1beta1.BasicAuthorization, + secret: &corev1.Secret{ + Data: map[string][]byte{ + "username": []byte("hello"), + "password": []byte("world"), + }, + }, + wantSecret: &manifests.RulerSecret{ + Username: "hello", + Password: "world", + }, + }, + { + name: "valid header auth", + authType: lokiv1beta1.BearerAuthorization, + secret: &corev1.Secret{ + Data: map[string][]byte{ + "bearer_token": []byte("hello world"), + }, + }, + wantSecret: &manifests.RulerSecret{ + BearerToken: "hello world", + }, + }, + } + for _, tst := range table { + tst := tst + t.Run(tst.name, func(t *testing.T) { + t.Parallel() + + s, err := rules.ExtractRulerSecret(tst.secret, tst.authType) + if !tst.wantErr { + require.NoError(t, err) + require.Equal(t, tst.wantSecret, s) + } + if 
tst.wantErr { + require.NotNil(t, err) + } + }) + } +} diff --git a/operator/internal/handlers/lokistack_create_or_update.go b/operator/internal/handlers/lokistack_create_or_update.go index b08ec605e7d80..3eafd071cda8f 100644 --- a/operator/internal/handlers/lokistack_create_or_update.go +++ b/operator/internal/handlers/lokistack_create_or_update.go @@ -147,12 +147,43 @@ func CreateOrUpdateLokiStack( var ( alertingRules []lokiv1beta1.AlertingRule recordingRules []lokiv1beta1.RecordingRule + rulerConfig *lokiv1beta1.RulerConfigSpec + rulerSecret *manifests.RulerSecret ) if stack.Spec.Rules != nil && stack.Spec.Rules.Enabled { alertingRules, recordingRules, err = rules.List(ctx, k, req.Namespace, stack.Spec.Rules) if err != nil { log.Error(err, "failed to lookup rules", "spec", stack.Spec.Rules) } + + rulerConfig, err = rules.GetRulerConfig(ctx, k, req) + if err != nil { + log.Error(err, "failed to lookup ruler config", "key", req.NamespacedName) + } + + if rulerConfig != nil && rulerConfig.RemoteWriteSpec != nil && rulerConfig.RemoteWriteSpec.ClientSpec != nil { + var rs corev1.Secret + key := client.ObjectKey{Name: rulerConfig.RemoteWriteSpec.ClientSpec.AuthorizationSecretName, Namespace: stack.Namespace} + if err = k.Get(ctx, key, &rs); err != nil { + if apierrors.IsNotFound(err) { + return &status.DegradedError{ + Message: "Missing ruler remote write authorization secret", + Reason: lokiv1beta1.ReasonMissingRulerSecret, + Requeue: false, + } + } + return kverrors.Wrap(err, "failed to lookup lokistack ruler secret", "name", key) + } + + rulerSecret, err = rules.ExtractRulerSecret(&rs, rulerConfig.RemoteWriteSpec.ClientSpec.AuthorizationType) + if err != nil { + return &status.DegradedError{ + Message: "Invalid ruler remote write authorization secret contents", + Reason: lokiv1beta1.ReasonInvalidRulerSecret, + Requeue: false, + } + } + } } // Here we will translate the lokiv1beta1.LokiStack options into manifest options @@ -167,6 +198,10 @@ func 
CreateOrUpdateLokiStack( ObjectStorage: *objstorage, AlertingRules: alertingRules, RecordingRules: recordingRules, + Ruler: manifests.Ruler{ + Spec: rulerConfig, + Secret: rulerSecret, + }, Tenants: manifests.Tenants{ Secrets: tenantSecrets, Configs: tenantConfigs, diff --git a/operator/internal/manifests/config.go b/operator/internal/manifests/config.go index 6751ce16e971b..08937a553c22d 100644 --- a/operator/internal/manifests/config.go +++ b/operator/internal/manifests/config.go @@ -3,7 +3,9 @@ package manifests import ( "crypto/sha1" "fmt" + "strings" + lokiv1beta1 "github.com/grafana/loki/operator/api/v1beta1" "github.com/grafana/loki/operator/internal/manifests/internal/config" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -44,6 +46,27 @@ func LokiConfigMap(opt Options) (*corev1.ConfigMap, string, error) { func ConfigOptions(opt Options) config.Options { rulerEnabled := opt.Stack.Rules != nil && opt.Stack.Rules.Enabled + var ( + evalInterval, pollInterval string + amConfig *config.AlertManagerConfig + rwConfig *config.RemoteWriteConfig + ) + if rulerEnabled { + rulerEnabled = true + + // Map alertmanager config from CRD to config options + if opt.Ruler.Spec != nil { + evalInterval = string(opt.Ruler.Spec.EvalutionInterval) + pollInterval = string(opt.Ruler.Spec.PollInterval) + amConfig = alertManagerConfig(opt.Ruler.Spec.AlertManagerSpec) + } + + // Map remote write config from CRD to config options + if opt.Ruler.Spec != nil && opt.Ruler.Secret != nil { + rwConfig = remoteWriteConfig(opt.Ruler.Spec.RemoteWriteSpec, opt.Ruler.Secret) + } + } + return config.Options{ Stack: opt.Stack, Namespace: opt.Namespace, @@ -77,6 +100,10 @@ func ConfigOptions(opt Options) config.Options { Ruler: config.Ruler{ Enabled: rulerEnabled, RulesStorageDirectory: rulesStorageDirectory, + EvaluationInterval: evalInterval, + PollInterval: pollInterval, + AlertManager: amConfig, + RemoteWrite: rwConfig, }, } } @@ -84,3 +111,87 @@ func ConfigOptions(opt 
Options) config.Options { func lokiConfigMapName(stackName string) string { return fmt.Sprintf("%s-config", stackName) } + +func alertManagerConfig(s *lokiv1beta1.AlertManagerSpec) *config.AlertManagerConfig { + if s == nil { + return nil + } + + c := &config.AlertManagerConfig{ + ExternalURL: s.ExternalURL, + ExternalLabels: s.ExternalLabels, + Hosts: strings.Join(s.Endpoints, ","), + EnableV2: s.EnableV2, + } + + if d := s.DiscoverySpec; d != nil { + c.EnableDiscovery = d.EnableSRV + c.RefreshInterval = string(d.RefreshInterval) + } + + if n := s.NotificationQueueSpec; n != nil { + c.QueueCapacity = n.Capacity + c.Timeout = string(n.Timeout) + c.ForOutageTolerance = string(n.ForOutageTolerance) + c.ForGracePeriod = string(n.ForGracePeriod) + c.ResendDelay = string(n.ResendDelay) + } + + return c +} + +func remoteWriteConfig(s *lokiv1beta1.RemoteWriteSpec, rs *RulerSecret) *config.RemoteWriteConfig { + if s == nil || rs == nil { + return nil + } + + c := &config.RemoteWriteConfig{ + Enabled: s.Enabled, + RefreshPeriod: string(s.RefreshPeriod), + } + + if cls := s.ClientSpec; cls != nil { + c.Client = &config.RemoteWriteClientConfig{ + Name: cls.Name, + URL: cls.URL, + RemoteTimeout: string(cls.Timeout), + ProxyURL: cls.ProxyURL, + Headers: cls.AdditionalHeaders, + FollowRedirects: cls.FollowRedirects, + } + + switch cls.AuthorizationType { + case lokiv1beta1.BasicAuthorization: + c.Client.BasicAuthUsername = rs.Username + c.Client.BasicAuthPassword = rs.Password + case lokiv1beta1.BearerAuthorization: + c.Client.BearerToken = rs.BearerToken + } + + for _, cfg := range cls.RelabelConfigs { + c.RelabelConfigs = append(c.RelabelConfigs, config.RemoteWriteRelabelConfig{ + SourceLabels: cfg.SourceLabels, + Separator: cfg.Separator, + TargetLabel: cfg.TargetLabel, + Regex: cfg.Regex, + Modulus: cfg.Modulus, + Replacement: cfg.Replacement, + Action: string(cfg.Action), + }) + } + } + + if q := s.QueueSpec; q != nil { + c.Queue = &config.RemoteWriteQueueConfig{ + 
Capacity: q.Capacity, + MaxShards: q.MaxShards, + MinShards: q.MinShards, + MaxSamplesPerSend: q.MaxSamplesPerSend, + BatchSendDeadline: string(q.BatchSendDeadline), + MinBackOffPeriod: string(q.MinBackOffPeriod), + MaxBackOffPeriod: string(q.MaxBackOffPeriod), + } + } + + return c +} diff --git a/operator/internal/manifests/internal/config/build_test.go b/operator/internal/manifests/internal/config/build_test.go index 320a39c46f824..8eb695ba00621 100644 --- a/operator/internal/manifests/internal/config/build_test.go +++ b/operator/internal/manifests/internal/config/build_test.go @@ -537,7 +537,7 @@ func TestBuild_ConfigAndRuntimeConfig_CreateLokiConfigFailed(t *testing.T) { require.Empty(t, rCfg) } -func TestBuild_ConfigAndRuntimeConfig_RulerConfigGenerated(t *testing.T) { +func TestBuild_ConfigAndRuntimeConfig_RulerConfigGenerated_WithHeaderAuthorization(t *testing.T) { expCfg := ` --- auth_enabled: true @@ -670,6 +670,710 @@ schema_config: ruler: enable_api: true enable_sharding: true + evaluation_interval: 1m + poll_interval: 1m + external_url: http://alert.me/now + external_labels: + key1: val1 + key2: val2 + alertmanager_url: http://alerthost1,http://alerthost2 + enable_alertmanager_v2: true + enable_alertmanager_discovery: true + alertmanager_refresh_interval: 1m + notification_queue_capacity: 1000 + notification_timeout: 1m + for_outage_tolerance: 10m + for_grace_period: 5m + resend_delay: 2m + remote_write: + enabled: true + config_refresh_period: 1m + client: + name: remote-write-me + url: http://remote.write.me + timeout: 10s + proxy_url: http://proxy.through.me + follow_redirects: true + headers: + more: foryou + less: forme + authorization: + type: bearer + credentials: supersecret + queue_config: + capacity: 1000 + max_shards: 100 + min_shards: 50 + max_samples_per_send: 1000 + batch_send_deadline: 10s + min_backoff: 30ms + max_backoff: 100ms + wal: + dir: /tmp/wal + truncate_frequency: 60m + min_age: 5m + max_age: 4h + rule_path: /tmp/loki + storage: 
+ type: local + local: + directory: /tmp/rules + ring: + kvstore: + store: memberlist +server: + graceful_shutdown_timeout: 5s + grpc_server_min_time_between_pings: '10s' + grpc_server_ping_without_stream_allowed: true + grpc_server_max_concurrent_streams: 1000 + grpc_server_max_recv_msg_size: 104857600 + grpc_server_max_send_msg_size: 104857600 + http_listen_port: 3100 + http_server_idle_timeout: 120s + http_server_write_timeout: 1m + log_level: info +storage_config: + boltdb_shipper: + active_index_directory: /tmp/loki/index + cache_location: /tmp/loki/index_cache + cache_ttl: 24h + resync_interval: 5m + shared_store: s3 + index_gateway_client: + server_address: dns:///loki-index-gateway-grpc-lokistack-dev.default.svc.cluster.local:9095 +tracing: + enabled: false +analytics: + reporting_enabled: true +` + expRCfg := ` +--- +overrides: +` + opts := Options{ + Stack: lokiv1beta1.LokiStackSpec{ + ReplicationFactor: 1, + Limits: &lokiv1beta1.LimitsSpec{ + Global: &lokiv1beta1.LimitsTemplateSpec{ + IngestionLimits: &lokiv1beta1.IngestionLimitSpec{ + IngestionRate: 4, + IngestionBurstSize: 6, + MaxLabelNameLength: 1024, + MaxLabelValueLength: 2048, + MaxLabelNamesPerSeries: 30, + MaxGlobalStreamsPerTenant: 0, + MaxLineSize: 256000, + }, + QueryLimits: &lokiv1beta1.QueryLimitSpec{ + MaxEntriesLimitPerQuery: 5000, + MaxChunksPerQuery: 2000000, + MaxQuerySeries: 500, + }, + }, + }, + }, + Namespace: "test-ns", + Name: "test", + FrontendWorker: Address{ + FQDN: "loki-query-frontend-grpc-lokistack-dev.default.svc.cluster.local", + Port: 9095, + }, + GossipRing: Address{ + FQDN: "loki-gossip-ring-lokistack-dev.default.svc.cluster.local", + Port: 7946, + }, + Querier: Address{ + FQDN: "loki-querier-http-lokistack-dev.default.svc.cluster.local", + Port: 3100, + }, + IndexGateway: Address{ + FQDN: "loki-index-gateway-grpc-lokistack-dev.default.svc.cluster.local", + Port: 9095, + }, + Ruler: Ruler{ + Enabled: true, + RulesStorageDirectory: "/tmp/rules", + EvaluationInterval: 
"1m", + PollInterval: "1m", + AlertManager: &AlertManagerConfig{ + ExternalURL: "http://alert.me/now", + ExternalLabels: map[string]string{ + "key1": "val1", + "key2": "val2", + }, + Hosts: "http://alerthost1,http://alerthost2", + EnableV2: true, + EnableDiscovery: true, + RefreshInterval: "1m", + QueueCapacity: 1000, + Timeout: "1m", + ForOutageTolerance: "10m", + ForGracePeriod: "5m", + ResendDelay: "2m", + }, + RemoteWrite: &RemoteWriteConfig{ + Enabled: true, + RefreshPeriod: "1m", + Client: &RemoteWriteClientConfig{ + Name: "remote-write-me", + URL: "http://remote.write.me", + RemoteTimeout: "10s", + Headers: map[string]string{ + "more": "foryou", + "less": "forme", + }, + ProxyURL: "http://proxy.through.me", + FollowRedirects: true, + BearerToken: "supersecret", + }, + Queue: &RemoteWriteQueueConfig{ + Capacity: 1000, + MaxShards: 100, + MinShards: 50, + MaxSamplesPerSend: 1000, + BatchSendDeadline: "10s", + MinBackOffPeriod: "30ms", + MaxBackOffPeriod: "100ms", + }, + }, + }, + StorageDirectory: "/tmp/loki", + MaxConcurrent: MaxConcurrent{ + AvailableQuerierCPUCores: 2, + }, + WriteAheadLog: WriteAheadLog{ + Directory: "/tmp/wal", + IngesterMemoryRequest: 5000, + }, + ObjectStorage: storage.Options{ + SharedStore: lokiv1beta1.ObjectStorageSecretS3, + S3: &storage.S3StorageConfig{ + Endpoint: "http://test.default.svc.cluster.local.:9000", + Region: "us-east", + Buckets: "loki", + AccessKeyID: "test", + AccessKeySecret: "test123", + }, + }, + EnableRemoteReporting: true, + } + cfg, rCfg, err := Build(opts) + require.NoError(t, err) + require.YAMLEq(t, expCfg, string(cfg)) + require.YAMLEq(t, expRCfg, string(rCfg)) +} + +func TestBuild_ConfigAndRuntimeConfig_RulerConfigGenerated_WithBasicAuthorization(t *testing.T) { + expCfg := ` +--- +auth_enabled: true +chunk_store_config: + chunk_cache_config: + enable_fifocache: true + fifocache: + max_size_bytes: 500MB +common: + storage: + s3: + s3: http://test.default.svc.cluster.local.:9000 + bucketnames: loki + 
region: us-east + access_key_id: test + secret_access_key: test123 + s3forcepathstyle: true +compactor: + compaction_interval: 2h + working_directory: /tmp/loki/compactor +frontend: + tail_proxy_url: http://loki-querier-http-lokistack-dev.default.svc.cluster.local:3100 + compress_responses: true + max_outstanding_per_tenant: 256 + log_queries_longer_than: 5s +frontend_worker: + frontend_address: loki-query-frontend-grpc-lokistack-dev.default.svc.cluster.local:9095 + grpc_client_config: + max_send_msg_size: 104857600 + match_max_concurrent: true +ingester: + chunk_block_size: 262144 + chunk_encoding: snappy + chunk_idle_period: 1h + chunk_retain_period: 5m + chunk_target_size: 2097152 + flush_op_timeout: 10m + lifecycler: + final_sleep: 0s + heartbeat_period: 5s + interface_names: + - eth0 + join_after: 30s + num_tokens: 512 + ring: + replication_factor: 1 + heartbeat_timeout: 1m + max_chunk_age: 2h + max_transfer_retries: 0 + wal: + enabled: true + dir: /tmp/wal + replay_memory_ceiling: 2500 +ingester_client: + grpc_client_config: + max_recv_msg_size: 67108864 + remote_timeout: 1s +# NOTE: Keep the order of keys as in Loki docs +# to enable easy diffs when vendoring newer +# Loki releases. +# (See https://grafana.com/docs/loki/latest/configuration/#limits_config) +# +# Values for not exposed fields are taken from the grafana/loki production +# configuration manifests. +# (See https://github.com/grafana/loki/blob/main/production/ksonnet/loki/config.libsonnet) +limits_config: + ingestion_rate_strategy: global + ingestion_rate_mb: 4 + ingestion_burst_size_mb: 6 + max_label_name_length: 1024 + max_label_value_length: 2048 + max_label_names_per_series: 30 + reject_old_samples: true + reject_old_samples_max_age: 168h + creation_grace_period: 10m + enforce_metric_name: false + # Keep max_streams_per_user always to 0 to default + # using max_global_streams_per_user always. 
+ # (See https://github.com/grafana/loki/blob/main/pkg/ingester/limiter.go#L73) + max_streams_per_user: 0 + max_line_size: 256000 + max_entries_limit_per_query: 5000 + max_global_streams_per_user: 0 + max_chunks_per_query: 2000000 + max_query_length: 721h + max_query_parallelism: 32 + max_query_series: 500 + cardinality_limit: 100000 + max_streams_matchers_per_query: 1000 + max_cache_freshness_per_query: 10m + per_stream_rate_limit: 3MB + per_stream_rate_limit_burst: 15MB + split_queries_by_interval: 30m +memberlist: + abort_if_cluster_join_fails: true + bind_port: 7946 + join_members: + - loki-gossip-ring-lokistack-dev.default.svc.cluster.local:7946 + max_join_backoff: 1m + max_join_retries: 10 + min_join_backoff: 1s +querier: + engine: + max_look_back_period: 30s + timeout: 3m + extra_query_delay: 0s + max_concurrent: 2 + query_ingesters_within: 3h + query_timeout: 1m + tail_max_duration: 1h +query_range: + align_queries_with_step: true + cache_results: true + max_retries: 5 + results_cache: + cache: + enable_fifocache: true + fifocache: + max_size_bytes: 500MB + parallelise_shardable_queries: true +schema_config: + configs: + - from: "2020-10-01" + index: + period: 24h + prefix: index_ + object_store: s3 + schema: v11 + store: boltdb-shipper +ruler: + enable_api: true + enable_sharding: true + evaluation_interval: 1m + poll_interval: 1m + external_url: http://alert.me/now + external_labels: + key1: val1 + key2: val2 + alertmanager_url: http://alerthost1,http://alerthost2 + enable_alertmanager_v2: true + enable_alertmanager_discovery: true + alertmanager_refresh_interval: 1m + notification_queue_capacity: 1000 + notification_timeout: 1m + for_outage_tolerance: 10m + for_grace_period: 5m + resend_delay: 2m + remote_write: + enabled: true + config_refresh_period: 1m + client: + name: remote-write-me + url: http://remote.write.me + timeout: 10s + proxy_url: http://proxy.through.me + follow_redirects: true + headers: + more: foryou + less: forme + basic_auth: + 
username: user + password: passwd + queue_config: + capacity: 1000 + max_shards: 100 + min_shards: 50 + max_samples_per_send: 1000 + batch_send_deadline: 10s + min_backoff: 30ms + max_backoff: 100ms + wal: + dir: /tmp/wal + truncate_frequency: 60m + min_age: 5m + max_age: 4h + rule_path: /tmp/loki + storage: + type: local + local: + directory: /tmp/rules + ring: + kvstore: + store: memberlist +server: + graceful_shutdown_timeout: 5s + grpc_server_min_time_between_pings: '10s' + grpc_server_ping_without_stream_allowed: true + grpc_server_max_concurrent_streams: 1000 + grpc_server_max_recv_msg_size: 104857600 + grpc_server_max_send_msg_size: 104857600 + http_listen_port: 3100 + http_server_idle_timeout: 120s + http_server_write_timeout: 1m + log_level: info +storage_config: + boltdb_shipper: + active_index_directory: /tmp/loki/index + cache_location: /tmp/loki/index_cache + cache_ttl: 24h + resync_interval: 5m + shared_store: s3 + index_gateway_client: + server_address: dns:///loki-index-gateway-grpc-lokistack-dev.default.svc.cluster.local:9095 +tracing: + enabled: false +analytics: + reporting_enabled: true +` + expRCfg := ` +--- +overrides: +` + opts := Options{ + Stack: lokiv1beta1.LokiStackSpec{ + ReplicationFactor: 1, + Limits: &lokiv1beta1.LimitsSpec{ + Global: &lokiv1beta1.LimitsTemplateSpec{ + IngestionLimits: &lokiv1beta1.IngestionLimitSpec{ + IngestionRate: 4, + IngestionBurstSize: 6, + MaxLabelNameLength: 1024, + MaxLabelValueLength: 2048, + MaxLabelNamesPerSeries: 30, + MaxGlobalStreamsPerTenant: 0, + MaxLineSize: 256000, + }, + QueryLimits: &lokiv1beta1.QueryLimitSpec{ + MaxEntriesLimitPerQuery: 5000, + MaxChunksPerQuery: 2000000, + MaxQuerySeries: 500, + }, + }, + }, + }, + Namespace: "test-ns", + Name: "test", + FrontendWorker: Address{ + FQDN: "loki-query-frontend-grpc-lokistack-dev.default.svc.cluster.local", + Port: 9095, + }, + GossipRing: Address{ + FQDN: "loki-gossip-ring-lokistack-dev.default.svc.cluster.local", + Port: 7946, + }, + Querier: 
Address{ + FQDN: "loki-querier-http-lokistack-dev.default.svc.cluster.local", + Port: 3100, + }, + IndexGateway: Address{ + FQDN: "loki-index-gateway-grpc-lokistack-dev.default.svc.cluster.local", + Port: 9095, + }, + Ruler: Ruler{ + Enabled: true, + RulesStorageDirectory: "/tmp/rules", + EvaluationInterval: "1m", + PollInterval: "1m", + AlertManager: &AlertManagerConfig{ + ExternalURL: "http://alert.me/now", + ExternalLabels: map[string]string{ + "key1": "val1", + "key2": "val2", + }, + Hosts: "http://alerthost1,http://alerthost2", + EnableV2: true, + EnableDiscovery: true, + RefreshInterval: "1m", + QueueCapacity: 1000, + Timeout: "1m", + ForOutageTolerance: "10m", + ForGracePeriod: "5m", + ResendDelay: "2m", + }, + RemoteWrite: &RemoteWriteConfig{ + Enabled: true, + RefreshPeriod: "1m", + Client: &RemoteWriteClientConfig{ + Name: "remote-write-me", + URL: "http://remote.write.me", + RemoteTimeout: "10s", + Headers: map[string]string{ + "more": "foryou", + "less": "forme", + }, + ProxyURL: "http://proxy.through.me", + FollowRedirects: true, + BasicAuthUsername: "user", + BasicAuthPassword: "passwd", + }, + Queue: &RemoteWriteQueueConfig{ + Capacity: 1000, + MaxShards: 100, + MinShards: 50, + MaxSamplesPerSend: 1000, + BatchSendDeadline: "10s", + MinBackOffPeriod: "30ms", + MaxBackOffPeriod: "100ms", + }, + }, + }, + StorageDirectory: "/tmp/loki", + MaxConcurrent: MaxConcurrent{ + AvailableQuerierCPUCores: 2, + }, + WriteAheadLog: WriteAheadLog{ + Directory: "/tmp/wal", + IngesterMemoryRequest: 5000, + }, + ObjectStorage: storage.Options{ + SharedStore: lokiv1beta1.ObjectStorageSecretS3, + S3: &storage.S3StorageConfig{ + Endpoint: "http://test.default.svc.cluster.local.:9000", + Region: "us-east", + Buckets: "loki", + AccessKeyID: "test", + AccessKeySecret: "test123", + }, + }, + EnableRemoteReporting: true, + } + cfg, rCfg, err := Build(opts) + require.NoError(t, err) + require.YAMLEq(t, expCfg, string(cfg)) + require.YAMLEq(t, expRCfg, string(rCfg)) +} + +func 
TestBuild_ConfigAndRuntimeConfig_RulerConfigGenerated_WithRelabelConfigs(t *testing.T) { + expCfg := ` +--- +auth_enabled: true +chunk_store_config: + chunk_cache_config: + enable_fifocache: true + fifocache: + max_size_bytes: 500MB +common: + storage: + s3: + s3: http://test.default.svc.cluster.local.:9000 + bucketnames: loki + region: us-east + access_key_id: test + secret_access_key: test123 + s3forcepathstyle: true +compactor: + compaction_interval: 2h + working_directory: /tmp/loki/compactor +frontend: + tail_proxy_url: http://loki-querier-http-lokistack-dev.default.svc.cluster.local:3100 + compress_responses: true + max_outstanding_per_tenant: 256 + log_queries_longer_than: 5s +frontend_worker: + frontend_address: loki-query-frontend-grpc-lokistack-dev.default.svc.cluster.local:9095 + grpc_client_config: + max_send_msg_size: 104857600 + match_max_concurrent: true +ingester: + chunk_block_size: 262144 + chunk_encoding: snappy + chunk_idle_period: 1h + chunk_retain_period: 5m + chunk_target_size: 2097152 + flush_op_timeout: 10m + lifecycler: + final_sleep: 0s + heartbeat_period: 5s + interface_names: + - eth0 + join_after: 30s + num_tokens: 512 + ring: + replication_factor: 1 + heartbeat_timeout: 1m + max_chunk_age: 2h + max_transfer_retries: 0 + wal: + enabled: true + dir: /tmp/wal + replay_memory_ceiling: 2500 +ingester_client: + grpc_client_config: + max_recv_msg_size: 67108864 + remote_timeout: 1s +# NOTE: Keep the order of keys as in Loki docs +# to enable easy diffs when vendoring newer +# Loki releases. +# (See https://grafana.com/docs/loki/latest/configuration/#limits_config) +# +# Values for not exposed fields are taken from the grafana/loki production +# configuration manifests. 
+# (See https://github.com/grafana/loki/blob/main/production/ksonnet/loki/config.libsonnet) +limits_config: + ingestion_rate_strategy: global + ingestion_rate_mb: 4 + ingestion_burst_size_mb: 6 + max_label_name_length: 1024 + max_label_value_length: 2048 + max_label_names_per_series: 30 + reject_old_samples: true + reject_old_samples_max_age: 168h + creation_grace_period: 10m + enforce_metric_name: false + # Keep max_streams_per_user always to 0 to default + # using max_global_streams_per_user always. + # (See https://github.com/grafana/loki/blob/main/pkg/ingester/limiter.go#L73) + max_streams_per_user: 0 + max_line_size: 256000 + max_entries_limit_per_query: 5000 + max_global_streams_per_user: 0 + max_chunks_per_query: 2000000 + max_query_length: 721h + max_query_parallelism: 32 + max_query_series: 500 + cardinality_limit: 100000 + max_streams_matchers_per_query: 1000 + max_cache_freshness_per_query: 10m + per_stream_rate_limit: 3MB + per_stream_rate_limit_burst: 15MB + split_queries_by_interval: 30m +memberlist: + abort_if_cluster_join_fails: true + bind_port: 7946 + join_members: + - loki-gossip-ring-lokistack-dev.default.svc.cluster.local:7946 + max_join_backoff: 1m + max_join_retries: 10 + min_join_backoff: 1s +querier: + engine: + max_look_back_period: 30s + timeout: 3m + extra_query_delay: 0s + max_concurrent: 2 + query_ingesters_within: 3h + query_timeout: 1m + tail_max_duration: 1h +query_range: + align_queries_with_step: true + cache_results: true + max_retries: 5 + results_cache: + cache: + enable_fifocache: true + fifocache: + max_size_bytes: 500MB + parallelise_shardable_queries: true +schema_config: + configs: + - from: "2020-10-01" + index: + period: 24h + prefix: index_ + object_store: s3 + schema: v11 + store: boltdb-shipper +ruler: + enable_api: true + enable_sharding: true + evaluation_interval: 1m + poll_interval: 1m + external_url: http://alert.me/now + external_labels: + key1: val1 + key2: val2 + alertmanager_url: 
http://alerthost1,http://alerthost2 + enable_alertmanager_v2: true + enable_alertmanager_discovery: true + alertmanager_refresh_interval: 1m + notification_queue_capacity: 1000 + notification_timeout: 1m + for_outage_tolerance: 10m + for_grace_period: 5m + resend_delay: 2m + remote_write: + enabled: true + config_refresh_period: 1m + client: + name: remote-write-me + url: http://remote.write.me + timeout: 10s + proxy_url: http://proxy.through.me + follow_redirects: true + headers: + more: foryou + less: forme + write_relabel_configs: + - source_labels: ["labela","labelb"] + regex: "ALERTS.*" + action: "drop" + separator: "\\" + replacement: "$1" + - source_labels: ["labelc","labeld"] + regex: "ALERTS.*" + action: "drop" + separator: "" + replacement: "$1" + target_label: "labeld" + modulus: 123 + basic_auth: + username: user + password: passwd + queue_config: + capacity: 1000 + max_shards: 100 + min_shards: 50 + max_samples_per_send: 1000 + batch_send_deadline: 10s + min_backoff: 30ms + max_backoff: 100ms wal: dir: /tmp/wal truncate_frequency: 60m @@ -755,6 +1459,67 @@ overrides: Ruler: Ruler{ Enabled: true, RulesStorageDirectory: "/tmp/rules", + EvaluationInterval: "1m", + PollInterval: "1m", + AlertManager: &AlertManagerConfig{ + ExternalURL: "http://alert.me/now", + ExternalLabels: map[string]string{ + "key1": "val1", + "key2": "val2", + }, + Hosts: "http://alerthost1,http://alerthost2", + EnableV2: true, + EnableDiscovery: true, + RefreshInterval: "1m", + QueueCapacity: 1000, + Timeout: "1m", + ForOutageTolerance: "10m", + ForGracePeriod: "5m", + ResendDelay: "2m", + }, + RemoteWrite: &RemoteWriteConfig{ + Enabled: true, + RefreshPeriod: "1m", + Client: &RemoteWriteClientConfig{ + Name: "remote-write-me", + URL: "http://remote.write.me", + RemoteTimeout: "10s", + Headers: map[string]string{ + "more": "foryou", + "less": "forme", + }, + ProxyURL: "http://proxy.through.me", + FollowRedirects: true, + BasicAuthUsername: "user", + BasicAuthPassword: "passwd", + }, 
+ RelabelConfigs: []RemoteWriteRelabelConfig{ + { + SourceLabels: []string{"labela", "labelb"}, + Regex: "ALERTS.*", + Action: "drop", + Separator: "\\", + Replacement: "$1", + }, + { + SourceLabels: []string{"labelc", "labeld"}, + Regex: "ALERTS.*", + Action: "drop", + Replacement: "$1", + TargetLabel: "labeld", + Modulus: 123, + }, + }, + Queue: &RemoteWriteQueueConfig{ + Capacity: 1000, + MaxShards: 100, + MinShards: 50, + MaxSamplesPerSend: 1000, + BatchSendDeadline: "10s", + MinBackOffPeriod: "30ms", + MaxBackOffPeriod: "100ms", + }, + }, }, StorageDirectory: "/tmp/loki", MaxConcurrent: MaxConcurrent{ diff --git a/operator/internal/manifests/internal/config/loki-config.yaml b/operator/internal/manifests/internal/config/loki-config.yaml index 8d186d4b5da56..36816d9460936 100644 --- a/operator/internal/manifests/internal/config/loki-config.yaml +++ b/operator/internal/manifests/internal/config/loki-config.yaml @@ -161,6 +161,121 @@ schema_config: ruler: enable_api: true enable_sharding: true + {{ with .Ruler.EvaluationInterval }} + evaluation_interval: {{ . }} + {{ end }} + {{ with .Ruler.PollInterval }} + poll_interval: {{ . }} + {{ end }} + {{ with .Ruler.AlertManager }} + external_url: {{ .ExternalURL }} + {{ with .ExternalLabels }} + external_labels: + {{ range $name, $value := . 
}} + {{ $name }}: {{ $value }} + {{ end }} + {{ end}} + alertmanager_url: {{ .Hosts }} + {{ if .EnableV2 }} + enable_alertmanager_v2: {{ .EnableV2 }} + {{ end }} + {{ if .EnableDiscovery }} + enable_alertmanager_discovery: {{ .EnableDiscovery }} + alertmanager_refresh_interval: {{ .RefreshInterval }} + {{ end }} + {{ if .QueueCapacity }} + notification_queue_capacity: {{ .QueueCapacity }} + {{ end }} + {{ if .Timeout }} + notification_timeout: {{ .Timeout }} + {{ end }} + {{ if .ForOutageTolerance }} + for_outage_tolerance: {{ .ForOutageTolerance }} + {{ end }} + {{ if .ForGracePeriod }} + for_grace_period: {{ .ForGracePeriod }} + {{ end }} + {{ if .ResendDelay }} + resend_delay: {{ .ResendDelay }} + {{ end }} + {{ end}} + {{ with .Ruler.RemoteWrite }} + {{ if .Enabled }} + remote_write: + enabled: {{ .Enabled }} + config_refresh_period: {{ .RefreshPeriod }} + client: + {{ with .Client }} + name: {{ .Name }} + url: {{ .URL }} + timeout: {{ .RemoteTimeout }} + proxy_url: {{ .ProxyURL }} + follow_redirects: {{ .FollowRedirects }} + headers: + {{ range $key, $val := .Headers }} + "{{ $key }}": "{{ $val }}" + {{ end }} + {{ if and .BasicAuthUsername .BasicAuthPassword }} + basic_auth: + username: {{ .BasicAuthUsername }} + password: {{ .BasicAuthPassword }} + {{ end }} + {{ if .BearerToken }} + authorization: + type: bearer + credentials: {{ .BearerToken }} + {{ end }} + {{ end}} + {{ with .RelabelConfigs }} + write_relabel_configs: + {{ range $k, $cfg := . 
}} + - {{ if $cfg.SourceLabels}} + source_labels: {{ $cfg.SourceLabelsString }} + {{ end }} + {{ if $cfg.Regex }} + regex: {{ $cfg.Regex }} + {{ end }} + {{ if $cfg.Action }} + action: {{ $cfg.Action }} + {{ end }} + separator: {{ $cfg.SeparatorString }} + {{ if $cfg.Replacement }} + replacement: {{ $cfg.Replacement }} + {{ end }} + {{ if $cfg.TargetLabel }} + target_label: {{ $cfg.TargetLabel }} + {{ end }} + {{ if $cfg.Modulus }} + modulus: {{ $cfg.Modulus }} + {{ end }} + {{ end}} + {{end }} + {{ with .Queue }} + queue_config: + {{ if .Capacity }} + capacity: {{ .Capacity }} + {{ end }} + {{ if .MaxShards }} + max_shards: {{ .MaxShards }} + {{ end }} + {{ if .MinShards }} + min_shards: {{ .MinShards }} + {{ end }} + {{ if .MaxSamplesPerSend }} + max_samples_per_send: {{ .MaxSamplesPerSend }} + {{ end }} + {{ if .BatchSendDeadline }} + batch_send_deadline: {{ .BatchSendDeadline }} + {{ end }} + {{ if .MinBackOffPeriod }} + min_backoff: {{ .MinBackOffPeriod }} + {{ end }} + {{ if .MaxBackOffPeriod }} + max_backoff: {{ .MaxBackOffPeriod }} + {{ end }} + {{ end }} + {{ end }} + {{ end }} wal: dir: {{ .WriteAheadLog.Directory }} truncate_frequency: 60m diff --git a/operator/internal/manifests/internal/config/options.go b/operator/internal/manifests/internal/config/options.go index 63efe999722da..e75533ca8f69c 100644 --- a/operator/internal/manifests/internal/config/options.go +++ b/operator/internal/manifests/internal/config/options.go @@ -3,6 +3,7 @@ package config import ( "fmt" "math" + "strings" lokiv1beta1 "github.com/grafana/loki/operator/api/v1beta1" "github.com/grafana/loki/operator/internal/manifests/storage" @@ -39,6 +40,100 @@ type Address struct { type Ruler struct { Enabled bool RulesStorageDirectory string + EvaluationInterval string + PollInterval string + AlertManager *AlertManagerConfig + RemoteWrite *RemoteWriteConfig +} + +// AlertManagerConfig for ruler alertmanager config +type AlertManagerConfig struct { + Hosts string + ExternalURL string + 
ExternalLabels map[string]string + EnableV2 bool + + // DNS Discovery + EnableDiscovery bool + RefreshInterval string + + // Notification config + QueueCapacity int32 + Timeout string + ForOutageTolerance string + ForGracePeriod string + ResendDelay string +} + +// RemoteWriteConfig for ruler remote write config +type RemoteWriteConfig struct { + Enabled bool + RefreshPeriod string + Client *RemoteWriteClientConfig + Queue *RemoteWriteQueueConfig + RelabelConfigs []RemoteWriteRelabelConfig +} + +// RemoteWriteClientConfig for ruler remote write client config +type RemoteWriteClientConfig struct { + Name string + URL string + RemoteTimeout string + Headers map[string]string + ProxyURL string + FollowRedirects bool + + // Authentication + BasicAuthUsername string + BasicAuthPassword string + BearerToken string +} + +// RemoteWriteQueueConfig for ruler remote write queue config +type RemoteWriteQueueConfig struct { + Capacity int32 + MaxShards int32 + MinShards int32 + MaxSamplesPerSend int32 + BatchSendDeadline string + MinBackOffPeriod string + MaxBackOffPeriod string +} + +// RemoteWriteRelabelConfig for ruler remote write relabel configs. +type RemoteWriteRelabelConfig struct { + SourceLabels []string + Separator string + TargetLabel string + Regex string + Modulus uint64 + Replacement string + Action string +} + +// SourceLabelsString returns the source labels as one string: a bracketed, comma-separated list of double-quoted names, e.g. ["a","b"]. +func (r RemoteWriteRelabelConfig) SourceLabelsString() string { + var sb strings.Builder + sb.WriteString("[") + for i, labelname := range r.SourceLabels { + sb.WriteString(fmt.Sprintf(`"%s"`, labelname)) + + if i != len(r.SourceLabels)-1 { + sb.WriteString(",") + } + } + sb.WriteString("]") + + return sb.String() +} + +// SeparatorString returns the user-defined separator unchanged, or the two-character literal "" when none is set; it does not fall back to a semicolon. +func (r RemoteWriteRelabelConfig) SeparatorString() string { + if r.Separator == "" { + return `""` + } + + return r.Separator } // MaxConcurrent for concurrent query processing. 
diff --git a/operator/internal/manifests/options.go b/operator/internal/manifests/options.go index 7c056b5153ecc..ab836aef76db2 100644 --- a/operator/internal/manifests/options.go +++ b/operator/internal/manifests/options.go @@ -24,6 +24,7 @@ type Options struct { AlertingRules []lokiv1beta1.AlertingRule RecordingRules []lokiv1beta1.RecordingRule + Ruler Ruler ObjectStorage storage.Options @@ -77,3 +78,19 @@ type TenantOPASpec struct{} type TenantOpenShiftSpec struct { CookieSecret string } + +// Ruler configuration for manifests generation. +type Ruler struct { + Spec *lokiv1beta1.RulerConfigSpec + Secret *RulerSecret +} + +// RulerSecret defines the ruler secret for remote write client auth +type RulerSecret struct { + // Username for basic authentication only. + Username string + // Password for basic authentication only. + Password string + // BearerToken contains the token used for bearer authentication. + BearerToken string +} diff --git a/operator/main.go b/operator/main.go index 0d2bc1c95c3b9..74996c4d25e6e 100644 --- a/operator/main.go +++ b/operator/main.go @@ -148,6 +148,13 @@ func main() { logger.Error(err, "unable to create webhook", "webhook", "RecordingRule") os.Exit(1) } + if err = (&controllers.RulerConfigReconciler{ + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + }).SetupWithManager(mgr); err != nil { + logger.Error(err, "unable to create controller", "controller", "RulerConfig") + os.Exit(1) + } // +kubebuilder:scaffold:builder if err = mgr.AddHealthzCheck("health", healthz.Ping); err != nil {