diff --git a/operator/CHANGELOG.md b/operator/CHANGELOG.md index 4322497787d9a..1bb660a6bb6fa 100644 --- a/operator/CHANGELOG.md +++ b/operator/CHANGELOG.md @@ -1,5 +1,6 @@ ## Main +- [9405](https://github.com/grafana/loki/pull/9405) **periklis**: Add support for configuring HTTP server timeouts - [9378](https://github.com/grafana/loki/pull/9378) **aminesnow**: Add zone aware API spec validation - [9408](https://github.com/grafana/loki/pull/9408) **JoaoBraveCoding**: Add PodAntiAffinity overwrites per component - [9429](https://github.com/grafana/loki/pull/9429) **aminesnow**: Add default TopologySpreadContraints to Gateway diff --git a/operator/apis/loki/v1/lokistack_types.go b/operator/apis/loki/v1/lokistack_types.go index 13487fef96cd0..ffa87248a2805 100644 --- a/operator/apis/loki/v1/lokistack_types.go +++ b/operator/apis/loki/v1/lokistack_types.go @@ -561,7 +561,7 @@ type QueryLimitSpec struct { // // +optional // +kubebuilder:validation:Optional - // +kubebuilder:default:="1m" + // +kubebuilder:default:="3m" // +operator-sdk:csv:customresourcedefinitions:type=spec,displayName="Query Timeout" QueryTimeout string `json:"queryTimeout,omitempty"` } @@ -913,6 +913,8 @@ const ( ReasonMissingGatewayOpenShiftBaseDomain LokiStackConditionReason = "MissingGatewayOpenShiftBaseDomain" // ReasonFailedCertificateRotation when the reconciler cannot rotate any of the required TLS certificates. ReasonFailedCertificateRotation LokiStackConditionReason = "FailedCertificateRotation" + // ReasonQueryTimeoutInvalid when the QueryTimeout can not be parsed. + ReasonQueryTimeoutInvalid LokiStackConditionReason = "ReasonQueryTimeoutInvalid" ) // PodStatusMap defines the type for mapping pod status to pod name. 
diff --git a/operator/bundle/community-openshift/manifests/loki-operator.clusterserviceversion.yaml b/operator/bundle/community-openshift/manifests/loki-operator.clusterserviceversion.yaml index 2eeebf0f1928b..b2a2045c762ad 100644 --- a/operator/bundle/community-openshift/manifests/loki-operator.clusterserviceversion.yaml +++ b/operator/bundle/community-openshift/manifests/loki-operator.clusterserviceversion.yaml @@ -150,7 +150,7 @@ metadata: categories: OpenShift Optional, Logging & Tracing certified: "false" containerImage: docker.io/grafana/loki-operator:main-ac1c1fd - createdAt: "2023-05-11T08:04:29Z" + createdAt: "2023-05-22T15:22:48Z" description: The Community Loki Operator provides Kubernetes native deployment and management of Loki and related logging components. operators.operatorframework.io/builder: operator-sdk-unknown diff --git a/operator/bundle/community-openshift/manifests/loki.grafana.com_lokistacks.yaml b/operator/bundle/community-openshift/manifests/loki.grafana.com_lokistacks.yaml index ff8522e4a84e1..658b0644090f4 100644 --- a/operator/bundle/community-openshift/manifests/loki.grafana.com_lokistacks.yaml +++ b/operator/bundle/community-openshift/manifests/loki.grafana.com_lokistacks.yaml @@ -151,7 +151,7 @@ spec: format: int32 type: integer queryTimeout: - default: 1m + default: 3m description: Timeout when querying ingesters or storage during the execution of a query request. type: string @@ -264,7 +264,7 @@ spec: format: int32 type: integer queryTimeout: - default: 1m + default: 3m description: Timeout when querying ingesters or storage during the execution of a query request. 
type: string diff --git a/operator/bundle/community/manifests/loki-operator.clusterserviceversion.yaml b/operator/bundle/community/manifests/loki-operator.clusterserviceversion.yaml index 3463595b0b2de..8242afb0436a3 100644 --- a/operator/bundle/community/manifests/loki-operator.clusterserviceversion.yaml +++ b/operator/bundle/community/manifests/loki-operator.clusterserviceversion.yaml @@ -150,7 +150,7 @@ metadata: categories: OpenShift Optional, Logging & Tracing certified: "false" containerImage: docker.io/grafana/loki-operator:main-ac1c1fd - createdAt: "2023-05-11T08:04:26Z" + createdAt: "2023-05-22T15:22:44Z" description: The Community Loki Operator provides Kubernetes native deployment and management of Loki and related logging components. operators.operatorframework.io/builder: operator-sdk-unknown diff --git a/operator/bundle/community/manifests/loki.grafana.com_lokistacks.yaml b/operator/bundle/community/manifests/loki.grafana.com_lokistacks.yaml index 54f79ddb76bf9..60f83f9f07462 100644 --- a/operator/bundle/community/manifests/loki.grafana.com_lokistacks.yaml +++ b/operator/bundle/community/manifests/loki.grafana.com_lokistacks.yaml @@ -151,7 +151,7 @@ spec: format: int32 type: integer queryTimeout: - default: 1m + default: 3m description: Timeout when querying ingesters or storage during the execution of a query request. type: string @@ -264,7 +264,7 @@ spec: format: int32 type: integer queryTimeout: - default: 1m + default: 3m description: Timeout when querying ingesters or storage during the execution of a query request. 
type: string diff --git a/operator/bundle/openshift/manifests/loki-operator.clusterserviceversion.yaml b/operator/bundle/openshift/manifests/loki-operator.clusterserviceversion.yaml index 6a519756003af..8c34ad05b308f 100644 --- a/operator/bundle/openshift/manifests/loki-operator.clusterserviceversion.yaml +++ b/operator/bundle/openshift/manifests/loki-operator.clusterserviceversion.yaml @@ -150,7 +150,7 @@ metadata: categories: OpenShift Optional, Logging & Tracing certified: "false" containerImage: quay.io/openshift-logging/loki-operator:v0.1.0 - createdAt: "2023-05-11T08:04:32Z" + createdAt: "2023-05-22T15:22:53Z" description: | The Loki Operator for OCP provides a means for configuring and managing a Loki stack for cluster logging. ## Prerequisites and Requirements diff --git a/operator/bundle/openshift/manifests/loki.grafana.com_lokistacks.yaml b/operator/bundle/openshift/manifests/loki.grafana.com_lokistacks.yaml index 24037878ac6d2..ad8189744c786 100644 --- a/operator/bundle/openshift/manifests/loki.grafana.com_lokistacks.yaml +++ b/operator/bundle/openshift/manifests/loki.grafana.com_lokistacks.yaml @@ -151,7 +151,7 @@ spec: format: int32 type: integer queryTimeout: - default: 1m + default: 3m description: Timeout when querying ingesters or storage during the execution of a query request. type: string @@ -264,7 +264,7 @@ spec: format: int32 type: integer queryTimeout: - default: 1m + default: 3m description: Timeout when querying ingesters or storage during the execution of a query request. 
type: string diff --git a/operator/config/crd/bases/loki.grafana.com_lokistacks.yaml b/operator/config/crd/bases/loki.grafana.com_lokistacks.yaml index 3f1ab640fabc8..4821a9d89b1a7 100644 --- a/operator/config/crd/bases/loki.grafana.com_lokistacks.yaml +++ b/operator/config/crd/bases/loki.grafana.com_lokistacks.yaml @@ -134,7 +134,7 @@ spec: format: int32 type: integer queryTimeout: - default: 1m + default: 3m description: Timeout when querying ingesters or storage during the execution of a query request. type: string @@ -247,7 +247,7 @@ spec: format: int32 type: integer queryTimeout: - default: 1m + default: 3m description: Timeout when querying ingesters or storage during the execution of a query request. type: string diff --git a/operator/docs/operator/api.md b/operator/docs/operator/api.md index 7aaa5e4bca670..e5dc354985664 100644 --- a/operator/docs/operator/api.md +++ b/operator/docs/operator/api.md @@ -1302,6 +1302,21 @@ the component onto it.

the component onto it.

+ + +podAntiAffinity
+ + +Kubernetes core/v1.PodAntiAffinity + + + + +(Optional) +

PodAntiAffinity defines the pod anti affinity scheduling rules to schedule pods +of a component.

+ + @@ -1553,6 +1568,9 @@ for the ruler is missing.

"PendingComponents"

ReasonPendingComponents when all/some LokiStack components pending dependencies

+

"ReasonQueryTimeoutInvalid"

+

ReasonQueryTimeoutInvalid when the QueryTimeout cannot be parsed.

+

"ReadyComponents"

ReasonReadyComponents when all LokiStack components are ready to serve traffic.

@@ -3329,7 +3347,8 @@ int32 (Optional) -

Zones defines an array of ZoneSpec that the scheduler will try to satisfy.

+

Zones defines an array of ZoneSpec that the scheduler will try to satisfy. +IMPORTANT: Make sure that the replication factor defined is less than or equal to the number of available zones.

diff --git a/operator/internal/handlers/lokistack_create_or_update.go b/operator/internal/handlers/lokistack_create_or_update.go index c18d634b3f179..7f10e1c0f3e3c 100644 --- a/operator/internal/handlers/lokistack_create_or_update.go +++ b/operator/internal/handlers/lokistack_create_or_update.go @@ -269,6 +269,16 @@ func CreateOrUpdateLokiStack( certRotationRequiredAt = stack.Annotations[manifests.AnnotationCertRotationRequiredAt] } + timeoutConfig, err := manifests.NewTimeoutConfig(stack.Spec.Limits) + if err != nil { + ll.Error(err, "failed to parse query timeout") + return &status.DegradedError{ + Message: fmt.Sprintf("Error parsing query timeout: %s", err), + Reason: lokiv1.ReasonQueryTimeoutInvalid, + Requeue: false, + } + } + // Here we will translate the lokiv1.LokiStack options into manifest options opts := manifests.Options{ Name: req.Name, @@ -286,6 +296,7 @@ func CreateOrUpdateLokiStack( Spec: rulerConfig, Secret: rulerSecret, }, + Timeouts: timeoutConfig, Tenants: manifests.Tenants{ Secrets: tenantSecrets, Configs: tenantConfigs, diff --git a/operator/internal/handlers/lokistack_create_or_update_test.go b/operator/internal/handlers/lokistack_create_or_update_test.go index 933d704ba27c4..7c5a099d30805 100644 --- a/operator/internal/handlers/lokistack_create_or_update_test.go +++ b/operator/internal/handlers/lokistack_create_or_update_test.go @@ -1385,6 +1385,78 @@ func TestCreateOrUpdateLokiStack_MissingTenantsSpec_SetDegraded(t *testing.T) { require.Equal(t, degradedErr, err) } +func TestCreateOrUpdateLokiStack_WhenInvalidQueryTimeout_SetDegraded(t *testing.T) { + sw := &k8sfakes.FakeStatusWriter{} + k := &k8sfakes.FakeClient{} + r := ctrl.Request{ + NamespacedName: types.NamespacedName{ + Name: "my-stack", + Namespace: "some-ns", + }, + } + + degradedErr := &status.DegradedError{ + Message: `Error parsing query timeout: time: invalid duration "invalid"`, + Reason: lokiv1.ReasonQueryTimeoutInvalid, + Requeue: false, + } + + stack := &lokiv1.LokiStack{ + 
TypeMeta: metav1.TypeMeta{ + Kind: "LokiStack", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "my-stack", + Namespace: "some-ns", + UID: "b23f9a38-9672-499f-8c29-15ede74d3ece", + }, + Spec: lokiv1.LokiStackSpec{ + Size: lokiv1.SizeOneXExtraSmall, + Storage: lokiv1.ObjectStorageSpec{ + Schemas: []lokiv1.ObjectStorageSchema{ + { + Version: lokiv1.ObjectStorageSchemaV12, + EffectiveDate: "2023-05-22", + }, + }, + Secret: lokiv1.ObjectStorageSecretSpec{ + Name: defaultSecret.Name, + Type: lokiv1.ObjectStorageSecretS3, + }, + }, + Tenants: &lokiv1.TenantsSpec{ + Mode: "openshift", + }, + Limits: &lokiv1.LimitsSpec{ + Global: &lokiv1.LimitsTemplateSpec{ + QueryLimits: &lokiv1.QueryLimitSpec{ + QueryTimeout: "invalid", + }, + }, + }, + }, + } + + // Create looks up the CR first, so we need to return our fake stack + k.GetStub = func(_ context.Context, name types.NamespacedName, object client.Object, _ ...client.GetOption) error { + if r.Name == name.Name && r.Namespace == name.Namespace { + k.SetClientObject(object, stack) + } + if defaultSecret.Name == name.Name { + k.SetClientObject(object, &defaultSecret) + } + return nil + } + + k.StatusStub = func() client.StatusWriter { return sw } + + err := handlers.CreateOrUpdateLokiStack(context.TODO(), logger, r, k, scheme, featureGates) + + // make sure error is returned + require.Error(t, err) + require.Equal(t, degradedErr, err) +} + func TestCreateOrUpdateLokiStack_RemovesRulerResourcesWhenDisabled(t *testing.T) { sw := &k8sfakes.FakeStatusWriter{} k := &k8sfakes.FakeClient{} diff --git a/operator/internal/manifests/build_test.go b/operator/internal/manifests/build_test.go index ff150f696c8d7..b6d8b2ad0ebe2 100644 --- a/operator/internal/manifests/build_test.go +++ b/operator/internal/manifests/build_test.go @@ -36,6 +36,7 @@ func TestApplyUserOptions_OverrideDefaults(t *testing.T) { }, }, }, + Timeouts: defaultTimeoutConfig, } err := ApplyDefaultSettings(&opt) defs := internal.StackSizeTable[size] @@ -78,6 +79,7 @@ func 
TestApplyUserOptions_AlwaysSetCompactorReplicasToOne(t *testing.T) { }, }, }, + Timeouts: defaultTimeoutConfig, } err := ApplyDefaultSettings(&opt) defs := internal.StackSizeTable[size] @@ -232,6 +234,7 @@ func TestBuildAll_WithFeatureGates_ServiceMonitors(t *testing.T) { ServingCertsService: false, }, }, + Timeouts: defaultTimeoutConfig, }, }, { @@ -250,6 +253,7 @@ func TestBuildAll_WithFeatureGates_ServiceMonitors(t *testing.T) { ServingCertsService: false, }, }, + Timeouts: defaultTimeoutConfig, }, }, } @@ -292,6 +296,7 @@ func TestBuildAll_WithFeatureGates_OpenShift_ServingCertsService(t *testing.T) { ServingCertsService: false, }, }, + Timeouts: defaultTimeoutConfig, }, }, { @@ -309,6 +314,7 @@ func TestBuildAll_WithFeatureGates_OpenShift_ServingCertsService(t *testing.T) { ServingCertsService: true, }, }, + Timeouts: defaultTimeoutConfig, }, }, } @@ -349,6 +355,7 @@ func TestBuildAll_WithFeatureGates_HTTPEncryption(t *testing.T) { Gates: configv1.FeatureGates{ HTTPEncryption: true, }, + Timeouts: defaultTimeoutConfig, } err := ApplyDefaultSettings(&opts) @@ -422,6 +429,7 @@ func TestBuildAll_WithFeatureGates_ServiceMonitorTLSEndpoints(t *testing.T) { HTTPEncryption: true, ServiceMonitorTLSEndpoints: true, }, + Timeouts: defaultTimeoutConfig, } err := ApplyDefaultSettings(&opts) @@ -526,6 +534,7 @@ func TestBuildAll_WithFeatureGates_GRPCEncryption(t *testing.T) { Gates: configv1.FeatureGates{ GRPCEncryption: false, }, + Timeouts: defaultTimeoutConfig, }, }, { @@ -568,6 +577,7 @@ func TestBuildAll_WithFeatureGates_GRPCEncryption(t *testing.T) { Gates: configv1.FeatureGates{ GRPCEncryption: true, }, + Timeouts: defaultTimeoutConfig, }, }, } @@ -692,6 +702,7 @@ func TestBuildAll_WithFeatureGates_RuntimeSeccompProfile(t *testing.T) { Gates: configv1.FeatureGates{ RuntimeSeccompProfile: false, }, + Timeouts: defaultTimeoutConfig, }, }, { @@ -734,6 +745,7 @@ func TestBuildAll_WithFeatureGates_RuntimeSeccompProfile(t *testing.T) { Gates: configv1.FeatureGates{ 
RuntimeSeccompProfile: true, }, + Timeouts: defaultTimeoutConfig, }, }, } @@ -797,6 +809,7 @@ func TestBuildAll_WithFeatureGates_LokiStackGateway(t *testing.T) { HTTPEncryption: true, ServiceMonitorTLSEndpoints: false, }, + Timeouts: defaultTimeoutConfig, }, }, { @@ -835,6 +848,7 @@ func TestBuildAll_WithFeatureGates_LokiStackGateway(t *testing.T) { HTTPEncryption: true, ServiceMonitorTLSEndpoints: true, }, + Timeouts: defaultTimeoutConfig, }, }, } @@ -873,6 +887,7 @@ func TestBuildAll_WithFeatureGates_LokiStackAlerts(t *testing.T) { ServiceMonitors: false, LokiStackAlerts: false, }, + Timeouts: defaultTimeoutConfig, }, }, { @@ -887,6 +902,7 @@ func TestBuildAll_WithFeatureGates_LokiStackAlerts(t *testing.T) { ServiceMonitors: true, LokiStackAlerts: true, }, + Timeouts: defaultTimeoutConfig, }, }, } diff --git a/operator/internal/manifests/config.go b/operator/internal/manifests/config.go index 4cc36623085e8..6ac16a2add034 100644 --- a/operator/internal/manifests/config.go +++ b/operator/internal/manifests/config.go @@ -172,6 +172,7 @@ func ConfigOptions(opt Options) config.Options { IngesterMemoryRequest: opt.ResourceRequirements.Ingester.Requests.Memory().Value(), }, ObjectStorage: opt.ObjectStorage, + HTTPTimeouts: opt.Timeouts.Loki, EnableRemoteReporting: opt.Gates.GrafanaLabsUsageReport, Ruler: config.Ruler{ Enabled: rulerEnabled, diff --git a/operator/internal/manifests/config_test.go b/operator/internal/manifests/config_test.go index de235af16f141..f0df0593a43d0 100644 --- a/operator/internal/manifests/config_test.go +++ b/operator/internal/manifests/config_test.go @@ -4,6 +4,7 @@ import ( "encoding/json" "math/rand" "testing" + "time" "github.com/google/uuid" @@ -41,11 +42,22 @@ func TestConfigOptions_UserOptionsTakePrecedence(t *testing.T) { assert.JSONEq(t, string(expected), string(actual)) } +func testTimeoutConfig() TimeoutConfig { + return TimeoutConfig{ + Loki: config.HTTPTimeoutConfig{ + IdleTimeout: 1 * time.Second, + ReadTimeout: 1 * time.Minute, + 
WriteTimeout: 10 * time.Minute, + }, + } +} + func randomConfigOptions() Options { return Options{ Name: uuid.New().String(), Namespace: uuid.New().String(), Image: uuid.New().String(), + Timeouts: testTimeoutConfig(), Stack: lokiv1.LokiStackSpec{ Size: lokiv1.SizeOneXExtraSmall, Storage: lokiv1.ObjectStorageSpec{}, @@ -256,6 +268,7 @@ func TestConfigOptions_GossipRingConfig(t *testing.T) { Name: "my-stack", Namespace: "my-ns", Stack: tc.spec, + Timeouts: testTimeoutConfig(), } options := ConfigOptions(inOpt) require.Equal(t, tc.wantOptions, options.GossipRing) @@ -361,7 +374,8 @@ func TestConfigOptions_RetentionConfig(t *testing.T) { t.Parallel() inOpt := Options{ - Stack: tc.spec, + Stack: tc.spec, + Timeouts: testTimeoutConfig(), } options := ConfigOptions(inOpt) require.Equal(t, tc.wantOptions, options.Retention) @@ -383,6 +397,7 @@ func TestConfigOptions_RulerAlertManager(t *testing.T) { Mode: lokiv1.Static, }, }, + Timeouts: testTimeoutConfig(), }, wantOptions: nil, }, @@ -394,6 +409,7 @@ func TestConfigOptions_RulerAlertManager(t *testing.T) { Mode: lokiv1.Dynamic, }, }, + Timeouts: testTimeoutConfig(), }, wantOptions: nil, }, @@ -405,6 +421,7 @@ func TestConfigOptions_RulerAlertManager(t *testing.T) { Mode: lokiv1.OpenshiftLogging, }, }, + Timeouts: testTimeoutConfig(), OpenShiftOptions: openshift.Options{ BuildOpts: openshift.BuildOptions{ AlertManagerEnabled: true, @@ -426,6 +443,7 @@ func TestConfigOptions_RulerAlertManager(t *testing.T) { Mode: lokiv1.OpenshiftNetwork, }, }, + Timeouts: testTimeoutConfig(), OpenShiftOptions: openshift.Options{ BuildOpts: openshift.BuildOptions{ AlertManagerEnabled: true, @@ -469,6 +487,7 @@ func TestConfigOptions_RulerAlertManager_UserOverride(t *testing.T) { Mode: lokiv1.Static, }, }, + Timeouts: testTimeoutConfig(), }, wantOptions: nil, }, @@ -480,6 +499,7 @@ func TestConfigOptions_RulerAlertManager_UserOverride(t *testing.T) { Mode: lokiv1.Dynamic, }, }, + Timeouts: testTimeoutConfig(), }, wantOptions: nil, }, @@ 
-494,6 +514,7 @@ func TestConfigOptions_RulerAlertManager_UserOverride(t *testing.T) { Enabled: true, }, }, + Timeouts: testTimeoutConfig(), Ruler: Ruler{ Spec: &lokiv1.RulerConfigSpec{ AlertManagerSpec: &lokiv1.AlertManagerSpec{ @@ -530,6 +551,7 @@ func TestConfigOptions_RulerAlertManager_UserOverride(t *testing.T) { Enabled: true, }, }, + Timeouts: testTimeoutConfig(), Ruler: Ruler{ Spec: &lokiv1.RulerConfigSpec{ AlertManagerSpec: &lokiv1.AlertManagerSpec{ @@ -584,6 +606,7 @@ func TestConfigOptions_RulerOverrides_OCPApplicationTenant(t *testing.T) { Mode: lokiv1.Static, }, }, + Timeouts: testTimeoutConfig(), }, wantOptions: nil, }, @@ -595,6 +618,7 @@ func TestConfigOptions_RulerOverrides_OCPApplicationTenant(t *testing.T) { Mode: lokiv1.Dynamic, }, }, + Timeouts: testTimeoutConfig(), }, wantOptions: nil, }, @@ -609,6 +633,7 @@ func TestConfigOptions_RulerOverrides_OCPApplicationTenant(t *testing.T) { Enabled: true, }, }, + Timeouts: testTimeoutConfig(), Ruler: Ruler{ Spec: &lokiv1.RulerConfigSpec{ AlertManagerSpec: &lokiv1.AlertManagerSpec{ @@ -662,6 +687,7 @@ func TestConfigOptions_RulerOverrides_OCPApplicationTenant(t *testing.T) { Enabled: true, }, }, + Timeouts: testTimeoutConfig(), Ruler: Ruler{ Spec: &lokiv1.RulerConfigSpec{ AlertManagerSpec: &lokiv1.AlertManagerSpec{ @@ -711,6 +737,7 @@ func TestConfigOptions_RulerOverrides(t *testing.T) { Mode: lokiv1.Static, }, }, + Timeouts: testTimeoutConfig(), }, wantOptions: nil, }, @@ -722,6 +749,7 @@ func TestConfigOptions_RulerOverrides(t *testing.T) { Mode: lokiv1.Dynamic, }, }, + Timeouts: testTimeoutConfig(), }, wantOptions: nil, }, @@ -736,6 +764,7 @@ func TestConfigOptions_RulerOverrides(t *testing.T) { Enabled: true, }, }, + Timeouts: testTimeoutConfig(), Ruler: Ruler{ Spec: &lokiv1.RulerConfigSpec{ AlertManagerSpec: &lokiv1.AlertManagerSpec{ @@ -867,6 +896,7 @@ func TestConfigOptions_RulerOverrides(t *testing.T) { Enabled: true, }, }, + Timeouts: testTimeoutConfig(), Ruler: Ruler{ Spec: 
&lokiv1.RulerConfigSpec{ AlertManagerSpec: &lokiv1.AlertManagerSpec{ @@ -917,6 +947,7 @@ func TestConfigOptions_RulerOverrides_OCPUserWorkloadOnlyEnabled(t *testing.T) { Mode: lokiv1.Static, }, }, + Timeouts: testTimeoutConfig(), }, wantOptions: nil, wantOverridesOptions: nil, @@ -929,6 +960,7 @@ func TestConfigOptions_RulerOverrides_OCPUserWorkloadOnlyEnabled(t *testing.T) { Mode: lokiv1.Dynamic, }, }, + Timeouts: testTimeoutConfig(), }, wantOptions: nil, wantOverridesOptions: nil, @@ -944,6 +976,7 @@ func TestConfigOptions_RulerOverrides_OCPUserWorkloadOnlyEnabled(t *testing.T) { Enabled: true, }, }, + Timeouts: testTimeoutConfig(), Ruler: Ruler{ Spec: &lokiv1.RulerConfigSpec{ AlertManagerSpec: &lokiv1.AlertManagerSpec{ @@ -1003,6 +1036,8 @@ func TestConfigOptions_RulerOverrides_OCPUserWorkloadOnlyEnabled(t *testing.T) { Enabled: true, }, }, + + Timeouts: testTimeoutConfig(), Ruler: Ruler{ Spec: &lokiv1.RulerConfigSpec{ AlertManagerSpec: &lokiv1.AlertManagerSpec{ @@ -1131,10 +1166,27 @@ func TestConfigOptions_Replication(t *testing.T) { t.Parallel() inOpt := Options{ - Stack: tc.spec, + Stack: tc.spec, + Timeouts: testTimeoutConfig(), } options := ConfigOptions(inOpt) require.Equal(t, tc.wantOptions, *options.Stack.Replication) }) } } + +func TestConfigOptions_ServerOptions(t *testing.T) { + opt := Options{ + Stack: lokiv1.LokiStackSpec{}, + Timeouts: testTimeoutConfig(), + } + got := ConfigOptions(opt) + + want := config.HTTPTimeoutConfig{ + IdleTimeout: time.Second, + ReadTimeout: time.Minute, + WriteTimeout: 10 * time.Minute, + } + + require.Equal(t, want, got.HTTPTimeouts) +} diff --git a/operator/internal/manifests/gateway.go b/operator/internal/manifests/gateway.go index 7907b7bf9211e..cc6d7444b093a 100644 --- a/operator/internal/manifests/gateway.go +++ b/operator/internal/manifests/gateway.go @@ -142,8 +142,11 @@ func NewGatewayDeployment(opts Options, sha1C string) *appsv1.Deployment { fmt.Sprintf("--logs.read.endpoint=http://%s:%d", 
fqdn(serviceNameQueryFrontendHTTP(opts.Name), opts.Namespace), httpPort), fmt.Sprintf("--logs.tail.endpoint=http://%s:%d", fqdn(serviceNameQueryFrontendHTTP(opts.Name), opts.Namespace), httpPort), fmt.Sprintf("--logs.write.endpoint=http://%s:%d", fqdn(serviceNameDistributorHTTP(opts.Name), opts.Namespace), httpPort), + fmt.Sprintf("--logs.write-timeout=%s", opts.Timeouts.Gateway.UpstreamWriteTimeout), fmt.Sprintf("--rbac.config=%s", path.Join(gateway.LokiGatewayMountDir, gateway.LokiGatewayRbacFileName)), fmt.Sprintf("--tenants.config=%s", path.Join(gateway.LokiGatewayMountDir, gateway.LokiGatewayTenantFileName)), + fmt.Sprintf("--server.read-timeout=%s", opts.Timeouts.Gateway.ReadTimeout), + fmt.Sprintf("--server.write-timeout=%s", opts.Timeouts.Gateway.WriteTimeout), }, Ports: []corev1.ContainerPort{ { diff --git a/operator/internal/manifests/gateway_tenants.go b/operator/internal/manifests/gateway_tenants.go index 1d28c8b19c959..25805690ca6aa 100644 --- a/operator/internal/manifests/gateway_tenants.go +++ b/operator/internal/manifests/gateway_tenants.go @@ -3,12 +3,13 @@ package manifests import ( "github.com/ViaQ/logerr/v2/kverrors" + "github.com/imdario/mergo" + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + configv1 "github.com/grafana/loki/operator/apis/config/v1" lokiv1 "github.com/grafana/loki/operator/apis/loki/v1" "github.com/grafana/loki/operator/internal/manifests/internal/config" "github.com/grafana/loki/operator/internal/manifests/openshift" - "github.com/imdario/mergo" - monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" @@ -35,6 +36,7 @@ func ApplyGatewayDefaultOptions(opts *Options) error { GatewayName(opts.Name), serviceNameGatewayHTTP(opts.Name), gatewayHTTPPortName, + opts.Timeouts.Gateway.WriteTimeout, ComponentLabels(LabelGatewayComponent, opts.Name), RulerName(opts.Name), ) diff --git 
a/operator/internal/manifests/gateway_tenants_test.go b/operator/internal/manifests/gateway_tenants_test.go index a8d582e67ae0d..843194954a01b 100644 --- a/operator/internal/manifests/gateway_tenants_test.go +++ b/operator/internal/manifests/gateway_tenants_test.go @@ -3,6 +3,7 @@ package manifests import ( "path" "testing" + "time" monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" "github.com/stretchr/testify/require" @@ -58,6 +59,11 @@ func TestApplyGatewayDefaultsOptions(t *testing.T) { Mode: lokiv1.Static, }, }, + Timeouts: TimeoutConfig{ + Gateway: GatewayTimeoutConfig{ + WriteTimeout: 1 * time.Minute, + }, + }, }, want: &Options{ Name: "lokistack-ocp", @@ -73,6 +79,11 @@ func TestApplyGatewayDefaultsOptions(t *testing.T) { Mode: lokiv1.Static, }, }, + Timeouts: TimeoutConfig{ + Gateway: GatewayTimeoutConfig{ + WriteTimeout: 1 * time.Minute, + }, + }, OpenShiftOptions: openshift.Options{ BuildOpts: openshift.BuildOptions{ LokiStackName: "lokistack-ocp", @@ -80,6 +91,7 @@ func TestApplyGatewayDefaultsOptions(t *testing.T) { GatewayName: "lokistack-ocp-gateway", GatewaySvcName: "lokistack-ocp-gateway-http", GatewaySvcTargetPort: "public", + GatewayRouteTimeout: 75 * time.Second, RulerName: "lokistack-ocp-ruler", Labels: ComponentLabels(LabelGatewayComponent, "lokistack-ocp"), }, @@ -119,6 +131,11 @@ func TestApplyGatewayDefaultsOptions(t *testing.T) { Mode: lokiv1.Dynamic, }, }, + Timeouts: TimeoutConfig{ + Gateway: GatewayTimeoutConfig{ + WriteTimeout: 1 * time.Minute, + }, + }, }, want: &Options{ Name: "lokistack-ocp", @@ -134,6 +151,11 @@ func TestApplyGatewayDefaultsOptions(t *testing.T) { Mode: lokiv1.Dynamic, }, }, + Timeouts: TimeoutConfig{ + Gateway: GatewayTimeoutConfig{ + WriteTimeout: 1 * time.Minute, + }, + }, OpenShiftOptions: openshift.Options{ BuildOpts: openshift.BuildOptions{ LokiStackName: "lokistack-ocp", @@ -141,6 +163,7 @@ func TestApplyGatewayDefaultsOptions(t *testing.T) { GatewayName: 
"lokistack-ocp-gateway", GatewaySvcName: "lokistack-ocp-gateway-http", GatewaySvcTargetPort: "public", + GatewayRouteTimeout: 75 * time.Second, RulerName: "lokistack-ocp-ruler", Labels: ComponentLabels(LabelGatewayComponent, "lokistack-ocp"), }, @@ -163,6 +186,11 @@ func TestApplyGatewayDefaultsOptions(t *testing.T) { Mode: lokiv1.OpenshiftLogging, }, }, + Timeouts: TimeoutConfig{ + Gateway: GatewayTimeoutConfig{ + WriteTimeout: 1 * time.Minute, + }, + }, Tenants: Tenants{ Configs: map[string]TenantConfig{ "application": { @@ -197,6 +225,11 @@ func TestApplyGatewayDefaultsOptions(t *testing.T) { Mode: lokiv1.OpenshiftLogging, }, }, + Timeouts: TimeoutConfig{ + Gateway: GatewayTimeoutConfig{ + WriteTimeout: 1 * time.Minute, + }, + }, Tenants: Tenants{ Configs: map[string]TenantConfig{ "application": { @@ -223,6 +256,7 @@ func TestApplyGatewayDefaultsOptions(t *testing.T) { GatewayName: "lokistack-ocp-gateway", GatewaySvcName: "lokistack-ocp-gateway-http", GatewaySvcTargetPort: "public", + GatewayRouteTimeout: 75 * time.Second, RulerName: "lokistack-ocp-ruler", Labels: ComponentLabels(LabelGatewayComponent, "lokistack-ocp"), }, @@ -268,6 +302,11 @@ func TestApplyGatewayDefaultsOptions(t *testing.T) { Mode: lokiv1.OpenshiftNetwork, }, }, + Timeouts: TimeoutConfig{ + Gateway: GatewayTimeoutConfig{ + WriteTimeout: 1 * time.Minute, + }, + }, Tenants: Tenants{ Configs: map[string]TenantConfig{ "network": { @@ -292,6 +331,11 @@ func TestApplyGatewayDefaultsOptions(t *testing.T) { Mode: lokiv1.OpenshiftNetwork, }, }, + Timeouts: TimeoutConfig{ + Gateway: GatewayTimeoutConfig{ + WriteTimeout: 1 * time.Minute, + }, + }, Tenants: Tenants{ Configs: map[string]TenantConfig{ "network": { @@ -308,6 +352,7 @@ func TestApplyGatewayDefaultsOptions(t *testing.T) { GatewayName: "lokistack-ocp-gateway", GatewaySvcName: "lokistack-ocp-gateway-http", GatewaySvcTargetPort: "public", + GatewayRouteTimeout: 75 * time.Second, RulerName: "lokistack-ocp-ruler", Labels: 
ComponentLabels(LabelGatewayComponent, "lokistack-ocp"), }, diff --git a/operator/internal/manifests/gateway_test.go b/operator/internal/manifests/gateway_test.go index a010a218c5575..6517e4c1a71e7 100644 --- a/operator/internal/manifests/gateway_test.go +++ b/operator/internal/manifests/gateway_test.go @@ -48,6 +48,7 @@ func TestNewGatewayDeployment_HasTemplateConfigHashAnnotation(t *testing.T) { }, }, }, + Timeouts: defaultTimeoutConfig, }, sha1C) expected := "loki.grafana.com/config-hash" @@ -95,6 +96,7 @@ func TestNewGatewayDeployment_HasNodeSelector(t *testing.T) { }, }, }, + Timeouts: defaultTimeoutConfig, }, "deadbeef") require.Equal(t, dpl.Spec.Template.Spec.NodeSelector, selector) @@ -129,6 +131,7 @@ func TestNewGatewayDeployment_HasTemplateCertRotationRequiredAtAnnotation(t *tes }, }, }, + Timeouts: defaultTimeoutConfig, }, sha1C) expected := "loki.grafana.com/certRotationRequiredAt" @@ -187,6 +190,7 @@ func TestGatewayConfigMap_ReturnsSHA1OfBinaryContents(t *testing.T) { }, }, }, + Timeouts: defaultTimeoutConfig, Tenants: Tenants{ Secrets: []*TenantSecrets{ { @@ -221,6 +225,7 @@ func TestBuildGateway_HasConfigForTenantMode(t *testing.T) { Mode: lokiv1.OpenshiftLogging, }, }, + Timeouts: defaultTimeoutConfig, }) require.NoError(t, err) @@ -257,6 +262,7 @@ func TestBuildGateway_HasExtraObjectsForTenantMode(t *testing.T) { Mode: lokiv1.OpenshiftLogging, }, }, + Timeouts: defaultTimeoutConfig, }) require.NoError(t, err) @@ -292,6 +298,7 @@ func TestBuildGateway_WithExtraObjectsForTenantMode_RouteSvcMatches(t *testing.T Mode: lokiv1.OpenshiftLogging, }, }, + Timeouts: defaultTimeoutConfig, }) require.NoError(t, err) @@ -329,6 +336,7 @@ func TestBuildGateway_WithExtraObjectsForTenantMode_ServiceAccountNameMatches(t Mode: lokiv1.OpenshiftLogging, }, }, + Timeouts: defaultTimeoutConfig, }) require.NoError(t, err) @@ -367,6 +375,7 @@ func TestBuildGateway_WithExtraObjectsForTenantMode_ReplacesIngressWithRoute(t * Mode: lokiv1.OpenshiftLogging, }, }, + Timeouts: 
defaultTimeoutConfig, }) require.NoError(t, err) @@ -432,6 +441,7 @@ func TestBuildGateway_WithTLSProfile(t *testing.T) { }, }, }, + Timeouts: defaultTimeoutConfig, }, expectedArgs: []string{ "--tls.min-version=min-version", @@ -462,6 +472,7 @@ func TestBuildGateway_WithTLSProfile(t *testing.T) { Mode: lokiv1.Dynamic, }, }, + Timeouts: defaultTimeoutConfig, }, expectedArgs: []string{ "--tls.min-version=min-version", @@ -492,6 +503,7 @@ func TestBuildGateway_WithTLSProfile(t *testing.T) { Mode: lokiv1.OpenshiftLogging, }, }, + Timeouts: defaultTimeoutConfig, }, expectedArgs: []string{ "--tls.min-version=min-version", @@ -563,6 +575,7 @@ func TestBuildGateway_WithRulesEnabled(t *testing.T) { }, }, }, + Timeouts: defaultTimeoutConfig, }, missingArgs: []string{ "--logs.rules.endpoint=http://abcd-ruler-http.efgh.svc.cluster.local:3100", @@ -612,6 +625,7 @@ func TestBuildGateway_WithRulesEnabled(t *testing.T) { }, }, }, + Timeouts: defaultTimeoutConfig, }, wantArgs: []string{ "--logs.rules.endpoint=http://abcd-ruler-http.efgh.svc.cluster.local:3100", @@ -639,6 +653,7 @@ func TestBuildGateway_WithRulesEnabled(t *testing.T) { Mode: lokiv1.Dynamic, }, }, + Timeouts: defaultTimeoutConfig, }, wantArgs: []string{ "--logs.rules.endpoint=http://abcd-ruler-http.efgh.svc.cluster.local:3100", @@ -670,6 +685,7 @@ func TestBuildGateway_WithRulesEnabled(t *testing.T) { Mode: lokiv1.OpenshiftLogging, }, }, + Timeouts: defaultTimeoutConfig, }, wantArgs: []string{ "--logs.rules.endpoint=https://abcd-ruler-http.efgh.svc.cluster.local:3100", @@ -701,6 +717,7 @@ func TestBuildGateway_WithRulesEnabled(t *testing.T) { Mode: lokiv1.OpenshiftLogging, }, }, + Timeouts: defaultTimeoutConfig, }, wantArgs: []string{ "--logs.rules.endpoint=https://abcd-ruler-http.efgh.svc.cluster.local:3100", @@ -754,6 +771,7 @@ func TestBuildGateway_WithHTTPEncryption(t *testing.T) { Authentication: []lokiv1.AuthenticationSpec{}, }, }, + Timeouts: defaultTimeoutConfig, }) require.NoError(t, err) @@ -773,8 +791,11 
@@ func TestBuildGateway_WithHTTPEncryption(t *testing.T) { "--logs.read.endpoint=https://abcd-query-frontend-http.efgh.svc.cluster.local:3100", "--logs.tail.endpoint=https://abcd-query-frontend-http.efgh.svc.cluster.local:3100", "--logs.write.endpoint=https://abcd-distributor-http.efgh.svc.cluster.local:3100", + "--logs.write-timeout=4m0s", "--rbac.config=/etc/lokistack-gateway/rbac.yaml", "--tenants.config=/etc/lokistack-gateway/tenants.yaml", + "--server.read-timeout=48s", + "--server.write-timeout=6m0s", "--logs.rules.endpoint=https://abcd-ruler-http.efgh.svc.cluster.local:3100", "--logs.rules.read-only=true", "--tls.client-auth-type=NoClientCert", @@ -926,6 +947,7 @@ func TestBuildGateway_PodDisruptionBudget(t *testing.T) { Mode: lokiv1.OpenshiftLogging, }, }, + Timeouts: defaultTimeoutConfig, } objs, err := BuildGateway(opts) require.NoError(t, err) @@ -957,6 +979,7 @@ func TestBuildGateway_TopologySpreadConstraint(t *testing.T) { Mode: lokiv1.OpenshiftLogging, }, }, + Timeouts: defaultTimeoutConfig, }, "deadbeef") require.EqualValues(t, dpl.Spec.Template.Spec.TopologySpreadConstraints, []corev1.TopologySpreadConstraint{ diff --git a/operator/internal/manifests/internal/config/build_test.go b/operator/internal/manifests/internal/config/build_test.go index 78c8a64da6b58..d6e44811b6416 100644 --- a/operator/internal/manifests/internal/config/build_test.go +++ b/operator/internal/manifests/internal/config/build_test.go @@ -2,6 +2,7 @@ package config import ( "testing" + "time" "github.com/stretchr/testify/require" "k8s.io/utils/pointer" @@ -152,8 +153,9 @@ server: grpc_server_max_recv_msg_size: 104857600 grpc_server_max_send_msg_size: 104857600 http_listen_port: 3100 - http_server_idle_timeout: 120s - http_server_write_timeout: 1m + http_server_idle_timeout: 30s + http_server_read_timeout: 30s + http_server_write_timeout: 10m0s log_level: info storage_config: boltdb_shipper: @@ -247,6 +249,11 @@ overrides: }, }, EnableRemoteReporting: true, + HTTPTimeouts: 
HTTPTimeoutConfig{ + IdleTimeout: 30 * time.Second, + ReadTimeout: 30 * time.Second, + WriteTimeout: 10 * time.Minute, + }, } cfg, rCfg, err := Build(opts) require.NoError(t, err) @@ -395,8 +402,9 @@ server: grpc_server_max_recv_msg_size: 104857600 grpc_server_max_send_msg_size: 104857600 http_listen_port: 3100 - http_server_idle_timeout: 120s - http_server_write_timeout: 1m + http_server_idle_timeout: 30s + http_server_read_timeout: 30s + http_server_write_timeout: 10m0s log_level: info storage_config: boltdb_shipper: @@ -520,6 +528,11 @@ overrides: }, }, }, + HTTPTimeouts: HTTPTimeoutConfig{ + IdleTimeout: 30 * time.Second, + ReadTimeout: 30 * time.Second, + WriteTimeout: 10 * time.Minute, + }, } cfg, rCfg, err := Build(opts) require.NoError(t, err) @@ -799,8 +812,9 @@ server: grpc_server_max_recv_msg_size: 104857600 grpc_server_max_send_msg_size: 104857600 http_listen_port: 3100 - http_server_idle_timeout: 120s - http_server_write_timeout: 1m + http_server_idle_timeout: 30s + http_server_read_timeout: 30s + http_server_write_timeout: 10m0s log_level: info storage_config: boltdb_shipper: @@ -941,6 +955,11 @@ overrides: }, }, EnableRemoteReporting: true, + HTTPTimeouts: HTTPTimeoutConfig{ + IdleTimeout: 30 * time.Second, + ReadTimeout: 30 * time.Second, + WriteTimeout: 10 * time.Minute, + }, } cfg, rCfg, err := Build(opts) require.NoError(t, err) @@ -1143,8 +1162,9 @@ server: grpc_server_max_recv_msg_size: 104857600 grpc_server_max_send_msg_size: 104857600 http_listen_port: 3100 - http_server_idle_timeout: 120s - http_server_write_timeout: 1m + http_server_idle_timeout: 30s + http_server_read_timeout: 30s + http_server_write_timeout: 10m0s log_level: info storage_config: boltdb_shipper: @@ -1286,6 +1306,11 @@ overrides: }, }, EnableRemoteReporting: true, + HTTPTimeouts: HTTPTimeoutConfig{ + IdleTimeout: 30 * time.Second, + ReadTimeout: 30 * time.Second, + WriteTimeout: 10 * time.Minute, + }, } cfg, rCfg, err := Build(opts) require.NoError(t, err) @@ -1501,8 
+1526,9 @@ server: grpc_server_max_recv_msg_size: 104857600 grpc_server_max_send_msg_size: 104857600 http_listen_port: 3100 - http_server_idle_timeout: 120s - http_server_write_timeout: 1m + http_server_idle_timeout: 30s + http_server_read_timeout: 30s + http_server_write_timeout: 10m0s log_level: info storage_config: boltdb_shipper: @@ -1661,6 +1687,11 @@ overrides: }, }, EnableRemoteReporting: true, + HTTPTimeouts: HTTPTimeoutConfig{ + IdleTimeout: 30 * time.Second, + ReadTimeout: 30 * time.Second, + WriteTimeout: 10 * time.Minute, + }, } cfg, rCfg, err := Build(opts) require.NoError(t, err) @@ -1817,8 +1848,9 @@ server: grpc_server_max_recv_msg_size: 104857600 grpc_server_max_send_msg_size: 104857600 http_listen_port: 3100 - http_server_idle_timeout: 120s - http_server_write_timeout: 1m + http_server_idle_timeout: 30s + http_server_read_timeout: 30s + http_server_write_timeout: 10m0s log_level: info storage_config: boltdb_shipper: @@ -1981,6 +2013,11 @@ overrides: Enabled: true, DeleteWorkerCount: 50, }, + HTTPTimeouts: HTTPTimeoutConfig{ + IdleTimeout: 30 * time.Second, + ReadTimeout: 30 * time.Second, + WriteTimeout: 10 * time.Minute, + }, } cfg, rCfg, err := Build(opts) require.NoError(t, err) @@ -2209,8 +2246,9 @@ server: grpc_server_max_recv_msg_size: 104857600 grpc_server_max_send_msg_size: 104857600 http_listen_port: 3100 - http_server_idle_timeout: 120s - http_server_write_timeout: 1m + http_server_idle_timeout: 30s + http_server_read_timeout: 30s + http_server_write_timeout: 10m0s log_level: info storage_config: boltdb_shipper: @@ -2386,6 +2424,11 @@ overrides: }, }, EnableRemoteReporting: true, + HTTPTimeouts: HTTPTimeoutConfig{ + IdleTimeout: 30 * time.Second, + ReadTimeout: 30 * time.Second, + WriteTimeout: 10 * time.Minute, + }, } cfg, rCfg, err := Build(opts) require.NoError(t, err) @@ -2572,8 +2615,9 @@ server: grpc_server_max_recv_msg_size: 104857600 grpc_server_max_send_msg_size: 104857600 http_listen_port: 3100 - http_server_idle_timeout: 120s 
- http_server_write_timeout: 1m + http_server_idle_timeout: 30s + http_server_read_timeout: 30s + http_server_write_timeout: 10m0s tls_min_version: VersionTLS12 tls_cipher_suites: cipher1,cipher2 http_tls_config: @@ -2718,6 +2762,11 @@ overrides: }, }, EnableRemoteReporting: true, + HTTPTimeouts: HTTPTimeoutConfig{ + IdleTimeout: 30 * time.Second, + ReadTimeout: 30 * time.Second, + WriteTimeout: 10 * time.Minute, + }, } cfg, rCfg, err := Build(opts) require.NoError(t, err) @@ -2946,8 +2995,9 @@ server: grpc_server_max_recv_msg_size: 104857600 grpc_server_max_send_msg_size: 104857600 http_listen_port: 3100 - http_server_idle_timeout: 120s - http_server_write_timeout: 1m + http_server_idle_timeout: 30s + http_server_read_timeout: 30s + http_server_write_timeout: 10m0s log_level: info storage_config: boltdb_shipper: @@ -3200,6 +3250,11 @@ overrides: }, }, EnableRemoteReporting: true, + HTTPTimeouts: HTTPTimeoutConfig{ + IdleTimeout: 30 * time.Second, + ReadTimeout: 30 * time.Second, + WriteTimeout: 10 * time.Minute, + }, } cfg, rCfg, err := Build(opts) require.NoError(t, err) @@ -3350,8 +3405,9 @@ server: grpc_server_max_recv_msg_size: 104857600 grpc_server_max_send_msg_size: 104857600 http_listen_port: 3100 - http_server_idle_timeout: 120s - http_server_write_timeout: 1m + http_server_idle_timeout: 30s + http_server_read_timeout: 30s + http_server_write_timeout: 10m0s log_level: info storage_config: boltdb_shipper: @@ -3446,6 +3502,11 @@ overrides: }, }, EnableRemoteReporting: true, + HTTPTimeouts: HTTPTimeoutConfig{ + IdleTimeout: 30 * time.Second, + ReadTimeout: 30 * time.Second, + WriteTimeout: 10 * time.Minute, + }, } cfg, rCfg, err := Build(opts) require.NoError(t, err) diff --git a/operator/internal/manifests/internal/config/loki-config.yaml b/operator/internal/manifests/internal/config/loki-config.yaml index a2db6e3a6eeb8..4e44f081e8938 100644 --- a/operator/internal/manifests/internal/config/loki-config.yaml +++ 
b/operator/internal/manifests/internal/config/loki-config.yaml @@ -422,8 +422,9 @@ server: grpc_server_max_recv_msg_size: 104857600 grpc_server_max_send_msg_size: 104857600 http_listen_port: 3100 - http_server_idle_timeout: 120s - http_server_write_timeout: 1m + http_server_idle_timeout: {{ .HTTPTimeouts.IdleTimeout }} + http_server_read_timeout: {{ .HTTPTimeouts.ReadTimeout }} + http_server_write_timeout: {{ .HTTPTimeouts.WriteTimeout }} {{- if or .Gates.HTTPEncryption .Gates.GRPCEncryption }} tls_min_version: {{ .TLS.MinTLSVersion }} tls_cipher_suites: {{ .TLS.CipherSuitesString }} diff --git a/operator/internal/manifests/internal/config/options.go b/operator/internal/manifests/internal/config/options.go index 557e7d570616a..3a7698c851ca1 100644 --- a/operator/internal/manifests/internal/config/options.go +++ b/operator/internal/manifests/internal/config/options.go @@ -4,6 +4,7 @@ import ( "fmt" "math" "strings" + "time" configv1 "github.com/grafana/loki/operator/apis/config/v1" lokiv1 "github.com/grafana/loki/operator/apis/loki/v1" @@ -31,6 +32,8 @@ type Options struct { ObjectStorage storage.Options + HTTPTimeouts HTTPTimeoutConfig + Retention RetentionOptions Overrides map[string]LokiOverrides @@ -67,6 +70,13 @@ type GossipRing struct { MembersDiscoveryAddr string } +// HTTPTimeoutConfig defines the HTTP server config options. 
+type HTTPTimeoutConfig struct { + IdleTimeout time.Duration + ReadTimeout time.Duration + WriteTimeout time.Duration +} + // Ruler configuration type Ruler struct { Enabled bool diff --git a/operator/internal/manifests/openshift/build_test.go b/operator/internal/manifests/openshift/build_test.go index 0f90b7c693505..875138614629c 100644 --- a/operator/internal/manifests/openshift/build_test.go +++ b/operator/internal/manifests/openshift/build_test.go @@ -1,17 +1,20 @@ package openshift import ( + "fmt" "testing" + "time" "github.com/stretchr/testify/require" - lokiv1 "github.com/grafana/loki/operator/apis/loki/v1" corev1 "k8s.io/api/core/v1" rbacv1 "k8s.io/api/rbac/v1" + + lokiv1 "github.com/grafana/loki/operator/apis/loki/v1" ) func TestBuildGatewayTenantModeObjects_ClusterRoleRefMatches(t *testing.T) { - opts := NewOptions("abc", "ns", "abc", "abc", "abc", map[string]string{}, "abc"). + opts := NewOptions("abc", "ns", "abc", "abc", "abc", 1*time.Minute, map[string]string{}, "abc"). WithTenantsForMode(lokiv1.OpenshiftLogging, "example.com", map[string]TenantData{}) objs := BuildGatewayTenantModeObjects(*opts) @@ -23,7 +26,7 @@ func TestBuildGatewayTenantModeObjects_ClusterRoleRefMatches(t *testing.T) { } func TestBuildGatewayObjects_MonitoringClusterRoleRefMatches(t *testing.T) { - opts := NewOptions("abc", "ns", "abc", "abc", "abc", map[string]string{}, "abc") + opts := NewOptions("abc", "ns", "abc", "abc", "abc", 1*time.Minute, map[string]string{}, "abc") objs := BuildGatewayObjects(*opts) cr := objs[2].(*rbacv1.Role) @@ -33,8 +36,23 @@ func TestBuildGatewayObjects_MonitoringClusterRoleRefMatches(t *testing.T) { require.Equal(t, cr.Name, rb.RoleRef.Name) } +func TestBuildGatewayObjets_RouteWithTimeoutAnnotation(t *testing.T) { + gwWriteTimeout := 1 * time.Minute + opts := NewOptions("abc", "ns", "abc", "abc", "abc", gwWriteTimeout, map[string]string{}, "abc") + + objs := BuildGatewayObjects(*opts) + a := objs[0].GetAnnotations() + + got, ok := 
a[annotationGatewayRouteTimeout] + require.True(t, ok) + + routeTimeout := gwWriteTimeout + gatewayRouteTimeoutExtension + want := fmt.Sprintf("%.fs", routeTimeout.Seconds()) + require.Equal(t, want, got) +} + func TestBuildRulerObjects_ClusterRoleRefMatches(t *testing.T) { - opts := NewOptions("abc", "ns", "abc", "abc", "abc", map[string]string{}, "abc") + opts := NewOptions("abc", "ns", "abc", "abc", "abc", 1*time.Minute, map[string]string{}, "abc") objs := BuildRulerObjects(*opts) sa := objs[1].(*corev1.ServiceAccount) diff --git a/operator/internal/manifests/openshift/options.go b/operator/internal/manifests/openshift/options.go index 8bca7b82fb384..2ebf5ebde1f46 100644 --- a/operator/internal/manifests/openshift/options.go +++ b/operator/internal/manifests/openshift/options.go @@ -3,6 +3,7 @@ package openshift import ( "fmt" "math/rand" + "time" lokiv1 "github.com/grafana/loki/operator/apis/loki/v1" ) @@ -41,6 +42,7 @@ type BuildOptions struct { GatewayName string GatewaySvcName string GatewaySvcTargetPort string + GatewayRouteTimeout time.Duration RulerName string Labels map[string]string AlertManagerEnabled bool @@ -56,6 +58,7 @@ type TenantData struct { func NewOptions( stackName, stackNamespace string, gwName, gwSvcName, gwPortName string, + gwWriteTimeout time.Duration, gwLabels map[string]string, rulerName string, ) *Options { @@ -66,6 +69,7 @@ func NewOptions( GatewayName: gwName, GatewaySvcName: gwSvcName, GatewaySvcTargetPort: gwPortName, + GatewayRouteTimeout: gwWriteTimeout + gatewayRouteTimeoutExtension, Labels: gwLabels, RulerName: rulerName, }, diff --git a/operator/internal/manifests/openshift/route.go b/operator/internal/manifests/openshift/route.go index 18a9f12d71b84..140595b9bed66 100644 --- a/operator/internal/manifests/openshift/route.go +++ b/operator/internal/manifests/openshift/route.go @@ -1,6 +1,8 @@ package openshift import ( + "fmt" + routev1 "github.com/openshift/api/route/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 
"k8s.io/apimachinery/pkg/util/intstr" @@ -19,6 +21,9 @@ func BuildRoute(opts Options) client.Object { Name: routeName(opts), Namespace: opts.BuildOpts.LokiStackNamespace, Labels: opts.BuildOpts.Labels, + Annotations: map[string]string{ + annotationGatewayRouteTimeout: fmt.Sprintf("%.fs", opts.BuildOpts.GatewayRouteTimeout.Seconds()), + }, }, Spec: routev1.RouteSpec{ To: routev1.RouteTargetReference{ diff --git a/operator/internal/manifests/openshift/var.go b/operator/internal/manifests/openshift/var.go index cda53dc0d0844..ab4ec0996e598 100644 --- a/operator/internal/manifests/openshift/var.go +++ b/operator/internal/manifests/openshift/var.go @@ -2,6 +2,13 @@ package openshift import ( "fmt" + "time" +) + +const ( + annotationGatewayRouteTimeout = "haproxy.router.openshift.io/timeout" + + gatewayRouteTimeoutExtension = 15 * time.Second ) var ( diff --git a/operator/internal/manifests/options.go b/operator/internal/manifests/options.go index 3d9547b610ac8..24e07e375ef47 100644 --- a/operator/internal/manifests/options.go +++ b/operator/internal/manifests/options.go @@ -2,6 +2,9 @@ package manifests import ( "strings" + "time" + + "github.com/grafana/loki/operator/internal/manifests/internal/config" configv1 "github.com/grafana/loki/operator/apis/config/v1" lokiv1 "github.com/grafana/loki/operator/apis/loki/v1" @@ -34,11 +37,26 @@ type Options struct { OpenShiftOptions openshift.Options + Timeouts TimeoutConfig + Tenants Tenants TLSProfile TLSProfileSpec } +// GatewayTimeoutConfig contains the HTTP server timeout options for the LokiStack gateway. +type GatewayTimeoutConfig struct { + ReadTimeout time.Duration + WriteTimeout time.Duration + UpstreamWriteTimeout time.Duration +} + +// TimeoutConfig contains the HTTP server timeout options for the Loki components and the gateway. +type TimeoutConfig struct { + Loki config.HTTPTimeoutConfig + Gateway GatewayTimeoutConfig +} + // Tenants contains the configuration per tenant and secrets for authn/authz.
// Secrets are required only for modes static and dynamic to reconcile the OIDC provider. // Configs are required only for all modes to reconcile rules and gateway configuration.
@@ -105,3 +123,79 @@ type TLSProfileSpec struct {
 func (o Options) TLSCipherSuites() string {
 	return strings.Join(o.TLSProfile.Ciphers, ",")
 }
+
+// NewTimeoutConfig creates a TimeoutConfig from the QueryTimeout values in the spec's limits.
+// It picks the largest of lokiDefaultQueryTimeout, the global query timeout and every
+// per-tenant query timeout, and derives all HTTP server timeouts from that value.
+// A nil spec, or a spec with neither global nor tenant limits, yields defaultTimeoutConfig.
+// An error is returned if any configured QueryTimeout cannot be parsed by time.ParseDuration.
+func NewTimeoutConfig(s *lokiv1.LimitsSpec) (TimeoutConfig, error) {
+	if s == nil {
+		return defaultTimeoutConfig, nil
+	}
+
+	if s.Global == nil && s.Tenants == nil {
+		return defaultTimeoutConfig, nil
+	}
+
+	queryTimeout := lokiDefaultQueryTimeout
+	// Global may be nil while per-tenant limits are set, so it must be nil-checked
+	// before it is dereferenced.
+	if s.Global != nil && s.Global.QueryLimits != nil && s.Global.QueryLimits.QueryTimeout != "" {
+		globalQueryTimeout, err := time.ParseDuration(s.Global.QueryLimits.QueryTimeout)
+		if err != nil {
+			return TimeoutConfig{}, err
+		}
+
+		if globalQueryTimeout > queryTimeout {
+			queryTimeout = globalQueryTimeout
+		}
+	}
+
+	for _, tLimit := range s.Tenants {
+		if tLimit.QueryLimits == nil || tLimit.QueryLimits.QueryTimeout == "" {
+			continue
+		}
+
+		tenantQueryTimeout, err := time.ParseDuration(tLimit.QueryLimits.QueryTimeout)
+		if err != nil {
+			return TimeoutConfig{}, err
+		}
+
+		if tenantQueryTimeout > queryTimeout {
+			queryTimeout = tenantQueryTimeout
+		}
+	}
+
+	return calculateHTTPTimeouts(queryTimeout), nil
+}
+
+// calculateHTTPTimeouts derives the Loki and gateway HTTP server timeouts from queryTimeout:
+//   - idle timeout: lokiDefaultHTTPIdleTimeout, capped at queryTimeout
+//   - read timeout: a tenth of queryTimeout
+//   - write timeout: queryTimeout plus lokiQueryWriteDuration
+//
+// The gateway read/write timeouts add gatewayReadDuration/gatewayWriteDuration on top of the
+// Loki values, and the gateway's upstream write timeout equals the Loki write timeout.
+func calculateHTTPTimeouts(queryTimeout time.Duration) TimeoutConfig {
+	idleTimeout := lokiDefaultHTTPIdleTimeout
+	if queryTimeout < idleTimeout {
+		idleTimeout = queryTimeout
+	}
+
+	readTimeout := queryTimeout / 10
+	writeTimeout := queryTimeout + lokiQueryWriteDuration
+
+	return TimeoutConfig{
+		Loki: config.HTTPTimeoutConfig{
+			IdleTimeout:  idleTimeout,
+			ReadTimeout:  readTimeout,
+			WriteTimeout: writeTimeout,
+		},
+		Gateway: GatewayTimeoutConfig{
+			ReadTimeout:          readTimeout + gatewayReadDuration,
+			WriteTimeout:         writeTimeout + gatewayWriteDuration,
+			UpstreamWriteTimeout: writeTimeout,
+		},
+	}
+}
diff --git a/operator/internal/manifests/options_test.go b/operator/internal/manifests/options_test.go
new file mode 100644
index 0000000000000..6d49649620437
--- /dev/null
+++ b/operator/internal/manifests/options_test.go
@@ -0,0 +1,176 @@
+package manifests
+
+import (
+	"testing"
+	"time"
+
+	"github.com/stretchr/testify/require"
+
+	"github.com/grafana/loki/operator/internal/manifests/internal/config"
+
+	lokiv1 "github.com/grafana/loki/operator/apis/loki/v1"
+)
+
+func TestNewTimeoutConfig_ReturnsDefaults_WhenLimitsSpecEmpty(t *testing.T) {
+	s := lokiv1.LokiStack{}
+
+	got, err := NewTimeoutConfig(s.Spec.Limits)
+	require.NoError(t, err)
+	require.Equal(t, defaultTimeoutConfig, got)
+}
+
+func TestNewTimeoutConfig_ReturnsCustomConfig_WhenLimitsSpecNotEmpty(t *testing.T) {
+	s := lokiv1.LokiStack{
+		Spec: lokiv1.LokiStackSpec{
+			Limits: &lokiv1.LimitsSpec{
+				Global: &lokiv1.LimitsTemplateSpec{
+					QueryLimits: &lokiv1.QueryLimitSpec{
+						QueryTimeout: "10m",
+					},
+				},
+			},
+		},
+	}
+
+	got, err := NewTimeoutConfig(s.Spec.Limits)
+	require.NoError(t, err)
+
+	want := TimeoutConfig{
+		Loki: config.HTTPTimeoutConfig{
+			IdleTimeout:  30 * time.Second,
+			ReadTimeout:  1 * time.Minute,
+			WriteTimeout: 11 * time.Minute,
+		},
+		Gateway: GatewayTimeoutConfig{
+			ReadTimeout:          1*time.Minute + gatewayReadDuration,
+			WriteTimeout:         11*time.Minute + gatewayWriteDuration,
+			UpstreamWriteTimeout: 11 * time.Minute,
+		},
+	}
+
+	require.Equal(t, want, got)
+}
+
+func TestNewTimeoutConfig_ReturnsCustomConfig_WhenGlobalLimitsNil_UseMaxTenantQueryTimeout(t *testing.T) {
+	s := lokiv1.LokiStack{
+		Spec: lokiv1.LokiStackSpec{
+			Limits: &lokiv1.LimitsSpec{
+				Tenants: map[string]lokiv1.LimitsTemplateSpec{
+					"tenant-a": {
+						QueryLimits: &lokiv1.QueryLimitSpec{
+							QueryTimeout: "20m",
+						},
+					},
+				},
+			},
+		},
+	}
+
+	got, err := NewTimeoutConfig(s.Spec.Limits)
+	require.NoError(t, err)
+
+	want := TimeoutConfig{
+		Loki: config.HTTPTimeoutConfig{
+			IdleTimeout:  30 * time.Second,
+			ReadTimeout:  2 * time.Minute,
+			WriteTimeout: 21 * time.Minute,
+		},
+		Gateway: GatewayTimeoutConfig{
+			ReadTimeout:          2*time.Minute + gatewayReadDuration,
+			WriteTimeout:         21*time.Minute + gatewayWriteDuration,
+			UpstreamWriteTimeout: 21 * time.Minute,
+		},
+	}
+
+	require.Equal(t, want, got)
+}
+
+func TestNewTimeoutConfig_ReturnsCustomConfig_WhenLimitsSpecNotEmpty_UseMaxTenantQueryTimeout(t *testing.T) { + s := lokiv1.LokiStack{ + Spec: lokiv1.LokiStackSpec{ + Limits: &lokiv1.LimitsSpec{ + Global: &lokiv1.LimitsTemplateSpec{ + QueryLimits: &lokiv1.QueryLimitSpec{ + QueryTimeout: "10m", + }, + }, + Tenants: map[string]lokiv1.LimitsTemplateSpec{ + "tenant-a": { + QueryLimits: &lokiv1.QueryLimitSpec{ + QueryTimeout: "10m", + }, + }, + "tenant-b": { + QueryLimits: &lokiv1.QueryLimitSpec{ + QueryTimeout: "20m", +
}, + }, + }, + }, + }, + } + + got, err := NewTimeoutConfig(s.Spec.Limits) + require.NoError(t, err) + + want := TimeoutConfig{ + Loki: config.HTTPTimeoutConfig{ + IdleTimeout: 30 * time.Second, + ReadTimeout: 2 * time.Minute, + WriteTimeout: 21 * time.Minute, + }, + Gateway: GatewayTimeoutConfig{ + ReadTimeout: 2*time.Minute + gatewayReadDuration, + WriteTimeout: 21*time.Minute + gatewayWriteDuration, + UpstreamWriteTimeout: 21 * time.Minute, + }, + } + + require.Equal(t, want, got) +} + +func TestNewTimeoutConfig_ReturnsDefaults_WhenGlobalQueryTimeoutParseError(t *testing.T) { + s := lokiv1.LokiStack{ + Spec: lokiv1.LokiStackSpec{ + Limits: &lokiv1.LimitsSpec{ + Global: &lokiv1.LimitsTemplateSpec{ + QueryLimits: &lokiv1.QueryLimitSpec{ + QueryTimeout: "invalid", + }, + }, + }, + }, + } + + _, err := NewTimeoutConfig(s.Spec.Limits) + require.Error(t, err) +} + +func TestNewTimeoutConfig_ReturnsDefaults_WhenTenantQueryTimeoutParseError(t *testing.T) { + s := lokiv1.LokiStack{ + Spec: lokiv1.LokiStackSpec{ + Limits: &lokiv1.LimitsSpec{ + Global: &lokiv1.LimitsTemplateSpec{ + QueryLimits: &lokiv1.QueryLimitSpec{ + QueryTimeout: "10m", + }, + }, + Tenants: map[string]lokiv1.LimitsTemplateSpec{ + "tenant-a": { + QueryLimits: &lokiv1.QueryLimitSpec{ + QueryTimeout: "invalid", + }, + }, + "tenant-b": { + QueryLimits: &lokiv1.QueryLimitSpec{ + QueryTimeout: "20m", + }, + }, + }, + }, + }, + } + + _, err := NewTimeoutConfig(s.Spec.Limits) + require.Error(t, err) +} diff --git a/operator/internal/manifests/service_test.go b/operator/internal/manifests/service_test.go index f66a42906d2fe..58b3bc0001736 100644 --- a/operator/internal/manifests/service_test.go +++ b/operator/internal/manifests/service_test.go @@ -54,6 +54,7 @@ func TestServicesMatchPorts(t *testing.T) { }, }, }, + Timeouts: defaultTimeoutConfig, } sha1C := "deadbef" @@ -183,6 +184,7 @@ func TestServicesMatchLabels(t *testing.T) { }, }, }, + Timeouts: defaultTimeoutConfig, } sha1C := "deadbef" @@ -305,6 +307,7 
@@ func TestServices_WithEncryption(t *testing.T) { }, }, }, + Timeouts: defaultTimeoutConfig, TLSProfile: TLSProfileSpec{ MinTLSVersion: "VersionTLS12", Ciphers: []string{"cipher1", "cipher2"}, diff --git a/operator/internal/manifests/var.go b/operator/internal/manifests/var.go index b80f04b90f9aa..d94fcf0bb1ab7 100644 --- a/operator/internal/manifests/var.go +++ b/operator/internal/manifests/var.go @@ -3,6 +3,7 @@ package manifests import ( "fmt" "path" + "time" monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" corev1 "k8s.io/api/core/v1" @@ -109,7 +110,20 @@ const ( kubernetesInstanceLabel = "app.kubernetes.io/instance" ) +const ( + // lokiDefaultQueryTimeout contains the default query timeout. It should match the value mentioned in the CRD + // definition and also the default in the `sizes.go`. + lokiDefaultQueryTimeout = 3 * time.Minute + lokiDefaultHTTPIdleTimeout = 30 * time.Second + lokiQueryWriteDuration = 1 * time.Minute + + gatewayReadDuration = 30 * time.Second + gatewayWriteDuration = 2 * time.Minute +) + var ( + defaultTimeoutConfig = calculateHTTPTimeouts(lokiDefaultQueryTimeout) + defaultConfigMapMode = int32(420) volumeFileSystemMode = corev1.PersistentVolumeFilesystem podAntiAffinityComponents = map[string]struct{}{