From 6b6e292dddd2721fafd5621a56298ea4222752e7 Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Wed, 26 Feb 2025 14:18:21 +0100 Subject: [PATCH 1/4] feat: Support configuring JVM arguments --- deploy/helm/zookeeper-operator/crds/crds.yaml | 52 +++++ .../configuration_environment_overrides.adoc | 95 --------- docs/modules/zookeeper/partials/nav.adoc | 2 +- rust/operator-binary/src/config/jvm.rs | 195 ++++++++++++++++++ rust/operator-binary/src/config/mod.rs | 1 + rust/operator-binary/src/crd/mod.rs | 87 ++------ rust/operator-binary/src/main.rs | 1 + rust/operator-binary/src/zk_controller.rs | 31 ++- 8 files changed, 283 insertions(+), 181 deletions(-) delete mode 100644 docs/modules/zookeeper/pages/usage_guide/configuration_environment_overrides.adoc create mode 100644 rust/operator-binary/src/config/jvm.rs create mode 100644 rust/operator-binary/src/config/mod.rs diff --git a/deploy/helm/zookeeper-operator/crds/crds.yaml b/deploy/helm/zookeeper-operator/crds/crds.yaml index 82389036..1ecc3a6a 100644 --- a/deploy/helm/zookeeper-operator/crds/crds.yaml +++ b/deploy/helm/zookeeper-operator/crds/crds.yaml @@ -411,6 +411,32 @@ spec: default: {} description: '`envOverrides` configure environment variables to be set in the Pods. It is a map from strings to strings - environment variables and the value to set. Read the [environment variable overrides documentation](https://docs.stackable.tech/home/nightly/concepts/overrides#env-overrides) for more information and consult the operator specific usage guide to find out about the product specific environment variables that are available.' type: object + jvmArgumentOverrides: + default: + add: [] + remove: [] + removeRegex: [] + description: Allows overriding JVM arguments. Please read on the [JVM argument overrides documentation](https://docs.stackable.tech/home/nightly/concepts/overrides#jvm-argument-overrides) for details on the usage. + properties: + add: + default: [] + description: JVM arguments to be added + items: + type: string + type: array + remove: + default: [] + description: JVM arguments to be removed by exact match + items: + type: string + type: array + removeRegex: + default: [] + description: JVM arguments matching any of this regexes will be removed + items: + type: string + type: array + type: object podOverrides: default: {} description: In the `podOverrides` property you can define a [PodTemplateSpec](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.27/#podtemplatespec-v1-core) to override any property that can be set on a Kubernetes Pod. Read the [Pod overrides documentation](https://docs.stackable.tech/home/nightly/concepts/overrides#pod-overrides) for more information. @@ -698,6 +724,32 @@ spec: default: {} description: '`envOverrides` configure environment variables to be set in the Pods. It is a map from strings to strings - environment variables and the value to set. Read the [environment variable overrides documentation](https://docs.stackable.tech/home/nightly/concepts/overrides#env-overrides) for more information and consult the operator specific usage guide to find out about the product specific environment variables that are available.' type: object + jvmArgumentOverrides: + default: + add: [] + remove: [] + removeRegex: [] + description: Allows overriding JVM arguments. Please read on the [JVM argument overrides documentation](https://docs.stackable.tech/home/nightly/concepts/overrides#jvm-argument-overrides) for details on the usage. + properties: + add: + default: [] + description: JVM arguments to be added + items: + type: string + type: array + remove: + default: [] + description: JVM arguments to be removed by exact match + items: + type: string + type: array + removeRegex: + default: [] + description: JVM arguments matching any of this regexes will be removed + items: + type: string + type: array + type: object podOverrides: default: {} description: In the `podOverrides` property you can define a [PodTemplateSpec](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.27/#podtemplatespec-v1-core) to override any property that can be set on a Kubernetes Pod. Read the [Pod overrides documentation](https://docs.stackable.tech/home/nightly/concepts/overrides#pod-overrides) for more information. diff --git a/docs/modules/zookeeper/pages/usage_guide/configuration_environment_overrides.adoc b/docs/modules/zookeeper/pages/usage_guide/configuration_environment_overrides.adoc deleted file mode 100644 index 3e81fc05..00000000 --- a/docs/modules/zookeeper/pages/usage_guide/configuration_environment_overrides.adoc +++ /dev/null @@ -1,95 +0,0 @@ - -= Configuration and environment overrides - -The cluster definition also supports overriding configuration properties and environment variables, either per role or per role group, where the more specific override (role group) has precedence over the less specific one (role). - -IMPORTANT: Overriding certain properties which are set by operator (such as the ports) can interfere with the operator and can lead to problems. - -== Configuration properties - -For a role or role group, at the same level of `config`, you can specify: `configOverrides` for the `zoo.cfg` and `security.properties` files. - -=== Overriding entries in zoo.cfg - -For example, if you want to set the `4lw.commands.whitelist` to allow the `ruok` administrative command, it can be configured in the `ZookeeperCluster` resource like so: - -[source,yaml] ----- -servers: - roleGroups: - default: - configOverrides: - zoo.cfg: - 4lw.commands.whitelist: "srvr, ruok" - replicas: 1 ----- - -Just as for the `config`, it is possible to specify this at role level as well: - -[source,yaml] ----- -servers: - configOverrides: - zoo.cfg: - 4lw.commands.whitelist: "srvr, ruok" - roleGroups: - default: - replicas: 1 ----- - -All property values must be strings. - -For a full list of configuration options refer to the Apache ZooKeeper https://zookeeper.apache.org/doc/r3.9.2/zookeeperAdmin.html#sc_configuration[Configuration Reference]. - -=== Overriding entries in security.properties - -The `security.properties` file is used to configure JVM security properties. It is very seldom that users need to tweak any of these, but there is one use-case that stands out, and that users need to be aware of: the JVM DNS cache. - -The JVM manages its own cache of successfully resolved host names as well as a cache of host names that cannot be resolved. -Some products of the Stackable platform are very sensible to the contents of these caches and their performance is heavily affected by them. -As of version 3.8.1, Apache ZooKeeper always requires up-to-date IP addresses to maintain its quorum. -To guarantee this, the negative DNS cache of the JVM needs to be disabled. This can be achieved by setting the TTL of entries in the negative cache to zero, like this: - -[source,yaml] ----- - servers: - configOverrides: - security.properties: - networkaddress.cache.ttl: "5" - networkaddress.cache.negative.ttl: "0" ----- - -NOTE: The operator configures DNS caching by default as shown in the example above. - -For details on the JVM security see https://docs.oracle.com/en/java/javase/11/security/java-security-overview1.html - -== Environment variables - -In a similar fashion, environment variables can be (over)written. For example per role group: - -[source,yaml] ----- -servers: - roleGroups: - default: - envOverrides: - MY_ENV_VAR: "MY_VALUE" - replicas: 1 ----- - -or per role: - -[source,yaml] ----- -servers: - envOverrides: - MY_ENV_VAR: "MY_VALUE" - roleGroups: - default: - replicas: 1 ----- - -== Pod overrides - -The ZooKeeper operator also supports Pod overrides, allowing you to override any property that you can set on a Kubernetes Pod. -Read the xref:concepts:overrides.adoc#pod-overrides[Pod overrides documentation] to learn more about this feature. diff --git a/docs/modules/zookeeper/partials/nav.adoc b/docs/modules/zookeeper/partials/nav.adoc index 5281680f..bd21ee88 100644 --- a/docs/modules/zookeeper/partials/nav.adoc +++ b/docs/modules/zookeeper/partials/nav.adoc @@ -12,7 +12,7 @@ ** xref:zookeeper:usage_guide/log_aggregation.adoc[] ** xref:zookeeper:usage_guide/using_multiple_role_groups.adoc[] ** xref:zookeeper:usage_guide/isolating_clients_with_znodes.adoc[] -** xref:zookeeper:usage_guide/configuration_environment_overrides.adoc[] +** xref:zookeeper:usage_guide/overrides.adoc[] ** xref:zookeeper:usage_guide/operations/index.adoc[] *** xref:zookeeper:usage_guide/operations/cluster-operations.adoc[] *** xref:zookeeper:usage_guide/operations/pod-placement.adoc[] diff --git a/rust/operator-binary/src/config/jvm.rs b/rust/operator-binary/src/config/jvm.rs new file mode 100644 index 00000000..48998491 --- /dev/null +++ b/rust/operator-binary/src/config/jvm.rs @@ -0,0 +1,195 @@ +use snafu::{OptionExt, ResultExt, Snafu}; +use stackable_operator::{ + memory::{BinaryMultiple, MemoryQuantity}, + role_utils::{self, GenericRoleConfig, JavaCommonConfig, JvmArgumentOverrides, Role}, +}; + +use crate::crd::{ + v1alpha1::{ZookeeperCluster, ZookeeperConfig, ZookeeperConfigFragment}, + LoggingFramework, JVM_SECURITY_PROPERTIES_FILE, LOG4J_CONFIG_FILE, LOGBACK_CONFIG_FILE, + METRICS_PORT, STACKABLE_CONFIG_DIR, STACKABLE_LOG_CONFIG_DIR, +}; + +const JAVA_HEAP_FACTOR: f32 = 0.8; + +#[derive(Snafu, Debug)] +pub enum Error { + #[snafu(display("invalid memory resource configuration - missing default or value in crd?"))] + MissingMemoryResourceConfig, + + #[snafu(display("invalid memory config"))] + InvalidMemoryConfig { + source: stackable_operator::memory::Error, + }, + + #[snafu(display("failed to merge jvm argument overrides"))] + MergeJvmArgumentOverrides { source: role_utils::Error }, +} + +/// All JVM arguments. +fn construct_jvm_args( + zk: &ZookeeperCluster, + role: &Role, + role_group: &str, +) -> Result, Error> { + let logging_framework = zk.logging_framework(); + + let jvm_args = vec![ + format!("-Djava.security.properties={STACKABLE_CONFIG_DIR}/{JVM_SECURITY_PROPERTIES_FILE}"), + format!("-javaagent:/stackable/jmx/jmx_prometheus_javaagent.jar={METRICS_PORT}:/stackable/jmx/server.yaml"), + match logging_framework { + LoggingFramework::LOG4J => format!("-Dlog4j.configuration=file:{STACKABLE_LOG_CONFIG_DIR}/{LOG4J_CONFIG_FILE}"), + LoggingFramework::LOGBACK => format!("-Dlogback.configurationFile={STACKABLE_LOG_CONFIG_DIR}/{LOGBACK_CONFIG_FILE}"), + }, + ]; + + let operator_generated = JvmArgumentOverrides::new_with_only_additions(jvm_args); + let merged = role + .get_merged_jvm_argument_overrides(role_group, &operator_generated) + .context(MergeJvmArgumentOverridesSnafu)?; + Ok(merged + .effective_jvm_config_after_merging() + // Sorry for the clone, that's how operator-rs is currently modelled :P + .clone()) +} + +/// Arguments that go into `SERVER_JVMFLAGS`, so *not* the heap settings (which you can get using +/// [`construct_zk_server_heap_env`]). +pub fn construct_non_heap_jvm_args( + zk: &ZookeeperCluster, + role: &Role, + role_group: &str, +) -> Result { + let mut jvm_args = construct_jvm_args(zk, role, role_group)?; + jvm_args.retain(|arg| !is_heap_jvm_argument(arg)); + + Ok(jvm_args.join(" ")) +} + +/// This will be put into `ZK_SERVER_HEAP`, which is just the heap size in megabytes (*without* the `m` +/// unit prepended). +pub fn construct_zk_server_heap_env(merged_config: &ZookeeperConfig) -> Result { + let heap_size_in_mb = (MemoryQuantity::try_from( + merged_config + .resources + .memory + .limit + .as_ref() + .context(MissingMemoryResourceConfigSnafu)?, + ) + .context(InvalidMemoryConfigSnafu)? + * JAVA_HEAP_FACTOR) + .scale_to(BinaryMultiple::Mebi); + + Ok((heap_size_in_mb.value.floor() as u32).to_string()) +} + +fn is_heap_jvm_argument(jvm_argument: &str) -> bool { + let lowercase = jvm_argument.to_lowercase(); + + lowercase.starts_with("-xms") || lowercase.starts_with("-xmx") +} + +#[cfg(test)] +mod tests { + use crate::crd::{v1alpha1::ZookeeperConfig, ZookeeperRole}; + + use super::*; + + #[test] + fn test_construct_jvm_arguments_defaults() { + let input = r#" + apiVersion: zookeeper.stackable.tech/v1alpha1 + kind: ZookeeperCluster + metadata: + name: simple-zookeeper + spec: + image: + productVersion: "3.9.2" + servers: + roleGroups: + default: + replicas: 1 + "#; + let (zookeeper, merged_config, role, rolegroup) = construct_boilerplate(input); + let non_heap_jvm_args = construct_non_heap_jvm_args(&zookeeper, &role, &rolegroup).unwrap(); + let zk_server_heap_env = construct_zk_server_heap_env(&merged_config).unwrap(); + + assert_eq!( + non_heap_jvm_args, + "-Djava.security.properties=/stackable/config/security.properties \ + -javaagent:/stackable/jmx/jmx_prometheus_javaagent.jar=9505:/stackable/jmx/server.yaml \ + -Dlogback.configurationFile=/stackable/log_config/logback.xml" + ); + assert_eq!(zk_server_heap_env, "409"); + } + + #[test] + fn test_construct_jvm_argument_overrides() { + let input = r#" + apiVersion: zookeeper.stackable.tech/v1alpha1 + kind: ZookeeperCluster + metadata: + name: simple-zookeeper + spec: + image: + productVersion: "3.9.2" + servers: + config: + resources: + memory: + limit: 42Gi + jvmArgumentOverrides: + add: + - -Dhttps.proxyHost=proxy.my.corp + - -Dhttps.proxyPort=8080 + - -Djava.net.preferIPv4Stack=true + roleGroups: + default: + replicas: 1 + jvmArgumentOverrides: + # We need more memory! + removeRegex: + - -Xmx.* + - -Dhttps.proxyPort=.* + add: + - -Xmx40000m + - -Dhttps.proxyPort=1234 + "#; + let (zookeeper, merged_config, role, rolegroup) = construct_boilerplate(input); + let non_heap_jvm_args = construct_non_heap_jvm_args(&zookeeper, &role, &rolegroup).unwrap(); + let zk_server_heap_env = construct_zk_server_heap_env(&merged_config).unwrap(); + + assert_eq!( + non_heap_jvm_args, + "-Djava.security.properties=/stackable/config/security.properties \ + -javaagent:/stackable/jmx/jmx_prometheus_javaagent.jar=9505:/stackable/jmx/server.yaml \ + -Dlogback.configurationFile=/stackable/log_config/logback.xml \ + -Dhttps.proxyHost=proxy.my.corp \ + -Djava.net.preferIPv4Stack=true \ + -Dhttps.proxyPort=1234" + ); + assert_eq!(zk_server_heap_env, "34406"); + } + + fn construct_boilerplate( + zookeeper_cluster: &str, + ) -> ( + ZookeeperCluster, + ZookeeperConfig, + Role, + String, + ) { + let zookeeper: ZookeeperCluster = + serde_yaml::from_str(zookeeper_cluster).expect("illegal test input"); + + let zookeeper_role = ZookeeperRole::Server; + let rolegroup_ref = zookeeper.server_rolegroup_ref("default"); + let merged_config = zookeeper + .merged_config(&zookeeper_role, &rolegroup_ref) + .unwrap(); + let role = zookeeper.spec.servers.clone().unwrap(); + + (zookeeper, merged_config, role, "default".to_owned()) + } +} diff --git a/rust/operator-binary/src/config/mod.rs b/rust/operator-binary/src/config/mod.rs new file mode 100644 index 00000000..271c6d99 --- /dev/null +++ b/rust/operator-binary/src/config/mod.rs @@ -0,0 +1 @@ +pub mod jvm; diff --git a/rust/operator-binary/src/crd/mod.rs b/rust/operator-binary/src/crd/mod.rs index 10787617..cd21c6d0 100644 --- a/rust/operator-binary/src/crd/mod.rs +++ b/rust/operator-binary/src/crd/mod.rs @@ -25,9 +25,7 @@ use stackable_operator::{ memory::{BinaryMultiple, MemoryQuantity}, product_config_utils::{self, Configuration}, product_logging::{self, spec::Logging}, - role_utils::{ - GenericProductSpecificCommonConfig, GenericRoleConfig, Role, RoleGroup, RoleGroupRef, - }, + role_utils::{GenericRoleConfig, JavaCommonConfig, Role, RoleGroup, RoleGroupRef}, schemars::{self, JsonSchema}, status::condition::{ClusterCondition, HasStatusCondition}, time::Duration, @@ -71,8 +69,6 @@ pub const MAX_PREPARE_LOG_FILE_SIZE: MemoryQuantity = MemoryQuantity { unit: BinaryMultiple::Mebi, }; -const JVM_HEAP_FACTOR: f32 = 0.8; - pub const DOCKER_IMAGE_BASE_NAME: &str = "zookeeper"; const DEFAULT_SERVER_GRACEFUL_SHUTDOWN_TIMEOUT: Duration = Duration::from_minutes_unchecked(2); @@ -101,15 +97,6 @@ pub enum Error { #[snafu(display("fragment validation failure"))] FragmentValidationFailure { source: ValidationError }, - - #[snafu(display("invalid java heap config - missing default or value in crd?"))] - InvalidJavaHeapConfig, - - #[snafu(display("failed to convert java heap config to unit [{unit}]"))] - FailedToConvertJavaHeap { - source: stackable_operator::memory::Error, - unit: String, - }, } pub enum LoggingFramework { @@ -142,14 +129,17 @@ pub mod versioned { /// The settings in the `clusterConfig` are cluster wide settings that do not need to be configurable at role or role group level. #[serde(default = "cluster_config_default")] pub cluster_config: ZookeeperClusterConfig, + // no doc - it's in the struct. #[serde(default)] pub cluster_operation: ClusterOperation, + // no doc - it's in the struct. pub image: ProductImage, + // no doc - it's in the struct. #[serde(skip_serializing_if = "Option::is_none")] - pub servers: Option>, + pub servers: Option>, } #[derive(Clone, Deserialize, Debug, Eq, JsonSchema, PartialEq, Serialize)] @@ -384,8 +374,6 @@ impl v1alpha1::ZookeeperConfig { pub const DATA_DIR: &'static str = "dataDir"; pub const MYID_OFFSET: &'static str = "MYID_OFFSET"; - pub const SERVER_JVMFLAGS: &'static str = "SERVER_JVMFLAGS"; - pub const ZK_SERVER_HEAP: &'static str = "ZK_SERVER_HEAP"; const DEFAULT_SECRET_LIFETIME: Duration = Duration::from_days_unchecked(1); @@ -431,33 +419,16 @@ impl Configuration for v1alpha1::ZookeeperConfigFragment { resource: &Self::Configurable, _role_name: &str, ) -> Result>, product_config_utils::Error> { - let logging_framework = resource.logging_framework(); - - let jvm_flags = [ - format!("-javaagent:/stackable/jmx/jmx_prometheus_javaagent.jar={METRICS_PORT}:/stackable/jmx/server.yaml"), - match logging_framework { - LoggingFramework::LOG4J => format!("-Dlog4j.configuration=file:{STACKABLE_LOG_CONFIG_DIR}/{LOG4J_CONFIG_FILE}"), - LoggingFramework::LOGBACK => format!("-Dlogback.configurationFile={STACKABLE_LOG_CONFIG_DIR}/{LOGBACK_CONFIG_FILE}"), - }, - format!("-Djava.security.properties={STACKABLE_CONFIG_DIR}/{JVM_SECURITY_PROPERTIES_FILE}"), - ].join(" "); - - Ok([ - ( - v1alpha1::ZookeeperConfig::MYID_OFFSET.to_string(), - self.myid_offset - .or(v1alpha1::ZookeeperConfig::default_server_config( - &resource.name_any(), - &ZookeeperRole::Server, - ) - .myid_offset) - .map(|myid_offset| myid_offset.to_string()), - ), - ( - v1alpha1::ZookeeperConfig::SERVER_JVMFLAGS.to_string(), - Some(jvm_flags), - ), - ] + Ok([( + v1alpha1::ZookeeperConfig::MYID_OFFSET.to_string(), + self.myid_offset + .or(v1alpha1::ZookeeperConfig::default_server_config( + &resource.name_any(), + &ZookeeperRole::Server, + ) + .myid_offset) + .map(|myid_offset| myid_offset.to_string()), + )] .into()) } @@ -579,7 +550,8 @@ impl v1alpha1::ZookeeperCluster { pub fn role( &self, role_variant: &ZookeeperRole, - ) -> Result<&Role, Error> { + ) -> Result<&Role, Error> + { match role_variant { ZookeeperRole::Server => self.spec.servers.as_ref(), } @@ -592,10 +564,7 @@ impl v1alpha1::ZookeeperCluster { pub fn rolegroup( &self, rolegroup_ref: &RoleGroupRef, - ) -> Result< - RoleGroup, - Error, - > { + ) -> Result, Error> { let role_variant = ZookeeperRole::from_str(&rolegroup_ref.role).with_context(|_| { UnknownZookeeperRoleSnafu { role: rolegroup_ref.role.to_owned(), @@ -723,26 +692,6 @@ impl v1alpha1::ZookeeperCluster { Ok((vec![data_pvc], resources.into())) } - - pub fn heap_limits(&self, resources: &ResourceRequirements) -> Result, Error> { - let memory_limit = MemoryQuantity::try_from( - resources - .limits - .as_ref() - .and_then(|limits| limits.get("memory")) - .context(InvalidJavaHeapConfigSnafu)?, - ) - .context(FailedToConvertJavaHeapSnafu { - unit: BinaryMultiple::Mebi.to_java_memory_unit(), - })?; - - Ok(Some( - (memory_limit * JVM_HEAP_FACTOR) - .scale_to(BinaryMultiple::Mebi) - .floor() - .value as u32, - )) - } } #[cfg(test)] diff --git a/rust/operator-binary/src/main.rs b/rust/operator-binary/src/main.rs index 23f9af50..aa152812 100644 --- a/rust/operator-binary/src/main.rs +++ b/rust/operator-binary/src/main.rs @@ -26,6 +26,7 @@ use stackable_operator::{ use crate::{zk_controller::ZK_FULL_CONTROLLER_NAME, znode_controller::ZNODE_FULL_CONTROLLER_NAME}; mod command; +mod config; pub mod crd; mod discovery; mod operations; diff --git a/rust/operator-binary/src/zk_controller.rs b/rust/operator-binary/src/zk_controller.rs index cfc2e14f..0dc389c9 100644 --- a/rust/operator-binary/src/zk_controller.rs +++ b/rust/operator-binary/src/zk_controller.rs @@ -68,6 +68,7 @@ use strum::{EnumDiscriminants, IntoStaticStr}; use crate::{ command::create_init_container_command_args, + config::jvm::{construct_non_heap_jvm_args, construct_zk_server_heap_env}, crd::{ security::{self, ZookeeperSecurity}, v1alpha1, ZookeeperRole, DOCKER_IMAGE_BASE_NAME, JVM_SECURITY_PROPERTIES_FILE, @@ -191,9 +192,6 @@ pub enum Error { source: stackable_operator::client::Error, }, - #[snafu(display("invalid java heap config"))] - InvalidJavaHeapConfig { source: crate::crd::Error }, - #[snafu(display("failed to create RBAC service account"))] ApplyServiceAccount { source: stackable_operator::cluster_resources::Error, @@ -267,6 +265,9 @@ pub enum Error { CreateClusterResources { source: stackable_operator::cluster_resources::Error, }, + + #[snafu(display("failed to construct JVM arguments"))] + ConstructJvmArguments { source: crate::config::jvm::Error }, } impl ReconcilerError for Error { @@ -295,7 +296,6 @@ impl ReconcilerError for Error { Error::BuildDiscoveryConfig { .. } => None, Error::ApplyDiscoveryConfig { .. } => None, Error::ApplyStatus { .. } => None, - Error::InvalidJavaHeapConfig { .. } => None, Error::ApplyServiceAccount { .. } => None, Error::ApplyRoleBinding { .. } => None, Error::BuildRbacResources { .. } => None, @@ -313,6 +313,7 @@ impl ReconcilerError for Error { Error::AddVolume { .. } => None, Error::AddVolumeMount { .. } => None, Error::CreateClusterResources { .. } => None, + Error::ConstructJvmArguments { .. } => None, } } } @@ -764,7 +765,7 @@ fn build_server_rolegroup_statefulset( .logging(zk_role, rolegroup_ref) .context(CrdValidationFailureSnafu)?; - let mut env_vars = server_config + let env_vars = server_config .get(&PropertyNameKind::Env) .into_iter() .flatten() @@ -778,17 +779,6 @@ fn build_server_rolegroup_statefulset( let (pvc, resources) = zk .resources(zk_role, rolegroup_ref) .context(CrdValidationFailureSnafu)?; - // set heap size if available - let heap_limits = zk - .heap_limits(&resources) - .context(InvalidJavaHeapConfigSnafu)?; - if let Some(heap_limits) = heap_limits { - env_vars.push(EnvVar { - name: v1alpha1::ZookeeperConfig::ZK_SERVER_HEAP.to_string(), - value: Some(heap_limits.to_string()), - ..EnvVar::default() - }); - } let mut cb_prepare = ContainerBuilder::new("prepare").expect("invalid hard-coded container name"); @@ -833,6 +823,15 @@ fn build_server_rolegroup_statefulset( ]) .args(vec![args.join("\n")]) .add_env_vars(env_vars.clone()) + .add_env_var( + "ZK_SERVER_HEAP", + construct_zk_server_heap_env(merged_config).context(ConstructJvmArgumentsSnafu)?, + ) + .add_env_var( + "SERVER_JVMFLAGS", + construct_non_heap_jvm_args(zk, role, &rolegroup_ref.role_group) + .context(ConstructJvmArgumentsSnafu)?, + ) .add_env_vars(vec![EnvVar { name: "POD_NAME".to_string(), value_from: Some(EnvVarSource { From ee6dbd4b06995dc96c9ebc2bbca9127477582494 Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Wed, 26 Feb 2025 14:20:02 +0100 Subject: [PATCH 2/4] changelog --- CHANGELOG.md | 2 + .../pages/usage_guide/overrides.adoc | 103 ++++++++++++++++++ 2 files changed, 105 insertions(+) create mode 100644 docs/modules/zookeeper/pages/usage_guide/overrides.adoc diff --git a/CHANGELOG.md b/CHANGELOG.md index f84c218e..df5b58bd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ All notable changes to this project will be documented in this file. config property `requestedSecretLifetime`. This helps reduce frequent Pod restarts ([#892]). - Run a `containerdebug` process in the background of each Zookeeper container to collect debugging information ([#881]). - Aggregate emitted Kubernetes events on the CustomResources ([#904]). +- Support configuring JVM arguments ([#919]). ### Changed @@ -19,6 +20,7 @@ All notable changes to this project will be documented in this file. [#892]: https://github.com/stackabletech/zookeeper-operator/pull/892 [#904]: https://github.com/stackabletech/zookeeper-operator/pull/904 [#905]: https://github.com/stackabletech/zookeeper-operator/pull/905 +[#919]: https://github.com/stackabletech/zookeeper-operator/pull/919 ## [24.11.1] - 2025-01-10 diff --git a/docs/modules/zookeeper/pages/usage_guide/overrides.adoc b/docs/modules/zookeeper/pages/usage_guide/overrides.adoc new file mode 100644 index 00000000..ce09fb54 --- /dev/null +++ b/docs/modules/zookeeper/pages/usage_guide/overrides.adoc @@ -0,0 +1,103 @@ + += Configuration and environment overrides + +The cluster definition also supports overriding configuration properties and environment variables, either per role or per role group, where the more specific override (role group) has precedence over the less specific one (role). + +IMPORTANT: Overriding certain properties which are set by operator (such as the ports) can interfere with the operator and can lead to problems. + +== Configuration properties + +For a role or role group, at the same level of `config`, you can specify: `configOverrides` for the `zoo.cfg` and `security.properties` files. + +=== Overriding entries in zoo.cfg + +For example, if you want to set the `4lw.commands.whitelist` to allow the `ruok` administrative command, it can be configured in the `ZookeeperCluster` resource like so: + +[source,yaml] +---- +servers: + roleGroups: + default: + configOverrides: + zoo.cfg: + 4lw.commands.whitelist: "srvr, ruok" + replicas: 1 +---- + +Just as for the `config`, it is possible to specify this at role level as well: + +[source,yaml] +---- +servers: + configOverrides: + zoo.cfg: + 4lw.commands.whitelist: "srvr, ruok" + roleGroups: + default: + replicas: 1 +---- + +All property values must be strings. + +For a full list of configuration options refer to the Apache ZooKeeper https://zookeeper.apache.org/doc/r3.9.2/zookeeperAdmin.html#sc_configuration[Configuration Reference]. + +=== Overriding entries in security.properties + +The `security.properties` file is used to configure JVM security properties. It is very seldom that users need to tweak any of these, but there is one use-case that stands out, and that users need to be aware of: the JVM DNS cache. + +The JVM manages its own cache of successfully resolved host names as well as a cache of host names that cannot be resolved. +Some products of the Stackable platform are very sensible to the contents of these caches and their performance is heavily affected by them. +As of version 3.8.1, Apache ZooKeeper always requires up-to-date IP addresses to maintain its quorum. +To guarantee this, the negative DNS cache of the JVM needs to be disabled. This can be achieved by setting the TTL of entries in the negative cache to zero, like this: + +[source,yaml] +---- + servers: + configOverrides: + security.properties: + networkaddress.cache.ttl: "5" + networkaddress.cache.negative.ttl: "0" +---- + +NOTE: The operator configures DNS caching by default as shown in the example above. + +For details on the JVM security see https://docs.oracle.com/en/java/javase/11/security/java-security-overview1.html + +== Environment variables + +In a similar fashion, environment variables can be (over)written. For example per role group: + +[source,yaml] +---- +servers: + roleGroups: + default: + envOverrides: + MY_ENV_VAR: "MY_VALUE" + replicas: 1 +---- + +or per role: + +[source,yaml] +---- +servers: + envOverrides: + MY_ENV_VAR: "MY_VALUE" + roleGroups: + default: + replicas: 1 +---- + +== Pod overrides + +The ZooKeeper operator also supports Pod overrides, allowing you to override any property that you can set on a Kubernetes Pod. +Read the xref:concepts:overrides.adoc#pod-overrides[Pod overrides documentation] to learn more about this feature. + +== JVM argument overrides + +Stackable operators automatically determine the set of needed JVM arguments, such as memory settings or trust- and keystores. +Using JVM argument overrides you can configure the JVM arguments xref:concepts:overrides.adoc#jvm-argument-overrides[according to the concepts page]. + +One thing that is different for Zookeeper, is that all heap-related arguments will be passed in via the env variable `ZK_SERVER_HEAP`, all the other ones via `SERVER_JVMFLAGS`. +`ZK_SERVER_HEAP` can *not* have a unit suffix, it will always be an integer representing the number of megabytes heap available. From b97f91ff924a3474fdf88032c1ca9107adc1af89 Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Wed, 26 Feb 2025 14:29:34 +0100 Subject: [PATCH 3/4] Well I'm stupid... --- rust/operator-binary/src/zk_controller.rs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/rust/operator-binary/src/zk_controller.rs b/rust/operator-binary/src/zk_controller.rs index 0dc389c9..a897eba2 100644 --- a/rust/operator-binary/src/zk_controller.rs +++ b/rust/operator-binary/src/zk_controller.rs @@ -823,15 +823,6 @@ fn build_server_rolegroup_statefulset( ]) .args(vec![args.join("\n")]) .add_env_vars(env_vars.clone()) - .add_env_var( - "ZK_SERVER_HEAP", - construct_zk_server_heap_env(merged_config).context(ConstructJvmArgumentsSnafu)?, - ) - .add_env_var( - "SERVER_JVMFLAGS", - construct_non_heap_jvm_args(zk, role, &rolegroup_ref.role_group) - .context(ConstructJvmArgumentsSnafu)?, - ) .add_env_vars(vec![EnvVar { name: "POD_NAME".to_string(), value_from: Some(EnvVarSource { @@ -885,6 +876,15 @@ fn build_server_rolegroup_statefulset( create_vector_shutdown_file_command(STACKABLE_LOG_DIR), }]) .add_env_vars(env_vars) + .add_env_var( + "ZK_SERVER_HEAP", + construct_zk_server_heap_env(merged_config).context(ConstructJvmArgumentsSnafu)?, + ) + .add_env_var( + "SERVER_JVMFLAGS", + construct_non_heap_jvm_args(zk, role, &rolegroup_ref.role_group) + .context(ConstructJvmArgumentsSnafu)?, + ) .add_env_var( "CONTAINERDEBUG_LOG_DIRECTORY", format!("{STACKABLE_LOG_DIR}/containerdebug"), From 17ecf292caa5876ffb10b1a09cb67cbc9835ab0a Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Wed, 26 Feb 2025 14:31:40 +0100 Subject: [PATCH 4/4] cargo fmt --- rust/operator-binary/src/config/jvm.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/rust/operator-binary/src/config/jvm.rs b/rust/operator-binary/src/config/jvm.rs index 48998491..7f36bc6d 100644 --- a/rust/operator-binary/src/config/jvm.rs +++ b/rust/operator-binary/src/config/jvm.rs @@ -92,9 +92,8 @@ fn is_heap_jvm_argument(jvm_argument: &str) -> bool { #[cfg(test)] mod tests { - use crate::crd::{v1alpha1::ZookeeperConfig, ZookeeperRole}; - use super::*; + use crate::crd::{v1alpha1::ZookeeperConfig, ZookeeperRole}; #[test] fn test_construct_jvm_arguments_defaults() {