diff --git a/CHANGELOG.md b/CHANGELOG.md index f84c218e..df5b58bd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ All notable changes to this project will be documented in this file. config property `requestedSecretLifetime`. This helps reduce frequent Pod restarts ([#892]). - Run a `containerdebug` process in the background of each Zookeeper container to collect debugging information ([#881]). - Aggregate emitted Kubernetes events on the CustomResources ([#904]). +- Support configuring JVM arguments ([#919]). ### Changed @@ -19,6 +20,7 @@ All notable changes to this project will be documented in this file. [#892]: https://github.com/stackabletech/zookeeper-operator/pull/892 [#904]: https://github.com/stackabletech/zookeeper-operator/pull/904 [#905]: https://github.com/stackabletech/zookeeper-operator/pull/905 +[#919]: https://github.com/stackabletech/zookeeper-operator/pull/919 ## [24.11.1] - 2025-01-10 diff --git a/deploy/helm/zookeeper-operator/crds/crds.yaml b/deploy/helm/zookeeper-operator/crds/crds.yaml index 82389036..1ecc3a6a 100644 --- a/deploy/helm/zookeeper-operator/crds/crds.yaml +++ b/deploy/helm/zookeeper-operator/crds/crds.yaml @@ -411,6 +411,32 @@ spec: default: {} description: '`envOverrides` configure environment variables to be set in the Pods. It is a map from strings to strings - environment variables and the value to set. Read the [environment variable overrides documentation](https://docs.stackable.tech/home/nightly/concepts/overrides#env-overrides) for more information and consult the operator specific usage guide to find out about the product specific environment variables that are available.' type: object + jvmArgumentOverrides: + default: + add: [] + remove: [] + removeRegex: [] + description: Allows overriding JVM arguments. Please read on the [JVM argument overrides documentation](https://docs.stackable.tech/home/nightly/concepts/overrides#jvm-argument-overrides) for details on the usage. + properties: + add: + default: [] + description: JVM arguments to be added + items: + type: string + type: array + remove: + default: [] + description: JVM arguments to be removed by exact match + items: + type: string + type: array + removeRegex: + default: [] + description: JVM arguments matching any of this regexes will be removed + items: + type: string + type: array + type: object podOverrides: default: {} description: In the `podOverrides` property you can define a [PodTemplateSpec](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.27/#podtemplatespec-v1-core) to override any property that can be set on a Kubernetes Pod. Read the [Pod overrides documentation](https://docs.stackable.tech/home/nightly/concepts/overrides#pod-overrides) for more information. @@ -698,6 +724,32 @@ spec: default: {} description: '`envOverrides` configure environment variables to be set in the Pods. It is a map from strings to strings - environment variables and the value to set. Read the [environment variable overrides documentation](https://docs.stackable.tech/home/nightly/concepts/overrides#env-overrides) for more information and consult the operator specific usage guide to find out about the product specific environment variables that are available.' type: object + jvmArgumentOverrides: + default: + add: [] + remove: [] + removeRegex: [] + description: Allows overriding JVM arguments. Please read on the [JVM argument overrides documentation](https://docs.stackable.tech/home/nightly/concepts/overrides#jvm-argument-overrides) for details on the usage. + properties: + add: + default: [] + description: JVM arguments to be added + items: + type: string + type: array + remove: + default: [] + description: JVM arguments to be removed by exact match + items: + type: string + type: array + removeRegex: + default: [] + description: JVM arguments matching any of this regexes will be removed + items: + type: string + type: array + type: object podOverrides: default: {} description: In the `podOverrides` property you can define a [PodTemplateSpec](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.27/#podtemplatespec-v1-core) to override any property that can be set on a Kubernetes Pod. Read the [Pod overrides documentation](https://docs.stackable.tech/home/nightly/concepts/overrides#pod-overrides) for more information. diff --git a/docs/modules/zookeeper/pages/usage_guide/configuration_environment_overrides.adoc b/docs/modules/zookeeper/pages/usage_guide/overrides.adoc similarity index 84% rename from docs/modules/zookeeper/pages/usage_guide/configuration_environment_overrides.adoc rename to docs/modules/zookeeper/pages/usage_guide/overrides.adoc index 3e81fc05..ce09fb54 100644 --- a/docs/modules/zookeeper/pages/usage_guide/configuration_environment_overrides.adoc +++ b/docs/modules/zookeeper/pages/usage_guide/overrides.adoc @@ -93,3 +93,11 @@ servers: The ZooKeeper operator also supports Pod overrides, allowing you to override any property that you can set on a Kubernetes Pod. Read the xref:concepts:overrides.adoc#pod-overrides[Pod overrides documentation] to learn more about this feature. + +== JVM argument overrides + +Stackable operators automatically determine the set of needed JVM arguments, such as memory settings or trust- and keystores. +Using JVM argument overrides you can configure the JVM arguments xref:concepts:overrides.adoc#jvm-argument-overrides[according to the concepts page]. + +One thing that is different for Zookeeper, is that all heap-related arguments will be passed in via the env variable `ZK_SERVER_HEAP`, all the other ones via `SERVER_JVMFLAGS`. +`ZK_SERVER_HEAP` can *not* have a unit suffix, it will always be an integer representing the number of megabytes heap available. diff --git a/docs/modules/zookeeper/partials/nav.adoc b/docs/modules/zookeeper/partials/nav.adoc index 5281680f..bd21ee88 100644 --- a/docs/modules/zookeeper/partials/nav.adoc +++ b/docs/modules/zookeeper/partials/nav.adoc @@ -12,7 +12,7 @@ ** xref:zookeeper:usage_guide/log_aggregation.adoc[] ** xref:zookeeper:usage_guide/using_multiple_role_groups.adoc[] ** xref:zookeeper:usage_guide/isolating_clients_with_znodes.adoc[] -** xref:zookeeper:usage_guide/configuration_environment_overrides.adoc[] +** xref:zookeeper:usage_guide/overrides.adoc[] ** xref:zookeeper:usage_guide/operations/index.adoc[] *** xref:zookeeper:usage_guide/operations/cluster-operations.adoc[] *** xref:zookeeper:usage_guide/operations/pod-placement.adoc[] diff --git a/rust/operator-binary/src/config/jvm.rs b/rust/operator-binary/src/config/jvm.rs new file mode 100644 index 00000000..7f36bc6d --- /dev/null +++ b/rust/operator-binary/src/config/jvm.rs @@ -0,0 +1,194 @@ +use snafu::{OptionExt, ResultExt, Snafu}; +use stackable_operator::{ + memory::{BinaryMultiple, MemoryQuantity}, + role_utils::{self, GenericRoleConfig, JavaCommonConfig, JvmArgumentOverrides, Role}, +}; + +use crate::crd::{ + v1alpha1::{ZookeeperCluster, ZookeeperConfig, ZookeeperConfigFragment}, + LoggingFramework, JVM_SECURITY_PROPERTIES_FILE, LOG4J_CONFIG_FILE, LOGBACK_CONFIG_FILE, + METRICS_PORT, STACKABLE_CONFIG_DIR, STACKABLE_LOG_CONFIG_DIR, +}; + +const JAVA_HEAP_FACTOR: f32 = 0.8; + +#[derive(Snafu, Debug)] +pub enum Error { + #[snafu(display("invalid memory resource configuration - missing default or value in crd?"))] + MissingMemoryResourceConfig, + + #[snafu(display("invalid memory config"))] + InvalidMemoryConfig { + source: stackable_operator::memory::Error, + }, + + #[snafu(display("failed to merge jvm argument overrides"))] + MergeJvmArgumentOverrides { source: role_utils::Error }, +} + +/// All JVM arguments. +fn construct_jvm_args( + zk: &ZookeeperCluster, + role: &Role, + role_group: &str, +) -> Result, Error> { + let logging_framework = zk.logging_framework(); + + let jvm_args = vec![ + format!("-Djava.security.properties={STACKABLE_CONFIG_DIR}/{JVM_SECURITY_PROPERTIES_FILE}"), + format!("-javaagent:/stackable/jmx/jmx_prometheus_javaagent.jar={METRICS_PORT}:/stackable/jmx/server.yaml"), + match logging_framework { + LoggingFramework::LOG4J => format!("-Dlog4j.configuration=file:{STACKABLE_LOG_CONFIG_DIR}/{LOG4J_CONFIG_FILE}"), + LoggingFramework::LOGBACK => format!("-Dlogback.configurationFile={STACKABLE_LOG_CONFIG_DIR}/{LOGBACK_CONFIG_FILE}"), + }, + ]; + + let operator_generated = JvmArgumentOverrides::new_with_only_additions(jvm_args); + let merged = role + .get_merged_jvm_argument_overrides(role_group, &operator_generated) + .context(MergeJvmArgumentOverridesSnafu)?; + Ok(merged + .effective_jvm_config_after_merging() + // Sorry for the clone, that's how operator-rs is currently modelled :P + .clone()) +} + +/// Arguments that go into `SERVER_JVMFLAGS`, so *not* the heap settings (which you can get using +/// [`construct_zk_server_heap_env`]). +pub fn construct_non_heap_jvm_args( + zk: &ZookeeperCluster, + role: &Role, + role_group: &str, +) -> Result { + let mut jvm_args = construct_jvm_args(zk, role, role_group)?; + jvm_args.retain(|arg| !is_heap_jvm_argument(arg)); + + Ok(jvm_args.join(" ")) +} + +/// This will be put into `ZK_SERVER_HEAP`, which is just the heap size in megabytes (*without* the `m` +/// unit prepended). +pub fn construct_zk_server_heap_env(merged_config: &ZookeeperConfig) -> Result { + let heap_size_in_mb = (MemoryQuantity::try_from( + merged_config + .resources + .memory + .limit + .as_ref() + .context(MissingMemoryResourceConfigSnafu)?, + ) + .context(InvalidMemoryConfigSnafu)? + * JAVA_HEAP_FACTOR) + .scale_to(BinaryMultiple::Mebi); + + Ok((heap_size_in_mb.value.floor() as u32).to_string()) +} + +fn is_heap_jvm_argument(jvm_argument: &str) -> bool { + let lowercase = jvm_argument.to_lowercase(); + + lowercase.starts_with("-xms") || lowercase.starts_with("-xmx") +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::crd::{v1alpha1::ZookeeperConfig, ZookeeperRole}; + + #[test] + fn test_construct_jvm_arguments_defaults() { + let input = r#" + apiVersion: zookeeper.stackable.tech/v1alpha1 + kind: ZookeeperCluster + metadata: + name: simple-zookeeper + spec: + image: + productVersion: "3.9.2" + servers: + roleGroups: + default: + replicas: 1 + "#; + let (zookeeper, merged_config, role, rolegroup) = construct_boilerplate(input); + let non_heap_jvm_args = construct_non_heap_jvm_args(&zookeeper, &role, &rolegroup).unwrap(); + let zk_server_heap_env = construct_zk_server_heap_env(&merged_config).unwrap(); + + assert_eq!( + non_heap_jvm_args, + "-Djava.security.properties=/stackable/config/security.properties \ + -javaagent:/stackable/jmx/jmx_prometheus_javaagent.jar=9505:/stackable/jmx/server.yaml \ + -Dlogback.configurationFile=/stackable/log_config/logback.xml" + ); + assert_eq!(zk_server_heap_env, "409"); + } + + #[test] + fn test_construct_jvm_argument_overrides() { + let input = r#" + apiVersion: zookeeper.stackable.tech/v1alpha1 + kind: ZookeeperCluster + metadata: + name: simple-zookeeper + spec: + image: + productVersion: "3.9.2" + servers: + config: + resources: + memory: + limit: 42Gi + jvmArgumentOverrides: + add: + - -Dhttps.proxyHost=proxy.my.corp + - -Dhttps.proxyPort=8080 + - -Djava.net.preferIPv4Stack=true + roleGroups: + default: + replicas: 1 + jvmArgumentOverrides: + # We need more memory! + removeRegex: + - -Xmx.* + - -Dhttps.proxyPort=.* + add: + - -Xmx40000m + - -Dhttps.proxyPort=1234 + "#; + let (zookeeper, merged_config, role, rolegroup) = construct_boilerplate(input); + let non_heap_jvm_args = construct_non_heap_jvm_args(&zookeeper, &role, &rolegroup).unwrap(); + let zk_server_heap_env = construct_zk_server_heap_env(&merged_config).unwrap(); + + assert_eq!( + non_heap_jvm_args, + "-Djava.security.properties=/stackable/config/security.properties \ + -javaagent:/stackable/jmx/jmx_prometheus_javaagent.jar=9505:/stackable/jmx/server.yaml \ + -Dlogback.configurationFile=/stackable/log_config/logback.xml \ + -Dhttps.proxyHost=proxy.my.corp \ + -Djava.net.preferIPv4Stack=true \ + -Dhttps.proxyPort=1234" + ); + assert_eq!(zk_server_heap_env, "34406"); + } + + fn construct_boilerplate( + zookeeper_cluster: &str, + ) -> ( + ZookeeperCluster, + ZookeeperConfig, + Role, + String, + ) { + let zookeeper: ZookeeperCluster = + serde_yaml::from_str(zookeeper_cluster).expect("illegal test input"); + + let zookeeper_role = ZookeeperRole::Server; + let rolegroup_ref = zookeeper.server_rolegroup_ref("default"); + let merged_config = zookeeper + .merged_config(&zookeeper_role, &rolegroup_ref) + .unwrap(); + let role = zookeeper.spec.servers.clone().unwrap(); + + (zookeeper, merged_config, role, "default".to_owned()) + } +} diff --git a/rust/operator-binary/src/config/mod.rs b/rust/operator-binary/src/config/mod.rs new file mode 100644 index 00000000..271c6d99 --- /dev/null +++ b/rust/operator-binary/src/config/mod.rs @@ -0,0 +1 @@ +pub mod jvm; diff --git a/rust/operator-binary/src/crd/mod.rs b/rust/operator-binary/src/crd/mod.rs index 10787617..cd21c6d0 100644 --- a/rust/operator-binary/src/crd/mod.rs +++ b/rust/operator-binary/src/crd/mod.rs @@ -25,9 +25,7 @@ use stackable_operator::{ memory::{BinaryMultiple, MemoryQuantity}, product_config_utils::{self, Configuration}, product_logging::{self, spec::Logging}, - role_utils::{ - GenericProductSpecificCommonConfig, GenericRoleConfig, Role, RoleGroup, RoleGroupRef, - }, + role_utils::{GenericRoleConfig, JavaCommonConfig, Role, RoleGroup, RoleGroupRef}, schemars::{self, JsonSchema}, status::condition::{ClusterCondition, HasStatusCondition}, time::Duration, @@ -71,8 +69,6 @@ pub const MAX_PREPARE_LOG_FILE_SIZE: MemoryQuantity = MemoryQuantity { unit: BinaryMultiple::Mebi, }; -const JVM_HEAP_FACTOR: f32 = 0.8; - pub const DOCKER_IMAGE_BASE_NAME: &str = "zookeeper"; const DEFAULT_SERVER_GRACEFUL_SHUTDOWN_TIMEOUT: Duration = Duration::from_minutes_unchecked(2); @@ -101,15 +97,6 @@ pub enum Error { #[snafu(display("fragment validation failure"))] FragmentValidationFailure { source: ValidationError }, - - #[snafu(display("invalid java heap config - missing default or value in crd?"))] - InvalidJavaHeapConfig, - - #[snafu(display("failed to convert java heap config to unit [{unit}]"))] - FailedToConvertJavaHeap { - source: stackable_operator::memory::Error, - unit: String, - }, } pub enum LoggingFramework { @@ -142,14 +129,17 @@ pub mod versioned { /// The settings in the `clusterConfig` are cluster wide settings that do not need to be configurable at role or role group level. #[serde(default = "cluster_config_default")] pub cluster_config: ZookeeperClusterConfig, + // no doc - it's in the struct. #[serde(default)] pub cluster_operation: ClusterOperation, + // no doc - it's in the struct. pub image: ProductImage, + // no doc - it's in the struct. #[serde(skip_serializing_if = "Option::is_none")] - pub servers: Option>, + pub servers: Option>, } #[derive(Clone, Deserialize, Debug, Eq, JsonSchema, PartialEq, Serialize)] @@ -384,8 +374,6 @@ impl v1alpha1::ZookeeperConfig { pub const DATA_DIR: &'static str = "dataDir"; pub const MYID_OFFSET: &'static str = "MYID_OFFSET"; - pub const SERVER_JVMFLAGS: &'static str = "SERVER_JVMFLAGS"; - pub const ZK_SERVER_HEAP: &'static str = "ZK_SERVER_HEAP"; const DEFAULT_SECRET_LIFETIME: Duration = Duration::from_days_unchecked(1); @@ -431,33 +419,16 @@ impl Configuration for v1alpha1::ZookeeperConfigFragment { resource: &Self::Configurable, _role_name: &str, ) -> Result>, product_config_utils::Error> { - let logging_framework = resource.logging_framework(); - - let jvm_flags = [ - format!("-javaagent:/stackable/jmx/jmx_prometheus_javaagent.jar={METRICS_PORT}:/stackable/jmx/server.yaml"), - match logging_framework { - LoggingFramework::LOG4J => format!("-Dlog4j.configuration=file:{STACKABLE_LOG_CONFIG_DIR}/{LOG4J_CONFIG_FILE}"), - LoggingFramework::LOGBACK => format!("-Dlogback.configurationFile={STACKABLE_LOG_CONFIG_DIR}/{LOGBACK_CONFIG_FILE}"), - }, - format!("-Djava.security.properties={STACKABLE_CONFIG_DIR}/{JVM_SECURITY_PROPERTIES_FILE}"), - ].join(" "); - - Ok([ - ( - v1alpha1::ZookeeperConfig::MYID_OFFSET.to_string(), - self.myid_offset - .or(v1alpha1::ZookeeperConfig::default_server_config( - &resource.name_any(), - &ZookeeperRole::Server, - ) - .myid_offset) - .map(|myid_offset| myid_offset.to_string()), - ), - ( - v1alpha1::ZookeeperConfig::SERVER_JVMFLAGS.to_string(), - Some(jvm_flags), - ), - ] + Ok([( + v1alpha1::ZookeeperConfig::MYID_OFFSET.to_string(), + self.myid_offset + .or(v1alpha1::ZookeeperConfig::default_server_config( + &resource.name_any(), + &ZookeeperRole::Server, + ) + .myid_offset) + .map(|myid_offset| myid_offset.to_string()), + )] .into()) } @@ -579,7 +550,8 @@ impl v1alpha1::ZookeeperCluster { pub fn role( &self, role_variant: &ZookeeperRole, - ) -> Result<&Role, Error> { + ) -> Result<&Role, Error> + { match role_variant { ZookeeperRole::Server => self.spec.servers.as_ref(), } @@ -592,10 +564,7 @@ impl v1alpha1::ZookeeperCluster { pub fn rolegroup( &self, rolegroup_ref: &RoleGroupRef, - ) -> Result< - RoleGroup, - Error, - > { + ) -> Result, Error> { let role_variant = ZookeeperRole::from_str(&rolegroup_ref.role).with_context(|_| { UnknownZookeeperRoleSnafu { role: rolegroup_ref.role.to_owned(), @@ -723,26 +692,6 @@ impl v1alpha1::ZookeeperCluster { Ok((vec![data_pvc], resources.into())) } - - pub fn heap_limits(&self, resources: &ResourceRequirements) -> Result, Error> { - let memory_limit = MemoryQuantity::try_from( - resources - .limits - .as_ref() - .and_then(|limits| limits.get("memory")) - .context(InvalidJavaHeapConfigSnafu)?, - ) - .context(FailedToConvertJavaHeapSnafu { - unit: BinaryMultiple::Mebi.to_java_memory_unit(), - })?; - - Ok(Some( - (memory_limit * JVM_HEAP_FACTOR) - .scale_to(BinaryMultiple::Mebi) - .floor() - .value as u32, - )) - } } #[cfg(test)] diff --git a/rust/operator-binary/src/main.rs b/rust/operator-binary/src/main.rs index 23f9af50..aa152812 100644 --- a/rust/operator-binary/src/main.rs +++ b/rust/operator-binary/src/main.rs @@ -26,6 +26,7 @@ use stackable_operator::{ use crate::{zk_controller::ZK_FULL_CONTROLLER_NAME, znode_controller::ZNODE_FULL_CONTROLLER_NAME}; mod command; +mod config; pub mod crd; mod discovery; mod operations; diff --git a/rust/operator-binary/src/zk_controller.rs b/rust/operator-binary/src/zk_controller.rs index cfc2e14f..a897eba2 100644 --- a/rust/operator-binary/src/zk_controller.rs +++ b/rust/operator-binary/src/zk_controller.rs @@ -68,6 +68,7 @@ use strum::{EnumDiscriminants, IntoStaticStr}; use crate::{ command::create_init_container_command_args, + config::jvm::{construct_non_heap_jvm_args, construct_zk_server_heap_env}, crd::{ security::{self, ZookeeperSecurity}, v1alpha1, ZookeeperRole, DOCKER_IMAGE_BASE_NAME, JVM_SECURITY_PROPERTIES_FILE, @@ -191,9 +192,6 @@ pub enum Error { source: stackable_operator::client::Error, }, - #[snafu(display("invalid java heap config"))] - InvalidJavaHeapConfig { source: crate::crd::Error }, - #[snafu(display("failed to create RBAC service account"))] ApplyServiceAccount { source: stackable_operator::cluster_resources::Error, @@ -267,6 +265,9 @@ pub enum Error { CreateClusterResources { source: stackable_operator::cluster_resources::Error, }, + + #[snafu(display("failed to construct JVM arguments"))] + ConstructJvmArguments { source: crate::config::jvm::Error }, } impl ReconcilerError for Error { @@ -295,7 +296,6 @@ impl ReconcilerError for Error { Error::BuildDiscoveryConfig { .. } => None, Error::ApplyDiscoveryConfig { .. } => None, Error::ApplyStatus { .. } => None, - Error::InvalidJavaHeapConfig { .. } => None, Error::ApplyServiceAccount { .. } => None, Error::ApplyRoleBinding { .. } => None, Error::BuildRbacResources { .. } => None, @@ -313,6 +313,7 @@ impl ReconcilerError for Error { Error::AddVolume { .. } => None, Error::AddVolumeMount { .. } => None, Error::CreateClusterResources { .. } => None, + Error::ConstructJvmArguments { .. } => None, } } } @@ -764,7 +765,7 @@ fn build_server_rolegroup_statefulset( .logging(zk_role, rolegroup_ref) .context(CrdValidationFailureSnafu)?; - let mut env_vars = server_config + let env_vars = server_config .get(&PropertyNameKind::Env) .into_iter() .flatten() @@ -778,17 +779,6 @@ fn build_server_rolegroup_statefulset( let (pvc, resources) = zk .resources(zk_role, rolegroup_ref) .context(CrdValidationFailureSnafu)?; - // set heap size if available - let heap_limits = zk - .heap_limits(&resources) - .context(InvalidJavaHeapConfigSnafu)?; - if let Some(heap_limits) = heap_limits { - env_vars.push(EnvVar { - name: v1alpha1::ZookeeperConfig::ZK_SERVER_HEAP.to_string(), - value: Some(heap_limits.to_string()), - ..EnvVar::default() - }); - } let mut cb_prepare = ContainerBuilder::new("prepare").expect("invalid hard-coded container name"); @@ -886,6 +876,15 @@ fn build_server_rolegroup_statefulset( create_vector_shutdown_file_command(STACKABLE_LOG_DIR), }]) .add_env_vars(env_vars) + .add_env_var( + "ZK_SERVER_HEAP", + construct_zk_server_heap_env(merged_config).context(ConstructJvmArgumentsSnafu)?, + ) + .add_env_var( + "SERVER_JVMFLAGS", + construct_non_heap_jvm_args(zk, role, &rolegroup_ref.role_group) + .context(ConstructJvmArgumentsSnafu)?, + ) .add_env_var( "CONTAINERDEBUG_LOG_DIRECTORY", format!("{STACKABLE_LOG_DIR}/containerdebug"),