diff --git a/CHANGELOG.md b/CHANGELOG.md index 5de1c31c..cf9255a4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,7 @@ All notable changes to this project will be documented in this file. - `extraVolumes` - Increase `log` Volume size from 33 MiB to 500 MiB ([#671]). - Replaced experimental NiFi `2.0.0-M4` with `2.0.0` ([#702]). +- Don't deploy the `PrometheusReportingTask` Job for NiFi versions `2.x.x` and up ([#708]). ### Fixed @@ -46,6 +47,7 @@ All notable changes to this project will be documented in this file. [#694]: https://github.com/stackabletech/nifi-operator/pull/694 [#698]: https://github.com/stackabletech/nifi-operator/pull/698 [#702]: https://github.com/stackabletech/nifi-operator/pull/702 +[#708]: https://github.com/stackabletech/nifi-operator/pull/708 ## [24.7.0] - 2024-07-24 diff --git a/docs/modules/nifi/pages/usage_guide/monitoring.adoc b/docs/modules/nifi/pages/usage_guide/monitoring.adoc index 82f1e688..98d92b10 100644 --- a/docs/modules/nifi/pages/usage_guide/monitoring.adoc +++ b/docs/modules/nifi/pages/usage_guide/monitoring.adoc @@ -3,8 +3,15 @@ :k8s-job: https://kubernetes.io/docs/concepts/workloads/controllers/job/ :k8s-network-policies: https://kubernetes.io/docs/concepts/services-networking/network-policies/ -The operator automatically configures NiFi to export Prometheus metrics. -This is done by creating a {k8s-job}[Job] that connects to NiFi and configures a reporting task. +In November 2024, Apache NiFi released a new major version https://cwiki.apache.org/confluence/display/NIFI/Release+Notes#ReleaseNotes-Version2.0.0[`2.0.0`]. + +The NiFi `2.0.0` release changed the way of exposing Prometheus metrics significantly. +The following steps explain how to expose metrics in NiFi versions `1.x.x` and `2.x.x`. + +== Configure metrics in NiFi `1.x.x` + +For NiFi versions `1.x.x`, the operator automatically configures NiFi to export Prometheus metrics. 
+This is done by creating a {k8s-job}[Job] that connects to NiFi and configures a https://nifi.apache.org/docs/nifi-docs/components/org.apache.nifi/nifi-prometheus-nar/1.26.0/org.apache.nifi.reporting.prometheus.PrometheusReportingTask/index.html[Prometheus Reporting Task]. IMPORTANT: Network access from the Job to NiFi is required. If you are running a Kubernetes with restrictive {k8s-network-policies}[NetworkPolicies], make sure to allow access from the Job to NiFi. @@ -24,3 +31,16 @@ spec: createReportingTaskJob: enabled: false ---- + +== Configure metrics in NiFi `2.x.x` + +The Prometheus Reporting Task was removed in NiFi `2.x.x` in https://issues.apache.org/jira/browse/NIFI-13507[NIFI-13507]. +Metrics are now always exposed and can be scraped using the NiFi Pod FQDN and the HTTP path `/nifi-api/flow/metrics/prometheus`. + +For a deployed single node NiFi cluster called `simple-nifi`, containing a rolegroup called `default`, the metrics endpoint is reachable under: + +``` +https://simple-nifi-node-default-0.simple-nifi-node-default.<namespace>.svc.cluster.local:8443/nifi-api/flow/metrics/prometheus +``` + +IMPORTANT: If NiFi is configured to do any user authentication, requests to the metrics endpoint must be authenticated and authorized. 
diff --git a/rust/operator-binary/src/controller.rs b/rust/operator-binary/src/controller.rs index df4282d6..c6e3111a 100644 --- a/rust/operator-binary/src/controller.rs +++ b/rust/operator-binary/src/controller.rs @@ -85,7 +85,7 @@ use crate::{ }, operations::{graceful_shutdown::add_graceful_shutdown_config, pdb::add_pdbs}, product_logging::{extend_role_group_config_map, resolve_vector_aggregator_address}, - reporting_task::{self, build_reporting_task, build_reporting_task_service_name}, + reporting_task::{self, build_maybe_reporting_task, build_reporting_task_service_name}, security::{ authentication::{ NifiAuthenticationConfig, AUTHORIZERS_XML_FILE_NAME, @@ -616,24 +616,25 @@ pub async fn reconcile_nifi( // Only add the reporting task in case it is enabled. if nifi.spec.cluster_config.create_reporting_task_job.enabled { - let (reporting_task_job, reporting_task_service) = build_reporting_task( + if let Some((reporting_task_job, reporting_task_service)) = build_maybe_reporting_task( nifi, &resolved_product_image, &client.kubernetes_cluster_info, &nifi_authentication_config, &rbac_sa.name_any(), ) - .context(ReportingTaskSnafu)?; - - cluster_resources - .add(client, reporting_task_service) - .await - .context(ApplyCreateReportingTaskServiceSnafu)?; + .context(ReportingTaskSnafu)? 
+ { + cluster_resources + .add(client, reporting_task_service) + .await + .context(ApplyCreateReportingTaskServiceSnafu)?; - cluster_resources - .add(client, reporting_task_job) - .await - .context(ApplyCreateReportingTaskJobSnafu)?; + cluster_resources + .add(client, reporting_task_job) + .await + .context(ApplyCreateReportingTaskJobSnafu)?; + } } // Remove any orphaned resources that still exist in k8s, but have not been added to diff --git a/rust/operator-binary/src/reporting_task/mod.rs b/rust/operator-binary/src/reporting_task/mod.rs index b0a6b59e..cf32756a 100644 --- a/rust/operator-binary/src/reporting_task/mod.rs +++ b/rust/operator-binary/src/reporting_task/mod.rs @@ -112,7 +112,8 @@ pub enum Error { type Result<T, E = Error> = std::result::Result<T, E>; -/// Build required resources to create the reporting task in NiFi. +/// Build required resources to create the reporting task in NiFi versions 1.x. + /// This will return /// * a Job that creates and runs the reporting task via the NiFi Rest API. /// * a Service that contains of one single NiFi node. @@ -122,23 +123,30 @@ type Result<T, E = Error> = std::result::Result<T, E>; /// from SingleUserLoginIdentityProvider to the FQDN of the pod. /// The NiFi role service will randomly delegate to different NiFi nodes which will /// then fail requests to other nodes. -pub fn build_reporting_task( +/// +/// NiFi 2.x and above automatically serves Prometheus metrics via the API, but as of 2024-11-08 +/// requires authentication. 
+pub fn build_maybe_reporting_task( nifi: &NifiCluster, resolved_product_image: &ResolvedProductImage, cluster_info: &KubernetesClusterInfo, nifi_auth_config: &NifiAuthenticationConfig, sa_name: &str, -) -> Result<(Job, Service)> { - Ok(( - build_reporting_task_job( - nifi, - resolved_product_image, - cluster_info, - nifi_auth_config, - sa_name, - )?, - build_reporting_task_service(nifi, resolved_product_image)?, - )) +) -> Result<Option<(Job, Service)>> { + if resolved_product_image.product_version.starts_with("1.") { + Ok(Some(( + build_reporting_task_job( + nifi, + resolved_product_image, + cluster_info, + nifi_auth_config, + sa_name, + )?, + build_reporting_task_service(nifi, resolved_product_image)?, + ))) + } else { + Ok(None) + } } /// Return the name of the reporting task. diff --git a/tests/templates/kuttl/smoke/60-assert.yaml b/tests/templates/kuttl/smoke/60-assert.yaml.j2 similarity index 82% rename from tests/templates/kuttl/smoke/60-assert.yaml rename to tests/templates/kuttl/smoke/60-assert.yaml.j2 index 1d531af0..d3240ee8 100644 --- a/tests/templates/kuttl/smoke/60-assert.yaml +++ b/tests/templates/kuttl/smoke/60-assert.yaml.j2 @@ -4,4 +4,6 @@ kind: TestAssert timeout: 300 commands: - script: kubectl exec -n $NAMESPACE test-nifi-0 -- python /tmp/test_nifi.py -u admin -p 'passwordWithSpecialCharacter\@<&>"'"'" -n $NAMESPACE -c 3 +{% if test_scenario['values']['nifi'].startswith('1.') %} - script: kubectl exec -n $NAMESPACE test-nifi-0 -- python /tmp/test_nifi_metrics.py -n $NAMESPACE +{% endif %} diff --git a/tests/templates/kuttl/upgrade/04-assert.yaml b/tests/templates/kuttl/upgrade/04-assert.yaml.j2 similarity index 87% rename from tests/templates/kuttl/upgrade/04-assert.yaml rename to tests/templates/kuttl/upgrade/04-assert.yaml.j2 index d6a4856d..9f90b3f1 100644 --- a/tests/templates/kuttl/upgrade/04-assert.yaml +++ b/tests/templates/kuttl/upgrade/04-assert.yaml.j2 @@ -4,5 +4,7 @@ kind: TestAssert timeout: 300 commands: - script: kubectl exec -n $NAMESPACE test-nifi-0 
-- python /tmp/test_nifi.py -u admin -p supersecretpassword -n $NAMESPACE -c 3 +{% if test_scenario['values']['nifi_old'].startswith('1.') %} - script: kubectl exec -n $NAMESPACE test-nifi-0 -- python /tmp/test_nifi_metrics.py -n $NAMESPACE +{% endif %} - script: kubectl exec -n $NAMESPACE test-nifi-0 -- sh -c "python /tmp/flow.py -e https://test-nifi-node-default-0.test-nifi-node-default.$NAMESPACE.svc.cluster.local:8443 run -t /tmp/generate-and-log-flowfiles.xml > /tmp/old_input" diff --git a/tests/templates/kuttl/upgrade/07-assert.yaml b/tests/templates/kuttl/upgrade/07-assert.yaml.j2 similarity index 90% rename from tests/templates/kuttl/upgrade/07-assert.yaml rename to tests/templates/kuttl/upgrade/07-assert.yaml.j2 index eda1e9fc..24abfbc8 100644 --- a/tests/templates/kuttl/upgrade/07-assert.yaml +++ b/tests/templates/kuttl/upgrade/07-assert.yaml.j2 @@ -6,7 +6,9 @@ metadata: timeout: 300 commands: - script: kubectl exec -n $NAMESPACE test-nifi-0 -- python /tmp/test_nifi.py -u admin -p supersecretpassword -n $NAMESPACE -c 3 +{% if test_scenario['values']['nifi_new'].startswith('1.') %} - script: kubectl exec -n $NAMESPACE test-nifi-0 -- python /tmp/test_nifi_metrics.py -n $NAMESPACE +{% endif %} - script: kubectl exec -n $NAMESPACE test-nifi-0 -- sh -c "python /tmp/flow.py -e https://test-nifi-node-default-0.test-nifi-node-default.$NAMESPACE.svc.cluster.local:8443 query > /tmp/new_input" # This tests that the number of input records stays the same after the upgrade. - script: kubectl exec -n $NAMESPACE test-nifi-0 -- diff /tmp/old_input /tmp/new_input