From df03ec3442925a0edf74d722b01e37f7458a7705 Mon Sep 17 00:00:00 2001
From: Walter Rafelsberger <walter.rafelsberger@elastic.co>
Date: Thu, 28 Sep 2023 14:02:54 +0200
Subject: [PATCH] fix handling of multiple text field names

---
 x-pack/packages/ml/agg_utils/src/types.ts     |   3 +-
 .../artificial_logs/significant_terms.ts      |   4 +
 .../aiops/common/api/stream_reducer.test.ts   |   1 +
 ...uild_extended_base_filter_criteria.test.ts |   1 +
 .../log_rate_analysis_results_table.tsx       |  18 ++-
 .../aiops/server/routes/log_rate_analysis.ts  |   3 +-
 .../queries/fetch_significant_categories.ts   | 107 +++++++++++-------
 .../fetch_significant_term_p_values.ts        |   3 +-
 8 files changed, 93 insertions(+), 47 deletions(-)

diff --git a/x-pack/packages/ml/agg_utils/src/types.ts b/x-pack/packages/ml/agg_utils/src/types.ts
index 89d2298621aca..6e7293c763358 100644
--- a/x-pack/packages/ml/agg_utils/src/types.ts
+++ b/x-pack/packages/ml/agg_utils/src/types.ts
@@ -63,6 +63,8 @@ export interface HistogramField {
  * aggregation type.
  */
 export interface SignificantTerm extends FieldValuePair {
+  key: string;
+  type: 'keyword' | 'log-pattern';
   doc_count: number;
   bg_count: number;
   total_doc_count: number;
@@ -70,7 +72,6 @@ export interface SignificantTerm extends FieldValuePair {
   score: number;
   pValue: number | null;
   normalizedScore: number;
-  type: 'keyword' | 'log-pattern';
   histogram?: SignificantTermHistogramItem[];
   unique?: boolean;
 }
diff --git a/x-pack/plugins/aiops/common/__mocks__/artificial_logs/significant_terms.ts b/x-pack/plugins/aiops/common/__mocks__/artificial_logs/significant_terms.ts
index 379a74c9965f6..94a0212027eb7 100644
--- a/x-pack/plugins/aiops/common/__mocks__/artificial_logs/significant_terms.ts
+++ b/x-pack/plugins/aiops/common/__mocks__/artificial_logs/significant_terms.ts
@@ -9,6 +9,7 @@ import type { SignificantTerm } from '@kbn/ml-agg-utils';
 
 export const significantTerms: SignificantTerm[] = [
   {
+    key: 'user:Peter',
     fieldName: 'user',
     fieldValue: 'Peter',
     doc_count: 1981,
@@ -21,6 +22,7 @@ export const significantTerms: SignificantTerm[] = [
     type: 'keyword',
   },
   {
+    key: 'response_code:500',
     fieldName: 'response_code',
     fieldValue: '500',
     doc_count: 1819,
@@ -33,6 +35,7 @@ export const significantTerms: SignificantTerm[] = [
     type: 'keyword',
   },
   {
+    key: 'url:home.php',
     fieldName: 'url',
     fieldValue: 'home.php',
     doc_count: 1744,
@@ -45,6 +48,7 @@ export const significantTerms: SignificantTerm[] = [
     type: 'keyword',
   },
   {
+    key: 'url:login.php',
     fieldName: 'url',
     fieldValue: 'login.php',
     doc_count: 1738,
diff --git a/x-pack/plugins/aiops/common/api/stream_reducer.test.ts b/x-pack/plugins/aiops/common/api/stream_reducer.test.ts
index 57e6c94c8d1cd..0d422ec34b925 100644
--- a/x-pack/plugins/aiops/common/api/stream_reducer.test.ts
+++ b/x-pack/plugins/aiops/common/api/stream_reducer.test.ts
@@ -39,6 +39,7 @@ describe('streamReducer', () => {
       initialState,
       addSignificantTermsAction([
         {
+          key: 'the-field-name:the-field-value',
           fieldName: 'the-field-name',
           fieldValue: 'the-field-value',
           doc_count: 10,
diff --git a/x-pack/plugins/aiops/public/application/utils/build_extended_base_filter_criteria.test.ts b/x-pack/plugins/aiops/public/application/utils/build_extended_base_filter_criteria.test.ts
index b293dd0a7d7a3..ccb4986bf1c05 100644
--- a/x-pack/plugins/aiops/public/application/utils/build_extended_base_filter_criteria.test.ts
+++ b/x-pack/plugins/aiops/public/application/utils/build_extended_base_filter_criteria.test.ts
@@ -12,6 +12,7 @@ import type { GroupTableItem } from '../../components/log_rate_analysis_results_
 import { buildExtendedBaseFilterCriteria } from './build_extended_base_filter_criteria';
 
 const selectedSignificantTermMock: SignificantTerm = {
+  key: 'meta.cloud.instance_id.keyword:1234',
   doc_count: 53408,
   bg_count: 1154,
   fieldName: 'meta.cloud.instance_id.keyword',
diff --git a/x-pack/plugins/aiops/public/components/log_rate_analysis_results_table/log_rate_analysis_results_table.tsx b/x-pack/plugins/aiops/public/components/log_rate_analysis_results_table/log_rate_analysis_results_table.tsx
index 13b91e8c81151..427ad1a2d8da2 100644
--- a/x-pack/plugins/aiops/public/components/log_rate_analysis_results_table/log_rate_analysis_results_table.tsx
+++ b/x-pack/plugins/aiops/public/components/log_rate_analysis_results_table/log_rate_analysis_results_table.tsx
@@ -15,8 +15,10 @@ import {
   EuiBadge,
   EuiBasicTable,
   EuiBasicTableColumn,
+  EuiCode,
   EuiIcon,
   EuiIconTip,
+  EuiText,
   EuiTableSortingType,
   EuiToolTip,
 } from '@elastic/eui';
@@ -143,9 +145,19 @@ export const LogRateAnalysisResultsTable: FC<LogRateAnalysisResultsTableProps> =
       name: i18n.translate('xpack.aiops.logRateAnalysis.resultsTable.fieldValueLabel', {
         defaultMessage: 'Field value',
       }),
-      render: (_, { fieldValue }) => {
-        return <div css={cssMultiLineTruncation}>{String(fieldValue)}</div>;
-      },
+      render: (_, { fieldValue, type }) => (
+        <div css={cssMultiLineTruncation}>
+          {type === 'keyword' ? (
+            String(fieldValue)
+          ) : (
+            <EuiText size="xs">
+              <EuiCode language="log" transparentBackground css={{ paddingInline: '0px' }}>
+                {fieldValue}
+              </EuiCode>
+            </EuiText>
+          )}
+        </div>
+      ),
       sortable: true,
       textOnly: true,
       truncateText: false,
diff --git a/x-pack/plugins/aiops/server/routes/log_rate_analysis.ts b/x-pack/plugins/aiops/server/routes/log_rate_analysis.ts
index ba85d9d2e255e..dc90df7316326 100644
--- a/x-pack/plugins/aiops/server/routes/log_rate_analysis.ts
+++ b/x-pack/plugins/aiops/server/routes/log_rate_analysis.ts
@@ -229,7 +229,7 @@ export const defineLogRateAnalysisRoute = (
                   const indexInfo = await fetchIndexInfo(
                     client,
                     request.body,
-                    ['message'],
+                    ['message', 'error.message'],
                     abortSignal
                   );
 
@@ -238,7 +238,6 @@ export const defineLogRateAnalysisRoute = (
                   textFieldCandidates.push(...indexInfo.textFieldCandidates);
                   totalDocCount = indexInfo.totalDocCount;
                 } catch (e) {
-                  // console.log(e);
                   if (!isRequestAbortedError(e)) {
                     logger.error(`Failed to fetch index information, got: \n${e.toString()}`);
                     pushError(`Failed to fetch index information.`);
diff --git a/x-pack/plugins/aiops/server/routes/queries/fetch_significant_categories.ts b/x-pack/plugins/aiops/server/routes/queries/fetch_significant_categories.ts
index e4e79f83f8e2d..4f7f234dbf208 100644
--- a/x-pack/plugins/aiops/server/routes/queries/fetch_significant_categories.ts
+++ b/x-pack/plugins/aiops/server/routes/queries/fetch_significant_categories.ts
@@ -39,6 +39,22 @@ export const fetchSignificantCategories = async (
   emitError: (m: string) => void,
   abortSignal?: AbortSignal
 ) => {
+  // To make sure we have the same categories for both baseline and deviation,
+  // we run an initial query that spans from baseline start to deviation end.
+  // We could instead query only the exact baseline AND deviation ranges, but we
+  // wanted to avoid that refactor here; this should be good enough for a start.
+  const categoriesOverall = await fetchCategories(
+    esClient,
+    params,
+    fieldNames,
+    params.baselineMin,
+    params.deviationMax,
+    logger,
+    sampleProbability,
+    emitError,
+    abortSignal
+  );
+
   const categoriesBaseline = await fetchCategories(
     esClient,
     params,
@@ -63,49 +79,60 @@ export const fetchSignificantCategories = async (
     abortSignal
   );
 
-  if (categoriesBaseline.length === 0 || categoriesDeviation.length === 0) return [];
-
-  const categoriesBaselineTotalCount = getCategoriesTotalCount(categoriesBaseline[0].categories);
-  const categoriesBaselineTestData = getCategoriesTestData(categoriesBaseline[0].categories);
-
-  const categoriesDeviationTotalCount = getCategoriesTotalCount(categoriesDeviation[0].categories);
-  const categoriesDeviationTestData = getCategoriesTestData(categoriesDeviation[0].categories);
-
-  // Get all unique keys from both arrays
-  const allKeys: string[] = Array.from(
-    new Set([
-      ...categoriesBaselineTestData.map((term) => term.key.toString()),
-      ...categoriesDeviationTestData.map((term) => term.key.toString()),
-    ])
-  ).slice(0, 100);
+  if (
+    categoriesBaseline.length !== fieldNames.length ||
+    categoriesDeviation.length !== fieldNames.length
+  )
+    return [];
 
   const significantCategories: SignificantTerm[] = [];
 
-  allKeys.forEach((key) => {
-    const baselineTerm = categoriesBaselineTestData.find((term) => term.key === key);
-    const deviationTerm = categoriesDeviationTestData.find((term) => term.key === key);
-
-    const observed: number = deviationTerm?.percentage ?? 0;
-    const expected: number = baselineTerm?.percentage ?? 0;
-    const chiSquared = Math.pow(observed - expected, 2) / (expected > 0 ? expected : 1e-6); // Prevent divide by zero
-
-    const pValue = criticalTableLookup(chiSquared, 1);
-    const score = Math.log(pValue);
-
-    if (pValue <= LOG_RATE_ANALYSIS_P_VALUE_THRESHOLD && observed > expected) {
-      significantCategories.push({
-        fieldName: 'message',
-        fieldValue: key,
-        doc_count: deviationTerm?.doc_count ?? 0,
-        bg_count: baselineTerm?.doc_count ?? 0,
-        total_doc_count: categoriesDeviationTotalCount,
-        total_bg_count: categoriesBaselineTotalCount,
-        score,
-        pValue,
-        normalizedScore: getNormalizedScore(score),
-        type: 'log-pattern',
-      });
-    }
+  fieldNames.forEach((fieldName, i) => {
+    const categoriesBaselineTotalCount = getCategoriesTotalCount(categoriesBaseline[i].categories);
+    const categoriesBaselineTestData = getCategoriesTestData(categoriesBaseline[i].categories);
+
+    const categoriesDeviationTotalCount = getCategoriesTotalCount(
+      categoriesDeviation[i].categories
+    );
+    const categoriesDeviationTestData = getCategoriesTestData(categoriesDeviation[i].categories);
+
+    // Union of the category keys seen in baseline or deviation, capped at 100 per field.
+    const allKeys: string[] = Array.from(
+      new Set([
+        ...categoriesBaselineTestData.map((term) => term.key.toString()),
+        ...categoriesDeviationTestData.map((term) => term.key.toString()),
+      ])
+    ).slice(0, 100);
+
+    allKeys.forEach((key) => {
+      // categoriesOverall is not length-checked against fieldNames above, so guard the lookup.
+      const categoryData = categoriesOverall[i]?.categories.find((c) => c.key === key);
+      const baselineTerm = categoriesBaselineTestData.find((term) => term.key === key);
+      const deviationTerm = categoriesDeviationTestData.find((term) => term.key === key);
+
+      const observed: number = deviationTerm?.percentage ?? 0;
+      const expected: number = baselineTerm?.percentage ?? 0;
+      const chiSquared = Math.pow(observed - expected, 2) / (expected > 0 ? expected : 1e-6); // Prevent divide by zero
+
+      const pValue = criticalTableLookup(chiSquared, 1);
+      const score = Math.log(pValue);
+
+      if (pValue <= LOG_RATE_ANALYSIS_P_VALUE_THRESHOLD && observed > expected) {
+        significantCategories.push({
+          key,
+          fieldName,
+          fieldValue: categoryData?.examples[0] ?? '',
+          doc_count: deviationTerm?.doc_count ?? 0,
+          bg_count: baselineTerm?.doc_count ?? 0,
+          total_doc_count: categoriesDeviationTotalCount,
+          total_bg_count: categoriesBaselineTotalCount,
+          score,
+          pValue,
+          normalizedScore: getNormalizedScore(score),
+          type: 'log-pattern',
+        });
+      }
+    });
   });
 
   return significantCategories;
diff --git a/x-pack/plugins/aiops/server/routes/queries/fetch_significant_term_p_values.ts b/x-pack/plugins/aiops/server/routes/queries/fetch_significant_term_p_values.ts
index 2c1d1e95285fe..52099d9a51d83 100644
--- a/x-pack/plugins/aiops/server/routes/queries/fetch_significant_term_p_values.ts
+++ b/x-pack/plugins/aiops/server/routes/queries/fetch_significant_term_p_values.ts
@@ -170,6 +170,8 @@ export const fetchSignificantTermPValues = async (
 
       if (typeof pValue === 'number' && pValue < LOG_RATE_ANALYSIS_P_VALUE_THRESHOLD) {
         result.push({
+          key: `${fieldName}:${String(bucket.key)}`,
+          type: 'keyword',
           fieldName,
           fieldValue: String(bucket.key),
           doc_count: bucket.doc_count,
@@ -179,7 +181,6 @@ export const fetchSignificantTermPValues = async (
           score: bucket.score,
           pValue,
           normalizedScore: getNormalizedScore(bucket.score),
-          type: 'keyword',
         });
       }
     }