Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ML] Move chi2test to package #167237

Merged
merged 12 commits into from
Sep 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/CODEOWNERS
Original file line number Diff line number Diff line change
Expand Up @@ -501,6 +501,7 @@ x-pack/plugins/metrics_data_access @elastic/infra-monitoring-ui
x-pack/packages/ml/agg_utils @elastic/ml-ui
x-pack/packages/ml/anomaly_utils @elastic/ml-ui
x-pack/packages/ml/category_validator @elastic/ml-ui
x-pack/packages/ml/chi2test @elastic/ml-ui
x-pack/packages/ml/data_frame_analytics_utils @elastic/ml-ui
x-pack/packages/ml/data_grid @elastic/ml-ui
x-pack/packages/ml/date_picker @elastic/ml-ui
Expand Down
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -522,6 +522,7 @@
"@kbn/ml-agg-utils": "link:x-pack/packages/ml/agg_utils",
"@kbn/ml-anomaly-utils": "link:x-pack/packages/ml/anomaly_utils",
"@kbn/ml-category-validator": "link:x-pack/packages/ml/category_validator",
"@kbn/ml-chi2test": "link:x-pack/packages/ml/chi2test",
"@kbn/ml-data-frame-analytics-utils": "link:x-pack/packages/ml/data_frame_analytics_utils",
"@kbn/ml-data-grid": "link:x-pack/packages/ml/data_grid",
"@kbn/ml-date-picker": "link:x-pack/packages/ml/date_picker",
Expand Down
2 changes: 2 additions & 0 deletions tsconfig.base.json
Original file line number Diff line number Diff line change
Expand Up @@ -996,6 +996,8 @@
"@kbn/ml-anomaly-utils/*": ["x-pack/packages/ml/anomaly_utils/*"],
"@kbn/ml-category-validator": ["x-pack/packages/ml/category_validator"],
"@kbn/ml-category-validator/*": ["x-pack/packages/ml/category_validator/*"],
"@kbn/ml-chi2test": ["x-pack/packages/ml/chi2test"],
"@kbn/ml-chi2test/*": ["x-pack/packages/ml/chi2test/*"],
"@kbn/ml-data-frame-analytics-utils": ["x-pack/packages/ml/data_frame_analytics_utils"],
"@kbn/ml-data-frame-analytics-utils/*": ["x-pack/packages/ml/data_frame_analytics_utils/*"],
"@kbn/ml-data-grid": ["x-pack/packages/ml/data_grid"],
Expand Down
4 changes: 4 additions & 0 deletions x-pack/packages/ml/chi2test/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# @kbn/ml-chi2test

`computeChi2PValue` computes the p-value for how similar the datasets are.
The returned value ranges from 0 to 1, with 1 meaning the datasets are identical.
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
* 2.0.
*/

import { computeChi2PValue } from './data_drift_utils';
import { Histogram } from './types';
import { computeChi2PValue } from './compute_chi_2_pvalue';
import type { Histogram } from './types';

describe('computeChi2PValue()', () => {
test('should return close to 1 if datasets are both empty or nearly identical', () => {
Expand Down Expand Up @@ -83,6 +83,6 @@ describe('computeChi2PValue()', () => {
percentage: 1,
},
];
expect(computeChi2PValue(referenceTerms, comparisonTerms)).toStrictEqual(0);
expect(computeChi2PValue(referenceTerms, comparisonTerms)).toStrictEqual(0.000001);
});
});
48 changes: 48 additions & 0 deletions x-pack/packages/ml/chi2test/compute_chi_2_pvalue.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

import { criticalTableLookup } from './critical_table_lookup';
import type { Histogram } from './types';

/**
 * Compute the p-value for how similar the datasets are.
 * Returned value ranges from 0 to 1, with 1 meaning the datasets are identical.
 *
 * Terms are matched between the two inputs by the string form of their `key`,
 * so numeric and string keys are handled uniformly. Only the first 100 unique
 * keys (baseline first, then drifted) are taken into account. A term that is
 * missing on one side, or has no `percentage`, counts as 0 for that side.
 *
 * @param {Histogram[]} normalizedBaselineTerms - An array of normalized baseline terms (Histogram objects).
 * @param {Histogram[]} normalizedDriftedTerms - An array of normalized drifted terms (Histogram objects).
 * @returns {number} The p-value indicating the similarity of the datasets.
 */
export const computeChi2PValue = (
  normalizedBaselineTerms: Histogram[],
  normalizedDriftedTerms: Histogram[]
): number => {
  // Index each input once by stringified key instead of scanning it for every key.
  const indexByKey = (terms: Histogram[]): Map<string, number> => {
    const percentageByKey = new Map<string, number>();
    for (const term of terms) {
      const key = term.key.toString();
      // Keep the first occurrence of a key, which is what a linear `find` would pick.
      if (!percentageByKey.has(key)) {
        percentageByKey.set(key, term.percentage ?? 0);
      }
    }
    return percentageByKey;
  };

  const baselineByKey = indexByKey(normalizedBaselineTerms);
  const driftedByKey = indexByKey(normalizedDriftedTerms);

  // Get all unique keys from both arrays (insertion order: baseline, then drifted)
  const allKeys: string[] = Array.from(
    new Set([...baselineByKey.keys(), ...driftedByKey.keys()])
  ).slice(0, 100);

  // Calculate the chi-squared statistic and degrees of freedom
  const degreesOfFreedom = allKeys.length - 1;

  // A single category cannot differ from itself, so the datasets are treated as identical.
  if (degreesOfFreedom === 0) return 1;

  let chiSquared = 0;
  for (const key of allKeys) {
    // Keys were stringified above, so the lookup must use the same string form;
    // comparing against the raw `term.key` would never match numeric keys.
    const observed = driftedByKey.get(key) ?? 0;
    const expected = baselineByKey.get(key) ?? 0;
    chiSquared += (observed - expected) ** 2 / (expected > 0 ? expected : 1e-6); // Prevent divide by zero
  }

  // Use the criticalTableLookup function to determine the p-value
  return criticalTableLookup(chiSquared, degreesOfFreedom);
};
1,038 changes: 1,038 additions & 0 deletions x-pack/packages/ml/chi2test/constants.ts

Large diffs are not rendered by default.

40 changes: 40 additions & 0 deletions x-pack/packages/ml/chi2test/critical_table_lookup.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

import { CRITICAL_VALUES_TABLE, SIGNIFICANCE_LEVELS } from './constants';

/**
 * Performs a lookup in a critical values table to determine the significance level
 * associated with a given chi-squared statistic and degrees of freedom.
 *
 * The row for `df` is scanned for the critical value closest to `chi2Statistic`
 * (the first one wins on ties), and the significance level at that column is returned.
 *
 * @param {number} chi2Statistic - The chi-squared statistic for which the significance level is to be determined.
 * @param {number} df - The degrees of freedom (an integer) for the chi-squared test.
 * @returns {number} The significance level corresponding to the chi-squared statistic and degrees of freedom,
 * or 1 when df is less than 1.
 * @throws {Error} If df is not an integer, or if it exceeds the number of rows in the critical values table.
 */
export const criticalTableLookup = (chi2Statistic: number, df: number) => {
  // Fewer than one degree of freedom is reported as "no difference" rather than as an error.
  if (df < 1) return 1;
  if (!Number.isInteger(df)) throw Error('Degrees of freedom must be a valid integer');

  // Get the row for the given degrees of freedom (row 0 holds df = 1)
  const criticalValues = CRITICAL_VALUES_TABLE[df - 1];
  if (criticalValues === undefined) {
    // Fail with a descriptive message instead of an opaque TypeError on the row access below.
    throw Error(
      `Degrees of freedom must not exceed ${CRITICAL_VALUES_TABLE.length}, received ${df}`
    );
  }

  // Get the column index of the critical value nearest to the statistic
  let columnIndex = 0;
  let minDiff = Math.abs(criticalValues[0] - chi2Statistic);
  for (let j = 1; j < criticalValues.length; j++) {
    const diff = Math.abs(criticalValues[j] - chi2Statistic);
    // Strict comparison keeps the earliest column when two values are equally close.
    if (diff < minDiff) {
      minDiff = diff;
      columnIndex = j;
    }
  }

  // Determine the significance level from the column index
  return SIGNIFICANCE_LEVELS[columnIndex];
};
11 changes: 11 additions & 0 deletions x-pack/packages/ml/chi2test/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

export { computeChi2PValue } from './compute_chi_2_pvalue';
export { criticalTableLookup } from './critical_table_lookup';
export { CRITICAL_VALUES_TABLE, SIGNIFICANCE_LEVELS } from './constants';
export type { Histogram } from './types';
12 changes: 12 additions & 0 deletions x-pack/packages/ml/chi2test/jest.config.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

// Jest configuration for the @kbn/ml-chi2test package.
module.exports = {
// Base Jest settings come from the '@kbn/test' preset.
preset: '@kbn/test',
// Four directory levels up from x-pack/packages/ml/chi2test, i.e. the repository root.
rootDir: '../../../..',
// Only look for tests inside this package's directory.
roots: ['<rootDir>/x-pack/packages/ml/chi2test'],
};
5 changes: 5 additions & 0 deletions x-pack/packages/ml/chi2test/kibana.jsonc
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"type": "shared-common",
"id": "@kbn/ml-chi2test",
"owner": "@elastic/ml-ui"
}
6 changes: 6 additions & 0 deletions x-pack/packages/ml/chi2test/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"name": "@kbn/ml-chi2test",
"private": true,
"version": "1.0.0",
"license": "Elastic License 2.0"
}
19 changes: 19 additions & 0 deletions x-pack/packages/ml/chi2test/tsconfig.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
{
"extends": "../../../../tsconfig.base.json",
"compilerOptions": {
"outDir": "target/types",
"types": [
"jest",
"node",
"react"
]
},
"include": [
"**/*.ts",
"**/*.tsx",
],
"exclude": [
"target/**/*"
],
"kbn_references": []
}
24 changes: 24 additions & 0 deletions x-pack/packages/ml/chi2test/types.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

/**
 * Shape of a single histogram entry as consumed by computeChi2PValue.
 */
export interface Histogram {
  /**
   * The key of the entry; either a string or a number.
   */
  key: string | number;
  /**
   * The doc count of the entry.
   */
  doc_count: number;
  /**
   * The percentage of the entry. May be omitted.
   */
  percentage?: number;
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,20 @@
* 2.0.
*/

import React from 'react';

import { SeriesColorAccessor } from '@elastic/charts/dist/chart_types/xy_chart/utils/specs';
import { Axis, BarSeries, Chart, Position, ScaleType, Settings, Tooltip } from '@elastic/charts';
import React from 'react';

import { FIELD_FORMAT_IDS } from '@kbn/field-formats-plugin/common';
import { getFieldFormatType, useFieldFormatter } from './default_value_formatter';
import type { Histogram } from '@kbn/ml-chi2test';

import { DataComparisonChartTooltipBody } from '../data_drift_chart_tooltip_body';
import { NoChartsData } from './no_charts_data';
import { DATA_COMPARISON_TYPE } from '../constants';
import { DataDriftField, Feature, Histogram } from '../types';
import type { DataDriftField, Feature } from '../types';

import { getFieldFormatType, useFieldFormatter } from './default_value_formatter';
import { NoChartsData } from './no_charts_data';

export const SingleDistributionChart = ({
data,
Expand Down
Loading