diff --git a/docs/reference/esql/functions/aggregation-functions.asciidoc b/docs/reference/esql/functions/aggregation-functions.asciidoc index 083f0eee792cd..821b109741a0a 100644 --- a/docs/reference/esql/functions/aggregation-functions.asciidoc +++ b/docs/reference/esql/functions/aggregation-functions.asciidoc @@ -12,8 +12,8 @@ The <> command supports these aggregate functions: * <> * <> * <> -* <> -* <> +* <> +* <> * <> * <> * experimental:[] <> @@ -25,10 +25,10 @@ The <> command supports these aggregate functions: include::count.asciidoc[] include::count-distinct.asciidoc[] -include::median.asciidoc[] -include::median-absolute-deviation.asciidoc[] include::layout/avg.asciidoc[] include::layout/max.asciidoc[] +include::layout/median.asciidoc[] +include::layout/median_absolute_deviation.asciidoc[] include::layout/min.asciidoc[] include::layout/percentile.asciidoc[] include::layout/st_centroid_agg.asciidoc[] diff --git a/docs/reference/esql/functions/appendix/median.asciidoc b/docs/reference/esql/functions/appendix/median.asciidoc new file mode 100644 index 0000000000000..929a4ed0dae2c --- /dev/null +++ b/docs/reference/esql/functions/appendix/median.asciidoc @@ -0,0 +1,7 @@ +// This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. + +[WARNING] +==== +`MEDIAN` is also {wikipedia}/Nondeterministic_algorithm[non-deterministic]. +This means you can get slightly different results using the same data. +==== diff --git a/docs/reference/esql/functions/appendix/median_absolute_deviation.asciidoc b/docs/reference/esql/functions/appendix/median_absolute_deviation.asciidoc new file mode 100644 index 0000000000000..a4f96c800946b --- /dev/null +++ b/docs/reference/esql/functions/appendix/median_absolute_deviation.asciidoc @@ -0,0 +1,7 @@ +// This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. 
+ +[WARNING] +==== +`MEDIAN_ABSOLUTE_DEVIATION` is also {wikipedia}/Nondeterministic_algorithm[non-deterministic]. +This means you can get slightly different results using the same data. +==== diff --git a/docs/reference/esql/functions/description/median.asciidoc b/docs/reference/esql/functions/description/median.asciidoc new file mode 100644 index 0000000000000..ff3b7b32ed15e --- /dev/null +++ b/docs/reference/esql/functions/description/median.asciidoc @@ -0,0 +1,7 @@ +// This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. + +*Description* + +The value that is greater than half of all values and less than half of all values, also known as the 50% <>. + +NOTE: Like <>, `MEDIAN` is <>. diff --git a/docs/reference/esql/functions/description/median_absolute_deviation.asciidoc b/docs/reference/esql/functions/description/median_absolute_deviation.asciidoc new file mode 100644 index 0000000000000..1a363920dd422 --- /dev/null +++ b/docs/reference/esql/functions/description/median_absolute_deviation.asciidoc @@ -0,0 +1,7 @@ +// This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. + +*Description* + +Returns the median absolute deviation, a measure of variability. It is a robust statistic, meaning that it is useful for describing data that may have outliers, or may not be normally distributed. For such data it can be more descriptive than standard deviation. It is calculated as the median of each data point's deviation from the median of the entire sample. That is, for a random variable `X`, the median absolute deviation is `median(|median(X) - X|)`. + +NOTE: Like <>, `MEDIAN_ABSOLUTE_DEVIATION` is <>. 
diff --git a/docs/reference/esql/functions/examples/median.asciidoc b/docs/reference/esql/functions/examples/median.asciidoc new file mode 100644 index 0000000000000..cb6248dcff148 --- /dev/null +++ b/docs/reference/esql/functions/examples/median.asciidoc @@ -0,0 +1,22 @@ +// This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. + +*Examples* + +[source.merge.styled,esql] +---- +include::{esql-specs}/stats_percentile.csv-spec[tag=median] +---- +[%header.monospaced.styled,format=dsv,separator=|] +|=== +include::{esql-specs}/stats_percentile.csv-spec[tag=median-result] +|=== +The expression can use inline functions. For example, to calculate the median of the maximum values of a multivalued column, first use `MV_MAX` to get the maximum value per row, and use the result with the `MEDIAN` function +[source.merge.styled,esql] +---- +include::{esql-specs}/stats_percentile.csv-spec[tag=docsStatsMedianNestedExpression] +---- +[%header.monospaced.styled,format=dsv,separator=|] +|=== +include::{esql-specs}/stats_percentile.csv-spec[tag=docsStatsMedianNestedExpression-result] +|=== + diff --git a/docs/reference/esql/functions/examples/median_absolute_deviation.asciidoc b/docs/reference/esql/functions/examples/median_absolute_deviation.asciidoc new file mode 100644 index 0000000000000..20891126c20fb --- /dev/null +++ b/docs/reference/esql/functions/examples/median_absolute_deviation.asciidoc @@ -0,0 +1,22 @@ +// This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. + +*Examples* + +[source.merge.styled,esql] +---- +include::{esql-specs}/stats_percentile.csv-spec[tag=median-absolute-deviation] +---- +[%header.monospaced.styled,format=dsv,separator=|] +|=== +include::{esql-specs}/stats_percentile.csv-spec[tag=median-absolute-deviation-result] +|=== +The expression can use inline functions. 
For example, to calculate the median absolute deviation of the maximum values of a multivalued column, first use `MV_MAX` to get the maximum value per row, and use the result with the `MEDIAN_ABSOLUTE_DEVIATION` function +[source.merge.styled,esql] +---- +include::{esql-specs}/stats_percentile.csv-spec[tag=docsStatsMADNestedExpression] +---- +[%header.monospaced.styled,format=dsv,separator=|] +|=== +include::{esql-specs}/stats_percentile.csv-spec[tag=docsStatsMADNestedExpression-result] +|=== + diff --git a/docs/reference/esql/functions/kibana/definition/median.json b/docs/reference/esql/functions/kibana/definition/median.json new file mode 100644 index 0000000000000..4887a4497e813 --- /dev/null +++ b/docs/reference/esql/functions/kibana/definition/median.json @@ -0,0 +1,49 @@ +{ + "comment" : "This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it.", + "type" : "agg", + "name" : "median", + "description" : "The value that is greater than half of all values and less than half of all values, also known as the 50% <>.", + "note" : "Like <>, `MEDIAN` is <>.", + "signatures" : [ + { + "params" : [ + { + "name" : "number", + "type" : "double", + "optional" : false, + "description" : "" + } + ], + "variadic" : false, + "returnType" : "double" + }, + { + "params" : [ + { + "name" : "number", + "type" : "integer", + "optional" : false, + "description" : "" + } + ], + "variadic" : false, + "returnType" : "double" + }, + { + "params" : [ + { + "name" : "number", + "type" : "long", + "optional" : false, + "description" : "" + } + ], + "variadic" : false, + "returnType" : "double" + } + ], + "examples" : [ + "FROM employees\n| STATS MEDIAN(salary), PERCENTILE(salary, 50)", + "FROM employees\n| STATS median_max_salary_change = MEDIAN(MV_MAX(salary_change))" + ] +} diff --git a/docs/reference/esql/functions/kibana/definition/median_absolute_deviation.json 
b/docs/reference/esql/functions/kibana/definition/median_absolute_deviation.json new file mode 100644 index 0000000000000..4a8b1cd30611f --- /dev/null +++ b/docs/reference/esql/functions/kibana/definition/median_absolute_deviation.json @@ -0,0 +1,49 @@ +{ + "comment" : "This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it.", + "type" : "agg", + "name" : "median_absolute_deviation", + "description" : "Returns the median absolute deviation, a measure of variability. It is a robust statistic, meaning that it is useful for describing data that may have outliers, or may not be normally distributed. For such data it can be more descriptive than standard deviation.\n\nIt is calculated as the median of each data point's deviation from the median of the entire sample. That is, for a random variable `X`, the median absolute deviation is `median(|median(X) - X|)`.", + "note" : "Like <>, `MEDIAN_ABSOLUTE_DEVIATION` is <>.", + "signatures" : [ + { + "params" : [ + { + "name" : "number", + "type" : "double", + "optional" : false, + "description" : "" + } + ], + "variadic" : false, + "returnType" : "double" + }, + { + "params" : [ + { + "name" : "number", + "type" : "integer", + "optional" : false, + "description" : "" + } + ], + "variadic" : false, + "returnType" : "double" + }, + { + "params" : [ + { + "name" : "number", + "type" : "long", + "optional" : false, + "description" : "" + } + ], + "variadic" : false, + "returnType" : "double" + } + ], + "examples" : [ + "FROM employees\n| STATS MEDIAN(salary), MEDIAN_ABSOLUTE_DEVIATION(salary)", + "FROM employees\n| STATS m_a_d_max_salary_change = MEDIAN_ABSOLUTE_DEVIATION(MV_MAX(salary_change))" + ] +} diff --git a/docs/reference/esql/functions/kibana/docs/median.md b/docs/reference/esql/functions/kibana/docs/median.md new file mode 100644 index 0000000000000..7a4370b4d2551 --- /dev/null +++ b/docs/reference/esql/functions/kibana/docs/median.md @@ -0,0 +1,12 @@ + + +### 
MEDIAN +The value that is greater than half of all values and less than half of all values, also known as the 50% <>. + +``` +FROM employees +| STATS MEDIAN(salary), PERCENTILE(salary, 50) +``` +Note: Like <>, `MEDIAN` is <>. diff --git a/docs/reference/esql/functions/kibana/docs/median_absolute_deviation.md b/docs/reference/esql/functions/kibana/docs/median_absolute_deviation.md new file mode 100644 index 0000000000000..8db113deb2c49 --- /dev/null +++ b/docs/reference/esql/functions/kibana/docs/median_absolute_deviation.md @@ -0,0 +1,14 @@ + + +### MEDIAN_ABSOLUTE_DEVIATION +Returns the median absolute deviation, a measure of variability. It is a robust statistic, meaning that it is useful for describing data that may have outliers, or may not be normally distributed. For such data it can be more descriptive than standard deviation. + +It is calculated as the median of each data point's deviation from the median of the entire sample. That is, for a random variable `X`, the median absolute deviation is `median(|median(X) - X|)`. + +``` +FROM employees +| STATS MEDIAN(salary), MEDIAN_ABSOLUTE_DEVIATION(salary) +``` +Note: Like <>, `MEDIAN_ABSOLUTE_DEVIATION` is <>. diff --git a/docs/reference/esql/functions/layout/median.asciidoc b/docs/reference/esql/functions/layout/median.asciidoc new file mode 100644 index 0000000000000..c03e73523983d --- /dev/null +++ b/docs/reference/esql/functions/layout/median.asciidoc @@ -0,0 +1,16 @@ +// This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. 
+ +[discrete] +[[esql-median]] +=== `MEDIAN` + +*Syntax* + +[.text-center] +image::esql/functions/signature/median.svg[Embedded,opts=inline] + +include::../parameters/median.asciidoc[] +include::../description/median.asciidoc[] +include::../types/median.asciidoc[] +include::../examples/median.asciidoc[] +include::../appendix/median.asciidoc[] diff --git a/docs/reference/esql/functions/layout/median_absolute_deviation.asciidoc b/docs/reference/esql/functions/layout/median_absolute_deviation.asciidoc new file mode 100644 index 0000000000000..b558e6f11d9d0 --- /dev/null +++ b/docs/reference/esql/functions/layout/median_absolute_deviation.asciidoc @@ -0,0 +1,16 @@ +// This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. + +[discrete] +[[esql-median_absolute_deviation]] +=== `MEDIAN_ABSOLUTE_DEVIATION` + +*Syntax* + +[.text-center] +image::esql/functions/signature/median_absolute_deviation.svg[Embedded,opts=inline] + +include::../parameters/median_absolute_deviation.asciidoc[] +include::../description/median_absolute_deviation.asciidoc[] +include::../types/median_absolute_deviation.asciidoc[] +include::../examples/median_absolute_deviation.asciidoc[] +include::../appendix/median_absolute_deviation.asciidoc[] diff --git a/docs/reference/esql/functions/median.asciidoc b/docs/reference/esql/functions/median.asciidoc deleted file mode 100644 index 2f7d70775e38e..0000000000000 --- a/docs/reference/esql/functions/median.asciidoc +++ /dev/null @@ -1,52 +0,0 @@ -[discrete] -[[esql-agg-median]] -=== `MEDIAN` - -*Syntax* - -[source,esql] ----- -MEDIAN(expression) ----- - -*Parameters* - -`expression`:: -Expression from which to return the median value. - -*Description* - -Returns the value that is greater than half of all values and less than half of -all values, also known as the 50% <>. - -NOTE: Like <>, `MEDIAN` is <>. - -[WARNING] -==== -`MEDIAN` is also {wikipedia}/Nondeterministic_algorithm[non-deterministic]. 
-This means you can get slightly different results using the same data. -==== - -*Example* - -[source.merge.styled,esql] ----- -include::{esql-specs}/stats_percentile.csv-spec[tag=median] ----- -[%header.monospaced.styled,format=dsv,separator=|] -|=== -include::{esql-specs}/stats_percentile.csv-spec[tag=median-result] -|=== - -The expression can use inline functions. For example, to calculate the median of -the maximum values of a multivalued column, first use `MV_MAX` to get the -maximum value per row, and use the result with the `MEDIAN` function: - -[source.merge.styled,esql] ----- -include::{esql-specs}/stats_percentile.csv-spec[tag=docsStatsMedianNestedExpression] ----- -[%header.monospaced.styled,format=dsv,separator=|] -|=== -include::{esql-specs}/stats_percentile.csv-spec[tag=docsStatsMedianNestedExpression-result] -|=== diff --git a/docs/reference/esql/functions/parameters/median.asciidoc b/docs/reference/esql/functions/parameters/median.asciidoc new file mode 100644 index 0000000000000..91c56709d182a --- /dev/null +++ b/docs/reference/esql/functions/parameters/median.asciidoc @@ -0,0 +1,6 @@ +// This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. + +*Parameters* + +`number`:: + diff --git a/docs/reference/esql/functions/parameters/median_absolute_deviation.asciidoc b/docs/reference/esql/functions/parameters/median_absolute_deviation.asciidoc new file mode 100644 index 0000000000000..91c56709d182a --- /dev/null +++ b/docs/reference/esql/functions/parameters/median_absolute_deviation.asciidoc @@ -0,0 +1,6 @@ +// This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. 
+ +*Parameters* + +`number`:: + diff --git a/docs/reference/esql/functions/signature/median.svg b/docs/reference/esql/functions/signature/median.svg new file mode 100644 index 0000000000000..c61b3a9e77817 --- /dev/null +++ b/docs/reference/esql/functions/signature/median.svg @@ -0,0 +1 @@ +MEDIAN(number) \ No newline at end of file diff --git a/docs/reference/esql/functions/signature/median_absolute_deviation.svg b/docs/reference/esql/functions/signature/median_absolute_deviation.svg new file mode 100644 index 0000000000000..bcf01de52ac12 --- /dev/null +++ b/docs/reference/esql/functions/signature/median_absolute_deviation.svg @@ -0,0 +1 @@ +MEDIAN_ABSOLUTE_DEVIATION(number) \ No newline at end of file diff --git a/docs/reference/esql/functions/types/median.asciidoc b/docs/reference/esql/functions/types/median.asciidoc new file mode 100644 index 0000000000000..273dae4af76c2 --- /dev/null +++ b/docs/reference/esql/functions/types/median.asciidoc @@ -0,0 +1,11 @@ +// This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. + +*Supported types* + +[%header.monospaced.styled,format=dsv,separator=|] +|=== +number | result +double | double +integer | double +long | double +|=== diff --git a/docs/reference/esql/functions/types/median_absolute_deviation.asciidoc b/docs/reference/esql/functions/types/median_absolute_deviation.asciidoc new file mode 100644 index 0000000000000..273dae4af76c2 --- /dev/null +++ b/docs/reference/esql/functions/types/median_absolute_deviation.asciidoc @@ -0,0 +1,11 @@ +// This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. 
+ +*Supported types* + +[%header.monospaced.styled,format=dsv,separator=|] +|=== +number | result +double | double +integer | double +long | double +|=== diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/meta.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/meta.csv-spec index cb073cbc35b61..c036e04bc8ba3 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/meta.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/meta.csv-spec @@ -40,8 +40,8 @@ double e() "double log10(number:double|integer|long|unsigned_long)" "keyword|text ltrim(string:keyword|text)" "boolean|double|integer|long|date|ip max(field:boolean|double|integer|long|date|ip)" -"double|integer|long median(number:double|integer|long)" -"double|integer|long median_absolute_deviation(number:double|integer|long)" +"double median(number:double|integer|long)" +"double median_absolute_deviation(number:double|integer|long)" "boolean|double|integer|long|date|ip min(field:boolean|double|integer|long|date|ip)" "boolean|cartesian_point|cartesian_shape|date|double|geo_point|geo_shape|integer|ip|keyword|long|text|version mv_append(field1:boolean|cartesian_point|cartesian_shape|date|double|geo_point|geo_shape|integer|ip|keyword|long|text|version, field2:boolean|cartesian_point|cartesian_shape|date|double|geo_point|geo_shape|integer|ip|keyword|long|text|version)" "double mv_avg(number:double|integer|long|unsigned_long)" @@ -283,8 +283,8 @@ log |Returns the logarithm of a value to a base. The input can be any log10 |Returns the logarithm of a value to base 10. The input can be any numeric value, the return value is always a double. Logs of 0 and negative numbers return `null` as well as a warning. ltrim |Removes leading whitespaces from a string. max |The maximum value of a field. -median |The value that is greater than half of all values and less than half of all values. -median_absolut|The median absolute deviation, a measure of variability. 
+median |The value that is greater than half of all values and less than half of all values, also known as the 50% <>. +median_absolut|"Returns the median absolute deviation, a measure of variability. It is a robust statistic, meaning that it is useful for describing data that may have outliers, or may not be normally distributed. For such data it can be more descriptive than standard deviation. It is calculated as the median of each data point's deviation from the median of the entire sample. That is, for a random variable `X`, the median absolute deviation is `median(|median(X) - X|)`." min |The minimum value of a field. mv_append |Concatenates values of two multi-value fields. mv_avg |Converts a multivalued field into a single valued field containing the average of all of the values. @@ -406,8 +406,8 @@ log |double log10 |double |false |false |false ltrim |"keyword|text" |false |false |false max |"boolean|double|integer|long|date|ip" |false |false |true -median |"double|integer|long" |false |false |true -median_absolut|"double|integer|long" |false |false |true +median |double |false |false |true +median_absolut|double |false |false |true min |"boolean|double|integer|long|date|ip" |false |false |true mv_append |"boolean|cartesian_point|cartesian_shape|date|double|geo_point|geo_shape|integer|ip|keyword|long|text|version" |[false, false] |false |false mv_avg |double |false |false |false diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Median.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Median.java index 36207df331e47..348fef577c934 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Median.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Median.java @@ -16,6 +16,7 @@ import org.elasticsearch.xpack.esql.core.tree.Source; import 
org.elasticsearch.xpack.esql.core.type.DataType; import org.elasticsearch.xpack.esql.expression.SurrogateExpression; +import org.elasticsearch.xpack.esql.expression.function.Example; import org.elasticsearch.xpack.esql.expression.function.FunctionInfo; import org.elasticsearch.xpack.esql.expression.function.Param; import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToDouble; @@ -32,9 +33,26 @@ public class Median extends AggregateFunction implements SurrogateExpression { // TODO: Add the compression parameter @FunctionInfo( - returnType = { "double", "integer", "long" }, - description = "The value that is greater than half of all values and less than half of all values.", - isAggregation = true + returnType = "double", + description = "The value that is greater than half of all values and less than half of all values, " + + "also known as the 50% <>.", + note = "Like <>, `MEDIAN` is <>.", + appendix = """ + [WARNING] + ==== + `MEDIAN` is also {wikipedia}/Nondeterministic_algorithm[non-deterministic]. + This means you can get slightly different results using the same data. + ====""", + isAggregation = true, + examples = { + @Example(file = "stats_percentile", tag = "median"), + @Example( + description = "The expression can use inline functions. 
For example, to calculate the median of " + + "the maximum values of a multivalued column, first use `MV_MAX` to get the " + + "maximum value per row, and use the result with the `MEDIAN` function", + file = "stats_percentile", + tag = "docsStatsMedianNestedExpression" + ), } ) public Median(Source source, @Param(name = "number", type = { "double", "integer", "long" }) Expression field) { super(source, field); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/MedianAbsoluteDeviation.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/MedianAbsoluteDeviation.java index 23d55942cc72f..46661e96b1d48 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/MedianAbsoluteDeviation.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/MedianAbsoluteDeviation.java @@ -16,6 +16,7 @@ import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.tree.NodeInfo; import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.expression.function.Example; import org.elasticsearch.xpack.esql.expression.function.FunctionInfo; import org.elasticsearch.xpack.esql.expression.function.Param; @@ -31,9 +32,33 @@ public class MedianAbsoluteDeviation extends NumericAggregate { // TODO: Add parameter @FunctionInfo( - returnType = { "double", "integer", "long" }, - description = "The median absolute deviation, a measure of variability.", - isAggregation = true + returnType = "double", + description = "Returns the median absolute deviation, a measure of variability. It is a robust " + + "statistic, meaning that it is useful for describing data that may have outliers, " + + "or may not be normally distributed. For such data it can be more descriptive " + + "than standard deviation." 
+ + "\n\n" + + "It is calculated as the median of each data point's deviation from the median of " + + "the entire sample. That is, for a random variable `X`, the median absolute " + + "deviation is `median(|median(X) - X|)`.", + note = "Like <>, `MEDIAN_ABSOLUTE_DEVIATION` is <>.", + appendix = """ + [WARNING] + ==== + `MEDIAN_ABSOLUTE_DEVIATION` is also {wikipedia}/Nondeterministic_algorithm[non-deterministic]. + This means you can get slightly different results using the same data. + ====""", + isAggregation = true, + examples = { + @Example(file = "stats_percentile", tag = "median-absolute-deviation"), + @Example( + description = "The expression can use inline functions. For example, to calculate the " + + "median absolute deviation of the maximum values of a multivalued column, first " + + "use `MV_MAX` to get the maximum value per row, and use the result with the " + + "`MEDIAN_ABSOLUTE_DEVIATION` function", + file = "stats_percentile", + tag = "docsStatsMADNestedExpression" + ), } ) public MedianAbsoluteDeviation(Source source, @Param(name = "number", type = { "double", "integer", "long" }) Expression field) { super(source, field); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/aggregate/MedianAbsoluteDeviationTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/aggregate/MedianAbsoluteDeviationTests.java new file mode 100644 index 0000000000000..2ba3345c95bdc --- /dev/null +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/aggregate/MedianAbsoluteDeviationTests.java @@ -0,0 +1,69 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.esql.expression.function.aggregate; + +import com.carrotsearch.randomizedtesting.annotations.Name; +import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; + +import org.elasticsearch.search.aggregations.metrics.InternalMedianAbsoluteDeviation; +import org.elasticsearch.search.aggregations.metrics.TDigestState; +import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.core.type.DataType; +import org.elasticsearch.xpack.esql.expression.function.AbstractAggregationTestCase; +import org.elasticsearch.xpack.esql.expression.function.MultiRowTestCaseSupplier; +import org.elasticsearch.xpack.esql.expression.function.TestCaseSupplier; + +import java.util.List; +import java.util.function.Supplier; +import java.util.stream.Stream; + +import static org.hamcrest.Matchers.equalTo; + +public class MedianAbsoluteDeviationTests extends AbstractAggregationTestCase { + public MedianAbsoluteDeviationTests(@Name("TestCase") Supplier testCaseSupplier) { + this.testCase = testCaseSupplier.get(); + } + + @ParametersFactory + public static Iterable parameters() { + var suppliers = Stream.of( + MultiRowTestCaseSupplier.intCases(1, 1000, Integer.MIN_VALUE, Integer.MAX_VALUE, true), + MultiRowTestCaseSupplier.longCases(1, 1000, Long.MIN_VALUE, Long.MAX_VALUE, true), + MultiRowTestCaseSupplier.doubleCases(1, 1000, -Double.MAX_VALUE, Double.MAX_VALUE, true) + ).flatMap(List::stream).map(MedianAbsoluteDeviationTests::makeSupplier).toList(); + + return parameterSuppliersFromTypedDataWithDefaultChecks(suppliers); + } + + @Override + protected Expression build(Source source, List args) { + return new MedianAbsoluteDeviation(source, args.get(0)); + } + + private static TestCaseSupplier makeSupplier(TestCaseSupplier.TypedDataSupplier fieldSupplier) { + return new TestCaseSupplier(List.of(fieldSupplier.type()), () -> { + var fieldTypedData = 
fieldSupplier.get(); + + var digest = TDigestState.create(1000); + + for (var value : fieldTypedData.multiRowData()) { + digest.add(((Number) value).doubleValue()); + } + + var expected = digest.size() == 0 ? null : InternalMedianAbsoluteDeviation.computeMedianAbsoluteDeviation(digest); + + return new TestCaseSupplier.TestCase( + List.of(fieldTypedData), + "MedianAbsoluteDeviation[number=Attribute[channel=0]]", + DataType.DOUBLE, + equalTo(expected) + ); + }); + } +} diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/aggregate/MedianTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/aggregate/MedianTests.java new file mode 100644 index 0000000000000..b94045a9c732e --- /dev/null +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/aggregate/MedianTests.java @@ -0,0 +1,103 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.esql.expression.function.aggregate; + +import com.carrotsearch.randomizedtesting.annotations.Name; +import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; + +import org.elasticsearch.search.aggregations.metrics.TDigestState; +import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.core.type.DataType; +import org.elasticsearch.xpack.esql.expression.function.AbstractAggregationTestCase; +import org.elasticsearch.xpack.esql.expression.function.MultiRowTestCaseSupplier; +import org.elasticsearch.xpack.esql.expression.function.TestCaseSupplier; + +import java.util.ArrayList; +import java.util.List; +import java.util.function.Supplier; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import static org.hamcrest.Matchers.equalTo; + +public class MedianTests extends AbstractAggregationTestCase { + public MedianTests(@Name("TestCase") Supplier testCaseSupplier) { + this.testCase = testCaseSupplier.get(); + } + + @ParametersFactory + public static Iterable parameters() { + var suppliers = Stream.of( + MultiRowTestCaseSupplier.intCases(1, 1000, Integer.MIN_VALUE, Integer.MAX_VALUE, true), + MultiRowTestCaseSupplier.longCases(1, 1000, Long.MIN_VALUE, Long.MAX_VALUE, true), + MultiRowTestCaseSupplier.doubleCases(1, 1000, -Double.MAX_VALUE, Double.MAX_VALUE, true) + ).flatMap(List::stream).map(MedianTests::makeSupplier).collect(Collectors.toCollection(ArrayList::new)); + + suppliers.addAll( + List.of( + // Folding + new TestCaseSupplier( + List.of(DataType.INTEGER), + () -> new TestCaseSupplier.TestCase( + List.of(TestCaseSupplier.TypedData.multiRow(List.of(200), DataType.INTEGER, "number")), + "Median[field=Attribute[channel=0]]", + DataType.DOUBLE, + equalTo(200.) 
+ ) + ), + new TestCaseSupplier( + List.of(DataType.LONG), + () -> new TestCaseSupplier.TestCase( + List.of(TestCaseSupplier.TypedData.multiRow(List.of(200L), DataType.LONG, "number")), + "Median[field=Attribute[channel=0]]", + DataType.DOUBLE, + equalTo(200.) + ) + ), + new TestCaseSupplier( + List.of(DataType.DOUBLE), + () -> new TestCaseSupplier.TestCase( + List.of(TestCaseSupplier.TypedData.multiRow(List.of(200.), DataType.DOUBLE, "number")), + "Median[field=Attribute[channel=0]]", + DataType.DOUBLE, + equalTo(200.) + ) + ) + ) + ); + + return parameterSuppliersFromTypedDataWithDefaultChecks(suppliers); + } + + @Override + protected Expression build(Source source, List args) { + return new Median(source, args.get(0)); + } + + private static TestCaseSupplier makeSupplier(TestCaseSupplier.TypedDataSupplier fieldSupplier) { + return new TestCaseSupplier(List.of(fieldSupplier.type()), () -> { + var fieldTypedData = fieldSupplier.get(); + + var digest = TDigestState.create(1000); + + for (var value : fieldTypedData.multiRowData()) { + digest.add(((Number) value).doubleValue()); + } + + var expected = digest.size() == 0 ? null : digest.quantile(0.5); + + return new TestCaseSupplier.TestCase( + List.of(fieldTypedData), + "Median[number=Attribute[channel=0]]", + DataType.DOUBLE, + equalTo(expected) + ); + }); + } +}