diff --git a/docs/changelog/104911.yaml b/docs/changelog/104911.yaml new file mode 100644 index 0000000000000..17a335337e345 --- /dev/null +++ b/docs/changelog/104911.yaml @@ -0,0 +1,7 @@ +pr: 104911 +summary: "ES|QL: Improve type validation in aggs for UNSIGNED_LONG better support\ + \ for VERSION" +area: ES|QL +type: bug +issues: + - 102961 diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/show.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/show.csv-spec index e1c1b276a90eb..16a4ebf8fb03e 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/show.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/show.csv-spec @@ -16,7 +16,7 @@ asin |"double asin(n:double|integer|long|unsigned_long)"|n atan |"double atan(n:double|integer|long|unsigned_long)" |n |"double|integer|long|unsigned_long" | "" |double | "Inverse tangent trigonometric function." | false | false | false atan2 |"double atan2(y:double|integer|long|unsigned_long, x:double|integer|long|unsigned_long)" |[y, x] |["double|integer|long|unsigned_long", "double|integer|long|unsigned_long"] |["", ""] |double | "The angle between the positive x-axis and the ray from the origin to the point (x , y) in the Cartesian plane." | [false, false] | false | false auto_bucket |"double|date auto_bucket(field:integer|long|double|date, buckets:integer, from:integer|long|double|date|string, to:integer|long|double|date|string)" |[field, buckets, from, to] |["integer|long|double|date", "integer", "integer|long|double|date|string", "integer|long|double|date|string"] |["", "", "", ""] | "double|date" | "Creates human-friendly buckets and returns a datetime value for each row that corresponds to the resulting bucket the row falls into." | [false, false, false, false] | false | false -avg |"double avg(field:double|integer|long|unsigned_long)" |field |"double|integer|long|unsigned_long" | "" |double | "The average of a numeric field." | false | false | true +avg |"double avg(field:double|integer|long)" |field |"double|integer|long" | "" |double | "The average of a numeric field." | false | false | true case |"boolean|cartesian_point|date|double|geo_point|integer|ip|keyword|long|text|unsigned_long|version case(condition:boolean, rest...:boolean|cartesian_point|date|double|geo_point|integer|ip|keyword|long|text|unsigned_long|version)" |[condition, rest] |["boolean", "boolean|cartesian_point|date|double|geo_point|integer|ip|keyword|long|text|unsigned_long|version"] |["", ""] |"boolean|cartesian_point|date|double|geo_point|integer|ip|keyword|long|text|unsigned_long|version" | "Accepts pairs of conditions and values. The function returns the value that belongs to the first condition that evaluates to true." | [false, false] | true | false ceil |"double|integer|long|unsigned_long ceil(n:double|integer|long|unsigned_long)" |n |"double|integer|long|unsigned_long" | "" | "double|integer|long|unsigned_long" | "Round a number up to the nearest integer." | false | false | false cidr_match |boolean cidr_match(ip:ip, blockX...:keyword) |[ip, blockX] |[ip, keyword] |["", "CIDR block to test the IP against."] |boolean | "Returns true if the provided IP is contained in one of the provided CIDR blocks." | [false, false] | true | false @@ -25,7 +25,7 @@ concat |"keyword concat(first:keyword|text, rest...:keyword|te cos |"double cos(n:double|integer|long|unsigned_long)" |n |"double|integer|long|unsigned_long" | "An angle, in radians" |double | "Returns the trigonometric cosine of an angle" | false | false | false cosh |"double cosh(n:double|integer|long|unsigned_long)" |n |"double|integer|long|unsigned_long" | "The number who's hyperbolic cosine is to be returned" |double | "Returns the hyperbolic cosine of a number" | false | false | false count |"long count(?field:boolean|cartesian_point|date|double|geo_point|integer|ip|keyword|long|text|unsigned_long|version)" |field |"boolean|cartesian_point|date|double|geo_point|integer|ip|keyword|long|text|unsigned_long|version" | "Column or literal for which to count the number of values." |long | "Returns the total number (count) of input values." | true | false | true -count_distinct |"long count_distinct(field:boolean|cartesian_point|date|double|geo_point|integer|ip|keyword|long|text|unsigned_long|version, ?precision:integer)" |[field, precision] |["boolean|cartesian_point|date|double|geo_point|integer|ip|keyword|long|text|unsigned_long|version, integer"] |["Column or literal for which to count the number of distinct values.", ""] |long | "Returns the approximate number of distinct values." | [false, true] | false | true +count_distinct |"long count_distinct(field:boolean|cartesian_point|date|double|geo_point|integer|ip|keyword|long|text|version, ?precision:integer)" |[field, precision] |["boolean|cartesian_point|date|double|geo_point|integer|ip|keyword|long|text|version, integer"] |["Column or literal for which to count the number of distinct values.", ""] |long | "Returns the approximate number of distinct values." | [false, true] | false | true date_diff |"integer date_diff(unit:keyword|text, startTimestamp:date, endTimestamp:date)"|[unit, startTimestamp, endTimestamp] |["keyword|text", "date", "date"] |["A valid date unit", "A string representing a start timestamp", "A string representing an end timestamp"] |integer | "Subtract 2 dates and return their difference in multiples of a unit specified in the 1st argument" | [false, false, false] | false | false date_extract |long date_extract(date_part:keyword, field:date) |[date_part, field] |[keyword, date] |["Part of the date to extract. Can be: aligned_day_of_week_in_month; aligned_day_of_week_in_year; aligned_week_of_month; aligned_week_of_year; ampm_of_day; clock_hour_of_ampm; clock_hour_of_day; day_of_month; day_of_week; day_of_year; epoch_day; era; hour_of_ampm; hour_of_day; instant_seconds; micro_of_day; micro_of_second; milli_of_day; milli_of_second; minute_of_day; minute_of_hour; month_of_year; nano_of_day; nano_of_second; offset_seconds; proleptic_month; second_of_day; second_of_minute; year; or year_of_era.", "Date expression"] |long | "Extracts parts of a date, like year, month, day, hour." | [false, false] | false | false date_format |keyword date_format(?format:keyword, date:date) |[format, date] |[keyword, date] |["A valid date pattern", "Date expression"] |keyword | "Returns a string representation of a date, in the provided format." | [true, false] | false | false @@ -40,10 +40,10 @@ left |"keyword left(str:keyword|text, length:integer)" length |"integer length(str:keyword|text)" |str |"keyword|text" | "" |integer | "Returns the character length of a string." | false | false | false log10 |"double log10(n:double|integer|long|unsigned_long)" |n |"double|integer|long|unsigned_long" | "" |double | "Returns the log base 10." | false | false | false ltrim |"keyword|text ltrim(str:keyword|text)" |str |"keyword|text" | "" |"keyword|text" |Removes leading whitespaces from a string.| false | false | false -max |"double|integer|long|unsigned_long max(field:double|integer|long|unsigned_long)" |field |"double|integer|long|unsigned_long" | "" |"double|integer|long|unsigned_long" | "The maximum value of a numeric field." | false | false | true -median |"double|integer|long|unsigned_long median(field:double|integer|long|unsigned_long)" |field |"double|integer|long|unsigned_long" | "" |"double|integer|long|unsigned_long" | "The value that is greater than half of all values and less than half of all values." | false | false | true -median_absolute_deviation|"double|integer|long|unsigned_long median_absolute_deviation(field:double|integer|long|unsigned_long)" |field |"double|integer|long|unsigned_long" | "" |"double|integer|long|unsigned_long" | "The median absolute deviation, a measure of variability." | false | false | true -min |"double|integer|long|unsigned_long min(field:double|integer|long|unsigned_long)" |field |"double|integer|long|unsigned_long" | "" |"double|integer|long|unsigned_long" | "The minimum value of a numeric field." | false | false | true +max |"double|integer|long max(field:double|integer|long)" |field |"double|integer|long" | "" |"double|integer|long" | "The maximum value of a numeric field." | false | false | true +median |"double|integer|long median(field:double|integer|long)" |field |"double|integer|long" | "" |"double|integer|long" | "The value that is greater than half of all values and less than half of all values." | false | false | true +median_absolute_deviation|"double|integer|long median_absolute_deviation(field:double|integer|long)" |field |"double|integer|long" | "" |"double|integer|long" | "The median absolute deviation, a measure of variability." | false | false | true +min |"double|integer|long min(field:double|integer|long)" |field |"double|integer|long" | "" |"double|integer|long" | "The minimum value of a numeric field." | false | false | true mv_avg |"double mv_avg(field:double|integer|long|unsigned_long)" |field |"double|integer|long|unsigned_long" | "" |double | "Converts a multivalued field into a single valued field containing the average of all of the values." | false | false | false mv_concat |"keyword mv_concat(v:text|keyword, delim:text|keyword)" |[v, delim] |["text|keyword", "text|keyword"] |["values to join", "delimiter"] |keyword | "Reduce a multivalued string field to a single valued field by concatenating all values." | [false, false] | false | false mv_count |"integer mv_count(v:boolean|cartesian_point|cartesian_shape|date|double|geo_point|geo_shape|integer|ip|keyword|long|text|unsigned_long|version)" |v | "boolean|cartesian_point|cartesian_shape|date|double|geo_point|geo_shape|integer|ip|keyword|long|text|unsigned_long|version" | "" | integer | "Reduce a multivalued field to a single valued field containing the count of values." | false | false | false @@ -55,7 +55,7 @@ mv_median |"double|integer|long|unsigned_long mv_median(v:double| mv_min |"boolean|date|double|integer|ip|keyword|long|text|unsigned_long|version mv_min(v:boolean|date|double|integer|ip|keyword|long|text|unsigned_long|version)" |v | "boolean|date|double|integer|ip|keyword|long|text|unsigned_long|version" | "" |"boolean|date|double|integer|ip|keyword|long|text|unsigned_long|version" | "Reduce a multivalued field to a single valued field containing the minimum value." | false | false | false mv_sum |"double|integer|long|unsigned_long mv_sum(v:double|integer|long|unsigned_long)" |v |"double|integer|long|unsigned_long" | "" |"double|integer|long|unsigned_long" | "Converts a multivalued field into a single valued field containing the sum of all of the values." | false | false | false now |date now() | null |null | null |date | "Returns current date and time." | null | false | false -percentile |"double|integer|long|unsigned_long percentile(field:double|integer|long|unsigned_long, percentile:double|integer|long)" |[field, percentile] |["double|integer|long|unsigned_long, double|integer|long"] |["", ""] |"double|integer|long|unsigned_long" | "The value at which a certain percentage of observed values occur." | [false, false] | false | true +percentile |"double|integer|long percentile(field:double|integer|long, percentile:double|integer|long)" |[field, percentile] |["double|integer|long, double|integer|long"] |["", ""] |"double|integer|long" | "The value at which a certain percentage of observed values occur." | [false, false] | false | true pi |double pi() | null | null | null |double | "The ratio of a circle’s circumference to its diameter." | null | false | false pow |"double pow(base:double|integer|long|unsigned_long, exponent:double|integer|long|unsigned_long)" |[base, exponent] |["double|integer|long|unsigned_long", "double|integer|long|unsigned_long"] |["", ""] |double | "Returns the value of a base raised to the power of an exponent." | [false, false] | false | false replace |"keyword replace(str:keyword|text, regex:keyword|text, newStr:keyword|text)" | [str, regex, newStr] | ["keyword|text", "keyword|text", "keyword|text"] |["", "", ""] |keyword | "The function substitutes in the string any match of the regular expression with the replacement string." | [false, false, false]| false | false @@ -69,7 +69,7 @@ sqrt |"double sqrt(n:double|integer|long|unsigned_long)" st_centroid |"geo_point|cartesian_point st_centroid(field:geo_point|cartesian_point)" |field |"geo_point|cartesian_point" | "" |"geo_point|cartesian_point" | "The centroid of a spatial field." | false | false | true starts_with |"boolean starts_with(str:keyword|text, prefix:keyword|text)" |[str, prefix] |["keyword|text", "keyword|text"] |["", ""] |boolean | "Returns a boolean that indicates whether a keyword string starts with another string" | [false, false] | false | false substring |"keyword substring(str:keyword|text, start:integer, ?length:integer)" |[str, start, length] |["keyword|text", "integer", "integer"] |["", "", ""] |keyword | "Returns a substring of a string, specified by a start position and an optional length" | [false, false, true]| false | false -sum |"long sum(field:double|integer|long|unsigned_long)" |field |"double|integer|long|unsigned_long" | "" |long | "The sum of a numeric field." | false | false | true +sum |"long sum(field:double|integer|long)" |field |"double|integer|long" | "" |long | "The sum of a numeric field." | false | false | true tan |"double tan(n:double|integer|long|unsigned_long)" |n |"double|integer|long|unsigned_long" | "An angle, in radians" |double | "Returns the trigonometric tangent of an angle" | false | false | false tanh |"double tanh(n:double|integer|long|unsigned_long)" |n |"double|integer|long|unsigned_long" | "The number to return the hyperbolic tangent of" |double | "Returns the hyperbolic tangent of a number" | false | false | false tau |double tau() | null | null | null |double | "The ratio of a circle’s circumference to its radius." | null | false | false @@ -112,7 +112,7 @@ synopsis:keyword "double atan(n:double|integer|long|unsigned_long)" "double atan2(y:double|integer|long|unsigned_long, x:double|integer|long|unsigned_long)" "double|date auto_bucket(field:integer|long|double|date, buckets:integer, from:integer|long|double|date|string, to:integer|long|double|date|string)" -"double avg(field:double|integer|long|unsigned_long)" +"double avg(field:double|integer|long)" "boolean|cartesian_point|date|double|geo_point|integer|ip|keyword|long|text|unsigned_long|version case(condition:boolean, rest...:boolean|cartesian_point|date|double|geo_point|integer|ip|keyword|long|text|unsigned_long|version)" "double|integer|long|unsigned_long ceil(n:double|integer|long|unsigned_long)" boolean cidr_match(ip:ip, blockX...:keyword) @@ -121,7 +121,7 @@ boolean cidr_match(ip:ip, blockX...:keyword) "double cos(n:double|integer|long|unsigned_long)" "double cosh(n:double|integer|long|unsigned_long)" "long count(?field:boolean|cartesian_point|date|double|geo_point|integer|ip|keyword|long|text|unsigned_long|version)" -"long count_distinct(field:boolean|cartesian_point|date|double|geo_point|integer|ip|keyword|long|text|unsigned_long|version, ?precision:integer)" +"long count_distinct(field:boolean|cartesian_point|date|double|geo_point|integer|ip|keyword|long|text|version, ?precision:integer)" "integer date_diff(unit:keyword|text, startTimestamp:date, endTimestamp:date)" long date_extract(date_part:keyword, field:date) keyword date_format(?format:keyword, date:date) @@ -136,10 +136,10 @@ double e() "integer length(str:keyword|text)" "double log10(n:double|integer|long|unsigned_long)" "keyword|text ltrim(str:keyword|text)" -"double|integer|long|unsigned_long max(field:double|integer|long|unsigned_long)" -"double|integer|long|unsigned_long median(field:double|integer|long|unsigned_long)" -"double|integer|long|unsigned_long median_absolute_deviation(field:double|integer|long|unsigned_long)" -"double|integer|long|unsigned_long min(field:double|integer|long|unsigned_long)" +"double|integer|long max(field:double|integer|long)" +"double|integer|long median(field:double|integer|long)" +"double|integer|long median_absolute_deviation(field:double|integer|long)" +"double|integer|long min(field:double|integer|long)" "double mv_avg(field:double|integer|long|unsigned_long)" "keyword mv_concat(v:text|keyword, delim:text|keyword)" "integer mv_count(v:boolean|cartesian_point|cartesian_shape|date|double|geo_point|geo_shape|integer|ip|keyword|long|text|unsigned_long|version)" @@ -151,7 +151,7 @@ double e() "boolean|date|double|integer|ip|keyword|long|text|unsigned_long|version mv_min(v:boolean|date|double|integer|ip|keyword|long|text|unsigned_long|version)" "double|integer|long|unsigned_long mv_sum(v:double|integer|long|unsigned_long)" date now() -"double|integer|long|unsigned_long percentile(field:double|integer|long|unsigned_long, percentile:double|integer|long)" +"double|integer|long percentile(field:double|integer|long, percentile:double|integer|long)" double pi() "double pow(base:double|integer|long|unsigned_long, exponent:double|integer|long|unsigned_long)" "keyword replace(str:keyword|text, regex:keyword|text, newStr:keyword|text)" @@ -165,7 +165,7 @@ double pi() "geo_point|cartesian_point st_centroid(field:geo_point|cartesian_point)" "boolean starts_with(str:keyword|text, prefix:keyword|text)" "keyword substring(str:keyword|text, start:integer, ?length:integer)" -"long sum(field:double|integer|long|unsigned_long)" +"long sum(field:double|integer|long)" "double tan(n:double|integer|long|unsigned_long)" "double tanh(n:double|integer|long|unsigned_long)" double tau() diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec index 82820a65787f2..65b01aae461e5 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec @@ -986,6 +986,15 @@ COUNT(c):long | a:integer 0 | 1 ; + +countVersion#[skip:-8.12.99,reason:bug fixed in 8.13+] +from apps | stats c = count(version), cd = count_distinct(version); + +c:long | cd:long +12 | 9 +; + + docsStatsAvgNestedExpression#[skip:-8.12.99,reason:supported in 8.13+] // tag::docsStatsAvgNestedExpression[] FROM employees diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Avg.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Avg.java index 0ba834d1d8954..784d97f820428 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Avg.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Avg.java @@ -21,18 +21,24 @@ import java.util.List; import static org.elasticsearch.xpack.ql.expression.TypeResolutions.ParamOrdinal.DEFAULT; -import static org.elasticsearch.xpack.ql.expression.TypeResolutions.isNumeric; +import static org.elasticsearch.xpack.ql.expression.TypeResolutions.isType; public class Avg extends AggregateFunction implements SurrogateExpression { @FunctionInfo(returnType = "double", description = "The average of a numeric field.", isAggregation = true) - public Avg(Source source, @Param(name = "field", type = { "double", "integer", "long", "unsigned_long" }) Expression field) { + public Avg(Source source, @Param(name = "field", type = { "double", "integer", "long" }) Expression field) { super(source, field); } @Override protected Expression.TypeResolution resolveType() { - return isNumeric(field(), sourceText(), DEFAULT); + return isType( + field(), + dt -> dt.isNumeric() && dt != DataTypes.UNSIGNED_LONG, + sourceText(), + DEFAULT, + "numeric except unsigned_long" + ); } @Override diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/CountDistinct.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/CountDistinct.java index 62dd3bc6b6254..4e52eecc5e80a 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/CountDistinct.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/CountDistinct.java @@ -32,6 +32,7 @@ import static org.elasticsearch.xpack.ql.expression.TypeResolutions.ParamOrdinal.SECOND; import static org.elasticsearch.xpack.ql.expression.TypeResolutions.isFoldable; import static org.elasticsearch.xpack.ql.expression.TypeResolutions.isInteger; +import static org.elasticsearch.xpack.ql.expression.TypeResolutions.isType; public class CountDistinct extends AggregateFunction implements OptionalArgument, ToAggregator { private static final int DEFAULT_PRECISION = 3000; @@ -42,19 +43,7 @@ public CountDistinct( Source source, @Param( name = "field", - type = { - "boolean", - "cartesian_point", - "date", - "double", - "geo_point", - "integer", - "ip", - "keyword", - "long", - "text", - "unsigned_long", - "version" }, + type = { "boolean", "cartesian_point", "date", "double", "geo_point", "integer", "ip", "keyword", "long", "text", "version" }, description = "Column or literal for which to count the number of distinct values." ) Expression field, @Param(optional = true, name = "precision", type = { "integer" }) Expression precision @@ -85,10 +74,21 @@ protected TypeResolution resolveType() { } TypeResolution resolution = EsqlTypeResolutions.isExact(field(), sourceText(), DEFAULT); - if (resolution.unresolved() || precision == null) { + if (resolution.unresolved()) { return resolution; } + boolean resolved = resolution.resolved(); + resolution = isType( + field(), + dt -> resolved && dt != DataTypes.UNSIGNED_LONG, + sourceText(), + DEFAULT, + "any exact type except unsigned_long" + ); + if (resolution.unresolved() || precision == null) { + return resolution; + } return isInteger(precision, sourceText(), SECOND).and(isFoldable(precision, sourceText(), SECOND)); } @@ -109,7 +109,7 @@ public AggregatorFunctionSupplier supplier(List inputChannels) { if (type == DataTypes.DOUBLE) { return new CountDistinctDoubleAggregatorFunctionSupplier(inputChannels, precision); } - if (type == DataTypes.KEYWORD || type == DataTypes.IP || type == DataTypes.TEXT) { + if (type == DataTypes.KEYWORD || type == DataTypes.IP || type == DataTypes.VERSION || type == DataTypes.TEXT) { return new CountDistinctBytesRefAggregatorFunctionSupplier(inputChannels, precision); } throw EsqlIllegalArgumentException.illegalDataType(type); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Max.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Max.java index cdcfe20c968a8..d8ec5300c061f 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Max.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Max.java @@ -22,12 +22,8 @@ public class Max extends NumericAggregate { - @FunctionInfo( - returnType = { "double", "integer", "long", "unsigned_long" }, - description = "The maximum value of a numeric field.", - isAggregation = true - ) - public Max(Source source, @Param(name = "field", type = { "double", "integer", "long", "unsigned_long" }) Expression field) { + @FunctionInfo(returnType = { "double", "integer", "long" }, description = "The maximum value of a numeric field.", isAggregation = true) + public Max(Source source, @Param(name = "field", type = { "double", "integer", "long" }) Expression field) { super(source, field); } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Median.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Median.java index 7f5bce981db51..a6f4e30a62459 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Median.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Median.java @@ -22,22 +22,28 @@ import java.util.List; import static org.elasticsearch.xpack.ql.expression.TypeResolutions.ParamOrdinal.DEFAULT; -import static org.elasticsearch.xpack.ql.expression.TypeResolutions.isNumeric; +import static org.elasticsearch.xpack.ql.expression.TypeResolutions.isType; public class Median extends AggregateFunction implements SurrogateExpression { // TODO: Add the compression parameter @FunctionInfo( - returnType = { "double", "integer", "long", "unsigned_long" }, + returnType = { "double", "integer", "long" }, description = "The value that is greater than half of all values and less than half of all values.", isAggregation = true ) - public Median(Source source, @Param(name = "field", type = { "double", "integer", "long", "unsigned_long" }) Expression field) { + public Median(Source source, @Param(name = "field", type = { "double", "integer", "long" }) Expression field) { super(source, field); } @Override protected Expression.TypeResolution resolveType() { - return isNumeric(field(), sourceText(), DEFAULT); + return isType( + field(), + dt -> dt.isNumeric() && dt != DataTypes.UNSIGNED_LONG, + sourceText(), + DEFAULT, + "numeric except unsigned_long" + ); } @Override diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/MedianAbsoluteDeviation.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/MedianAbsoluteDeviation.java index ddf0fd15fe2d0..ecf1a47ee9eb3 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/MedianAbsoluteDeviation.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/MedianAbsoluteDeviation.java @@ -23,14 +23,11 @@ public class MedianAbsoluteDeviation extends NumericAggregate { // TODO: Add parameter @FunctionInfo( - returnType = { "double", "integer", "long", "unsigned_long" }, + returnType = { "double", "integer", "long" }, description = "The median absolute deviation, a measure of variability.", isAggregation = true ) - public MedianAbsoluteDeviation( - Source source, - @Param(name = "field", type = { "double", "integer", "long", "unsigned_long" }) Expression field - ) { + public MedianAbsoluteDeviation(Source source, @Param(name = "field", type = { "double", "integer", "long" }) Expression field) { super(source, field); } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Min.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Min.java index 22da614675f9e..8fdce6d959b98 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Min.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Min.java @@ -22,12 +22,8 @@ public class Min extends NumericAggregate { - @FunctionInfo( - returnType = { "double", "integer", "long", "unsigned_long" }, - description = "The minimum value of a numeric field.", - isAggregation = true - ) - public Min(Source source, @Param(name = "field", type = { "double", "integer", "long", "unsigned_long" }) Expression field) { + @FunctionInfo(returnType = { "double", "integer", "long" }, description = "The minimum value of a numeric field.", isAggregation = true) + public Min(Source source, @Param(name = "field", type = { "double", "integer", "long" }) Expression field) { super(source, field); } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/NumericAggregate.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/NumericAggregate.java index 297aeb7fc0e29..8e1e38441e9a6 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/NumericAggregate.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/NumericAggregate.java @@ -19,7 +19,7 @@ import java.util.List; import static org.elasticsearch.xpack.ql.expression.TypeResolutions.ParamOrdinal.DEFAULT; -import static org.elasticsearch.xpack.ql.expression.TypeResolutions.isNumeric; +import static org.elasticsearch.xpack.ql.expression.TypeResolutions.isType; public abstract class NumericAggregate extends AggregateFunction implements ToAggregator { @@ -36,14 +36,20 @@ protected TypeResolution resolveType() { if (supportsDates()) { return TypeResolutions.isType( this, - e -> e.isNumeric() || e == DataTypes.DATETIME, + e -> e == DataTypes.DATETIME || e.isNumeric() && e != DataTypes.UNSIGNED_LONG, sourceText(), DEFAULT, - "numeric", - "datetime" + "datetime", + "numeric except unsigned_long" ); } - return isNumeric(field(), sourceText(), DEFAULT); + return isType( + field(), + dt -> dt.isNumeric() && dt != DataTypes.UNSIGNED_LONG, + sourceText(), + DEFAULT, + "numeric except unsigned_long" + ); } protected boolean supportsDates() { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Percentile.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Percentile.java index c34783f7352c3..96385d534edcd 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Percentile.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Percentile.java @@ -16,6 +16,7 @@ import org.elasticsearch.xpack.ql.expression.Expression; import org.elasticsearch.xpack.ql.tree.NodeInfo; import org.elasticsearch.xpack.ql.tree.Source; +import org.elasticsearch.xpack.ql.type.DataTypes; import java.util.List; @@ -23,18 +24,19 @@ import static org.elasticsearch.xpack.ql.expression.TypeResolutions.ParamOrdinal.SECOND; import static org.elasticsearch.xpack.ql.expression.TypeResolutions.isFoldable; import static org.elasticsearch.xpack.ql.expression.TypeResolutions.isNumeric; +import static org.elasticsearch.xpack.ql.expression.TypeResolutions.isType; public class Percentile extends NumericAggregate { private final Expression percentile; @FunctionInfo( - returnType = { "double", "integer", "long", "unsigned_long" }, + returnType = { "double", "integer", "long" }, description = "The value at which a certain percentage of observed values occur.", isAggregation = true ) public Percentile( Source source, - @Param(name = "field", type = { "double", "integer", "long", "unsigned_long" }) Expression field, + @Param(name = "field", type = { "double", "integer", "long" }) Expression field, @Param(name = "percentile", type = { "double", "integer", "long" }) Expression percentile ) { super(source, field, List.of(percentile)); @@ -61,7 +63,13 @@ protected TypeResolution resolveType() { return new TypeResolution("Unresolved children"); } - TypeResolution resolution = isNumeric(field(), sourceText(), FIRST); + TypeResolution resolution = isType( + field(), + dt -> dt.isNumeric() && dt != DataTypes.UNSIGNED_LONG, + sourceText(), + FIRST, + "numeric except unsigned_long" + ); if (resolution.unresolved()) { return resolution; } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Sum.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Sum.java index 0acf18981a83d..d09762947a597 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Sum.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Sum.java @@ -29,7 +29,7 @@ public class Sum extends NumericAggregate { @FunctionInfo(returnType = "long", description = "The sum of a numeric field.", isAggregation = true) - public Sum(Source source, @Param(name = "field", type = { "double", "integer", "long", "unsigned_long" }) Expression field) { + public Sum(Source source, @Param(name = "field", type = { "double", "integer", "long" }) Expression field) { super(source, field); } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/AggregateMapper.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/AggregateMapper.java index c375ef24da829..f5cee225b1b13 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/AggregateMapper.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/AggregateMapper.java @@ -254,15 +254,18 @@ private static String dataTypeToString(DataType type, Class aggClass) { return "Long"; } else if (type.equals(DataTypes.DOUBLE)) { return "Double"; - } else if (type.equals(DataTypes.KEYWORD) || type.equals(DataTypes.IP) || type.equals(DataTypes.TEXT)) { - return "BytesRef"; - } else if (type.equals(GEO_POINT)) { - return "GeoPoint"; - } else if (type.equals(CARTESIAN_POINT)) { - return "CartesianPoint"; - } else { - throw new EsqlIllegalArgumentException("illegal agg type: " + type.typeName()); - } + } else if (type.equals(DataTypes.KEYWORD) + || type.equals(DataTypes.IP) + || type.equals(DataTypes.VERSION) + || type.equals(DataTypes.TEXT)) { + return "BytesRef"; + } else if (type.equals(GEO_POINT)) { + return "GeoPoint"; + } else if (type.equals(CARTESIAN_POINT)) { + return "CartesianPoint"; + } else { + throw new EsqlIllegalArgumentException("illegal agg type: " + type.typeName()); + } } private static Expression unwrapAlias(Expression expression) { diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java index a1d5374773eb4..ee77ff93b7687 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java @@ -1554,6 +1554,48 @@ public void testUnresolvedMvExpand() { assertThat(e.getMessage(), containsString("Unknown column [bar]")); } + public void testUnsupportedTypesInStats() { + verifyUnsupported( + """ + row x = to_unsigned_long(\"10\") + | stats avg(x), count_distinct(x), max(x), median(x), median_absolute_deviation(x), min(x), percentile(x, 10), sum(x) + """, + "Found 8 problems\n" + + "line 2:12: argument of [avg(x)] must be [numeric except unsigned_long], found value [x] type [unsigned_long]\n" + + "line 2:20: argument of [count_distinct(x)] must be [any exact type except unsigned_long], " + + "found value [x] type [unsigned_long]\n" + + "line 2:39: argument of [max(x)] must be [datetime or numeric except unsigned_long], " + + "found value [max(x)] type [unsigned_long]\n" + + "line 2:47: argument of [median(x)] must be [numeric except unsigned_long], found value [x] type [unsigned_long]\n" + + "line 2:58: argument of [median_absolute_deviation(x)] must be [numeric except unsigned_long], " + + "found value [x] type [unsigned_long]\n" + + "line 2:88: argument of [min(x)] must be [datetime or numeric except unsigned_long], " + + "found value [min(x)] type [unsigned_long]\n" + + "line 2:96: first argument of [percentile(x, 10)] must be [numeric except unsigned_long], " + + "found value [x] type [unsigned_long]\n" + + "line 2:115: argument of [sum(x)] must be [numeric except unsigned_long], found value [x] type [unsigned_long]" + ); + + verifyUnsupported( + """ + row x = to_version("1.2") + | stats avg(x), max(x), median(x), median_absolute_deviation(x), min(x), percentile(x, 10), sum(x) + """, + "Found 7 problems\n" + + "line 2:10: argument of [avg(x)] must be [numeric except unsigned_long], found value [x] type [version]\n" + + "line 2:18: argument of [max(x)] must be [datetime or numeric except unsigned_long], " + + "found value [max(x)] type [version]\n" + + "line 2:26: argument of [median(x)] must be [numeric except unsigned_long], found value [x] type [version]\n" + + "line 2:37: argument of [median_absolute_deviation(x)] must be [numeric except unsigned_long], " + + "found value [x] type [version]\n" + + "line 2:67: argument of [min(x)] must be [datetime or numeric except unsigned_long], " + + "found value [min(x)] type [version]\n" + + "line 2:75: first argument of [percentile(x, 10)] must be [numeric except unsigned_long], " + + "found value [x] type [version]\n" + + "line 2:94: argument of [sum(x)] must be [numeric except unsigned_long], found value [x] type [version]" + ); + } + private void verifyUnsupported(String query, String errorMessage) { verifyUnsupported(query, errorMessage, "mapping-multi-field-variation.json"); } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java index 4c8e58fceffde..632c6087cf880 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java @@ -74,7 +74,7 @@ public void testAggsExpressionsInStatsAggs() { error("from test | stats max(max(salary)) by first_name") ); assertEquals( - "1:25: argument of [avg(first_name)] must be [numeric], found value [first_name] type [keyword]", + "1:25: argument of [avg(first_name)] must be [numeric except unsigned_long], found value [first_name] type [keyword]", error("from test | stats count(avg(first_name)) by first_name") ); assertEquals( @@ -244,7 +244,7 @@ public void testUnsignedLongNegation() { public void testSumOnDate() { assertEquals( - "1:19: argument of [sum(hire_date)] must be [numeric], found value [hire_date] type [datetime]", + "1:19: argument of [sum(hire_date)] must be [numeric except unsigned_long], found value [hire_date] type [datetime]", error("from test | stats sum(hire_date)") ); }