Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ES|QL: Improve type validation in aggs for UNSIGNED_LONG better support for VERSION #104911

Merged
merged 7 commits into from
Jan 31, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions docs/changelog/104911.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
pr: 104911
summary: "ES|QL: Improve type validation in aggs for UNSIGNED_LONG better support\
\ for VERSION"
area: ES|QL
type: bug
issues:
- 102961
Original file line number Diff line number Diff line change
Expand Up @@ -983,3 +983,11 @@ ROW a = 1, c = null
COUNT(c):long | a:integer
0 | 1
;


countVersion#[skip:-8.12.99,reason:bug fixed in 8.13+]
from apps | stats c = count(version), cd = count_distinct(version);

c:long | cd:long
12 | 9
;
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import java.util.List;

import static org.elasticsearch.xpack.ql.expression.TypeResolutions.ParamOrdinal.DEFAULT;
import static org.elasticsearch.xpack.ql.expression.TypeResolutions.isNotType;
import static org.elasticsearch.xpack.ql.expression.TypeResolutions.isNumeric;

public class Avg extends AggregateFunction implements SurrogateExpression {
Expand All @@ -32,7 +33,7 @@ public Avg(Source source, @Param(name = "field", type = { "double", "integer", "

@Override
protected Expression.TypeResolution resolveType() {
return isNumeric(field(), sourceText(), DEFAULT);
return isNumeric(field(), sourceText(), DEFAULT).and(isNotType(field(), DataTypes.UNSIGNED_LONG, sourceText(), DEFAULT));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For avg the description of the function shows

    @FunctionInfo(returnType = "double", description = "The average of a numeric field.", isAggregation = true)
    public Avg(Source source, @Param(name = "field", type = { "double", "integer", "long", "unsigned_long" }) Expression field) {
        super(source, field);
    }

which clearly shows unsigned_long. Is this type really unsupported and this must be updated as well?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch! I didn't realize that we don't have tests for this (as we have for non-aggregate funcitons).
I'm fixing all of them

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import static org.elasticsearch.xpack.ql.expression.TypeResolutions.ParamOrdinal.SECOND;
import static org.elasticsearch.xpack.ql.expression.TypeResolutions.isFoldable;
import static org.elasticsearch.xpack.ql.expression.TypeResolutions.isInteger;
import static org.elasticsearch.xpack.ql.expression.TypeResolutions.isNotType;

public class CountDistinct extends AggregateFunction implements OptionalArgument, ToAggregator {
private static final int DEFAULT_PRECISION = 3000;
Expand Down Expand Up @@ -84,7 +85,8 @@ protected TypeResolution resolveType() {
return new TypeResolution("Unresolved children");
}

TypeResolution resolution = EsqlTypeResolutions.isExact(field(), sourceText(), DEFAULT);
TypeResolution resolution = EsqlTypeResolutions.isExact(field(), sourceText(), DEFAULT)
.and(isNotType(field(), DataTypes.UNSIGNED_LONG, sourceText(), DEFAULT));
if (resolution.unresolved() || precision == null) {
return resolution;
}
Expand All @@ -109,7 +111,7 @@ public AggregatorFunctionSupplier supplier(List<Integer> inputChannels) {
if (type == DataTypes.DOUBLE) {
return new CountDistinctDoubleAggregatorFunctionSupplier(inputChannels, precision);
}
if (type == DataTypes.KEYWORD || type == DataTypes.IP || type == DataTypes.TEXT) {
if (type == DataTypes.KEYWORD || type == DataTypes.IP || type == DataTypes.VERSION || type == DataTypes.TEXT) {
return new CountDistinctBytesRefAggregatorFunctionSupplier(inputChannels, precision);
}
throw EsqlIllegalArgumentException.illegalDataType(type);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import java.util.List;

import static org.elasticsearch.xpack.ql.expression.TypeResolutions.ParamOrdinal.DEFAULT;
import static org.elasticsearch.xpack.ql.expression.TypeResolutions.isNotType;
import static org.elasticsearch.xpack.ql.expression.TypeResolutions.isNumeric;

public class Median extends AggregateFunction implements SurrogateExpression {
Expand All @@ -37,7 +38,7 @@ public Median(Source source, @Param(name = "field", type = { "double", "integer"

@Override
protected Expression.TypeResolution resolveType() {
return isNumeric(field(), sourceText(), DEFAULT);
return isNumeric(field(), sourceText(), DEFAULT).and(isNotType(field(), DataTypes.UNSIGNED_LONG, sourceText(), DEFAULT));
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import java.util.List;

import static org.elasticsearch.xpack.ql.expression.TypeResolutions.ParamOrdinal.DEFAULT;
import static org.elasticsearch.xpack.ql.expression.TypeResolutions.isNotType;
import static org.elasticsearch.xpack.ql.expression.TypeResolutions.isNumeric;

public abstract class NumericAggregate extends AggregateFunction implements ToAggregator {
Expand All @@ -34,16 +35,10 @@ public abstract class NumericAggregate extends AggregateFunction implements ToAg
@Override
protected TypeResolution resolveType() {
if (supportsDates()) {
return TypeResolutions.isType(
this,
e -> e.isNumeric() || e == DataTypes.DATETIME,
sourceText(),
DEFAULT,
"numeric",
"datetime"
);
return TypeResolutions.isType(this, e -> e.isNumeric() || e == DataTypes.DATETIME, sourceText(), DEFAULT, "numeric", "datetime")
.and(isNotType(field(), DataTypes.UNSIGNED_LONG, sourceText(), DEFAULT));
}
return isNumeric(field(), sourceText(), DEFAULT);
return isNumeric(field(), sourceText(), DEFAULT).and(isNotType(field(), DataTypes.UNSIGNED_LONG, sourceText(), DEFAULT));
}

protected boolean supportsDates() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,14 @@
import org.elasticsearch.xpack.ql.expression.Expression;
import org.elasticsearch.xpack.ql.tree.NodeInfo;
import org.elasticsearch.xpack.ql.tree.Source;
import org.elasticsearch.xpack.ql.type.DataTypes;

import java.util.List;

import static org.elasticsearch.xpack.ql.expression.TypeResolutions.ParamOrdinal.FIRST;
import static org.elasticsearch.xpack.ql.expression.TypeResolutions.ParamOrdinal.SECOND;
import static org.elasticsearch.xpack.ql.expression.TypeResolutions.isFoldable;
import static org.elasticsearch.xpack.ql.expression.TypeResolutions.isNotType;
import static org.elasticsearch.xpack.ql.expression.TypeResolutions.isNumeric;

public class Percentile extends NumericAggregate {
Expand Down Expand Up @@ -61,7 +63,9 @@ protected TypeResolution resolveType() {
return new TypeResolution("Unresolved children");
}

TypeResolution resolution = isNumeric(field(), sourceText(), FIRST);
TypeResolution resolution = isNumeric(field(), sourceText(), FIRST).and(
isNotType(field(), DataTypes.UNSIGNED_LONG, sourceText(), FIRST)
);
if (resolution.unresolved()) {
return resolution;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -254,15 +254,18 @@ private static String dataTypeToString(DataType type, Class<?> aggClass) {
return "Long";
} else if (type.equals(DataTypes.DOUBLE)) {
return "Double";
} else if (type.equals(DataTypes.KEYWORD) || type.equals(DataTypes.IP) || type.equals(DataTypes.TEXT)) {
return "BytesRef";
} else if (type.equals(GEO_POINT)) {
return "GeoPoint";
} else if (type.equals(CARTESIAN_POINT)) {
return "CartesianPoint";
} else {
throw new EsqlIllegalArgumentException("illegal agg type: " + type.typeName());
}
} else if (type.equals(DataTypes.KEYWORD)
|| type.equals(DataTypes.IP)
|| type.equals(DataTypes.VERSION)
|| type.equals(DataTypes.TEXT)) {
return "BytesRef";
} else if (type.equals(GEO_POINT)) {
return "GeoPoint";
} else if (type.equals(CARTESIAN_POINT)) {
return "CartesianPoint";
} else {
throw new EsqlIllegalArgumentException("illegal agg type: " + type.typeName());
}
Comment on lines +262 to +268
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Incorrect indentation

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree with you, but apparently Spotless does not agree with us.
It surprised me as well, I don't think we can do much.

}

private static Expression unwrapAlias(Expression expression) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1554,6 +1554,38 @@ public void testUnresolvedMvExpand() {
assertThat(e.getMessage(), containsString("Unknown column [bar]"));
}

public void testUnsupportedTypesInStats() {
verifyUnsupported(
"""
row x = to_unsigned_long(\"10\")
| stats avg(x), count_distinct(x), max(x), median(x), median_absolute_deviation(x), min(x), percentile(x, 10), sum(x)
""",
"Found 8 problems\n"
+ "line 2:12: argument of [avg(x)] cannot be [unsigned_long], found [x]\n"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Our error messages tell the user what is the expectation and what is the actual value. Instead of found [x] it should be found "10".

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's possible only for foldable expressions, but in the general case we only have the variable name, eg.

from person | stats  avg(salary)

Maybe argument of [avg(salary)] cannot be [unsigned_long], found [salary] is just redundant.
Anyway, as you suggested in the comment above, probably there is no need for this new message at all, I can just use isType() with a custom validation expression

+ "line 2:20: argument of [count_distinct(x)] cannot be [unsigned_long], found [x]\n"
+ "line 2:39: argument of [max(x)] cannot be [unsigned_long], found [x]\n"
+ "line 2:47: argument of [median(x)] cannot be [unsigned_long], found [x]\n"
+ "line 2:58: argument of [median_absolute_deviation(x)] cannot be [unsigned_long], found [x]\n"
+ "line 2:88: argument of [min(x)] cannot be [unsigned_long], found [x]\n"
+ "line 2:96: first argument of [percentile(x, 10)] cannot be [unsigned_long], found [x]\n"
+ "line 2:115: argument of [sum(x)] cannot be [unsigned_long], found [x]"
);
verifyUnsupported(
"""
row x = to_version("1.2")
| stats avg(x), max(x), median(x), median_absolute_deviation(x), min(x), percentile(x, 10), sum(x)
""",
"Found 7 problems\n"
+ "line 2:10: argument of [avg(x)] must be [numeric], found value [x] type [version]\n"
+ "line 2:18: argument of [max(x)] must be [numeric or datetime], found value [max(x)] type [version]\n"
+ "line 2:26: argument of [median(x)] must be [numeric], found value [x] type [version]\n"
+ "line 2:37: argument of [median_absolute_deviation(x)] must be [numeric], found value [x] type [version]\n"
+ "line 2:67: argument of [min(x)] must be [numeric or datetime], found value [min(x)] type [version]\n"
+ "line 2:75: first argument of [percentile(x, 10)] must be [numeric], found value [x] type [version]\n"
+ "line 2:94: argument of [sum(x)] must be [numeric], found value [x] type [version]"
);
}

private void verifyUnsupported(String query, String errorMessage) {
verifyUnsupported(query, errorMessage, "mapping-multi-field-variation.json");
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,21 @@ public static TypeResolution isType(
);
}

public static TypeResolution isNotType(Expression e, DataType type, String operationName, ParamOrdinal paramOrd) {
return type.equals(e.dataType())
? new TypeResolution(
format(
null,
"{}argument of [{}] cannot be [{}], found [{}]",
paramOrd == null || paramOrd == DEFAULT ? "" : paramOrd.name().toLowerCase(Locale.ROOT) + " ",
operationName,
e.dataType().typeName(),
name(e)
)
)
: TypeResolution.TYPE_RESOLVED;
}

private static String acceptedTypesForErrorMsg(String... acceptedTypes) {
StringJoiner sj = new StringJoiner(", ");
for (int i = 0; i < acceptedTypes.length - 1; i++) {
Expand Down