Skip to content

Commit

Permalink
[ML] Support the unsigned_long type in data frame analytics (#64066)
Browse files Browse the repository at this point in the history
Adds support for the unsigned_long type to data frame analytics.

This type is handled in the same way as the long type. Values
sent to the ML native processes are converted to floats and
hence lose precision when they fall outside the range in which
a float can represent every long value exactly.

Relates #60050
  • Loading branch information
droberts195 authored Oct 22, 2020
1 parent 695584e commit 55fe93e
Show file tree
Hide file tree
Showing 7 changed files with 23 additions and 20 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,13 @@ private Types() {}
.collect(Collectors.toUnmodifiableSet());

private static final Set<String> NUMERICAL_TYPES =
Stream.concat(Stream.of(NumberType.values()).map(NumberType::typeName), Stream.of("scaled_float"))
Stream.concat(Stream.of(NumberType.values()).map(NumberType::typeName), Stream.of("scaled_float", "unsigned_long"))
.collect(Collectors.toUnmodifiableSet());

private static final Set<String> DISCRETE_NUMERICAL_TYPES =
Stream.of(NumberType.BYTE, NumberType.SHORT, NumberType.INTEGER, NumberType.LONG)
.map(NumberType::typeName)
Stream.concat(
Stream.of(NumberType.BYTE, NumberType.SHORT, NumberType.INTEGER, NumberType.LONG).map(NumberType::typeName),
Stream.of("unsigned_long"))
.collect(Collectors.toUnmodifiableSet());

private static final Set<String> BOOL_TYPES = Collections.singleton(BooleanFieldMapper.CONTENT_TYPE);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ private static void createIndex(String index, boolean isDatastream) {
" \"type\": \"double\"\n" +
" }," +
" \""+ DISCRETE_NUMERICAL_FIELD + "\": {\n" +
" \"type\": \"integer\"\n" +
" \"type\": \"unsigned_long\"\n" +
" }," +
" \""+ TEXT_FIELD + "\": {\n" +
" \"type\": \"text\"\n" +
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ public void testSourceQueryIsApplied() throws IOException {
client().admin().indices().prepareCreate(sourceIndex)
.setMapping(
"numeric_1", "type=double",
"numeric_2", "type=float",
"numeric_2", "type=unsigned_long",
"categorical", "type=keyword",
"filtered_field", "type=keyword")
.get();
Expand All @@ -64,7 +64,7 @@ public void testSourceQueryIsApplied() throws IOException {
IndexRequest indexRequest = new IndexRequest(sourceIndex);
indexRequest.source(
"numeric_1", 1.0,
"numeric_2", 2.0,
"numeric_2", 2,
"categorical", i % 2 == 0 ? "class_1" : "class_2",
"filtered_field", i < 2 ? "bingo" : "rest"); // We tag bingo on the first two docs to ensure we have 2 classes
bulkRequestBuilder.add(indexRequest);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -698,7 +698,7 @@ static void indexData(String sourceIndex, int numTrainingRows, int numNonTrainin
" \"type\": \"double\"\n" +
" }," +
" \"" + DISCRETE_NUMERICAL_FEATURE_FIELD + "\": {\n" +
" \"type\": \"long\"\n" +
" \"type\": \"unsigned_long\"\n" +
" }," +
" \"" + DEPENDENT_VARIABLE_FIELD + "\": {\n" +
" \"type\": \"double\"\n" +
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ public void testOutlierDetectionWithFewDocuments() throws Exception {
String sourceIndex = "test-outlier-detection-with-few-docs";

client().admin().indices().prepareCreate(sourceIndex)
.setMapping("numeric_1", "type=double", "numeric_2", "type=float", "categorical_1", "type=keyword")
.setMapping("numeric_1", "type=double", "numeric_2", "type=unsigned_long", "categorical_1", "type=keyword")
.get();

BulkRequestBuilder bulkRequestBuilder = client().prepareBulk();
Expand All @@ -83,7 +83,7 @@ public void testOutlierDetectionWithFewDocuments() throws Exception {
// We insert one odd value out of 5 for one feature
String docId = i == 0 ? "outlier" : "normal" + i;
indexRequest.id(docId);
indexRequest.source("numeric_1", i == 0 ? 100.0 : 1.0, "numeric_2", 1.0, "categorical_1", "foo_" + i);
indexRequest.source("numeric_1", i == 0 ? 100.0 : 1.0, "numeric_2", 1, "categorical_1", "foo_" + i);
bulkRequestBuilder.add(indexRequest);
}
BulkResponse bulkResponse = bulkRequestBuilder.get();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ public void testDetect_GivenOutlierDetectionAndNonNumericField() {
assertThat(fieldExtraction.v2().get(0).getName(), equalTo("some_keyword"));
assertThat(fieldExtraction.v2().get(0).isIncluded(), is(false));
assertThat(fieldExtraction.v2().get(0).getReason(), equalTo("unsupported type; supported types are " +
"[boolean, byte, double, float, half_float, integer, long, scaled_float, short]"));
"[boolean, byte, double, float, half_float, integer, long, scaled_float, short, unsigned_long]"));
}

public void testDetect_GivenOutlierDetectionAndFieldWithNumericAndNonNumericTypes() {
Expand All @@ -121,7 +121,7 @@ public void testDetect_GivenOutlierDetectionAndFieldWithNumericAndNonNumericType
assertThat(fieldExtraction.v2().get(0).getName(), equalTo("indecisive_field"));
assertThat(fieldExtraction.v2().get(0).isIncluded(), is(false));
assertThat(fieldExtraction.v2().get(0).getReason(), equalTo("unsupported type; supported types are " +
"[boolean, byte, double, float, half_float, integer, long, scaled_float, short]"));
"[boolean, byte, double, float, half_float, integer, long, scaled_float, short, unsigned_long]"));
}

public void testDetect_GivenOutlierDetectionAndMultipleFields() {
Expand All @@ -147,7 +147,7 @@ public void testDetect_GivenOutlierDetectionAndMultipleFields() {
FieldSelection.included("some_boolean", Collections.singleton("boolean"), false, FieldSelection.FeatureType.NUMERICAL),
FieldSelection.included("some_float", Collections.singleton("float"), false, FieldSelection.FeatureType.NUMERICAL),
FieldSelection.excluded("some_keyword", Collections.singleton("keyword"), "unsupported type; " +
"supported types are [boolean, byte, double, float, half_float, integer, long, scaled_float, short]"),
"supported types are [boolean, byte, double, float, half_float, integer, long, scaled_float, short, unsigned_long]"),
FieldSelection.included("some_long", Collections.singleton("long"), false, FieldSelection.FeatureType.NUMERICAL)
);
}
Expand Down Expand Up @@ -282,7 +282,7 @@ public void testDetect_GivenRegressionAndRequiredFieldHasInvalidType() {
ElasticsearchStatusException e = expectThrows(ElasticsearchStatusException.class, extractedFieldsDetector::detect);

assertThat(e.getMessage(), equalTo("invalid types [keyword] for required field [foo]; " +
"expected types are [byte, double, float, half_float, integer, long, scaled_float, short]"));
"expected types are [byte, double, float, half_float, integer, long, scaled_float, short, unsigned_long]"));
}

public void testDetect_GivenClassificationAndRequiredFieldHasInvalidType() {
Expand All @@ -298,7 +298,7 @@ public void testDetect_GivenClassificationAndRequiredFieldHasInvalidType() {
ElasticsearchStatusException e = expectThrows(ElasticsearchStatusException.class, extractedFieldsDetector::detect);

assertThat(e.getMessage(), equalTo("invalid types [float] for required field [some_float]; " +
"expected types are [boolean, byte, integer, ip, keyword, long, short, text]"));
"expected types are [boolean, byte, integer, ip, keyword, long, short, text, unsigned_long]"));
}

public void testDetect_GivenClassificationAndDependentVariableHasInvalidCardinality() {
Expand Down Expand Up @@ -371,7 +371,8 @@ public void testDetect_GivenExcludedFieldIsUnsupported() {

assertFieldSelectionContains(fieldExtraction.v2(),
FieldSelection.excluded("categorical", Collections.singleton("keyword"),
"unsupported type; supported types are [boolean, byte, double, float, half_float, integer, long, scaled_float, short]"),
"unsupported type; supported types are " +
"[boolean, byte, double, float, half_float, integer, long, scaled_float, short, unsigned_long]"),
FieldSelection.included("numeric", Collections.singleton("float"), false, FieldSelection.FeatureType.NUMERICAL)
);
}
Expand Down Expand Up @@ -471,7 +472,7 @@ public void testDetect_GivenIncludedFieldHasUnsupportedType() {
ElasticsearchStatusException e = expectThrows(ElasticsearchStatusException.class, extractedFieldsDetector::detect);

assertThat(e.getMessage(), equalTo("field [your_keyword] has unsupported type [keyword]. " +
"Supported types are [boolean, byte, double, float, half_float, integer, long, scaled_float, short]."));
"Supported types are [boolean, byte, double, float, half_float, integer, long, scaled_float, short, unsigned_long]."));
}

public void testDetect_GivenNotIncludedFieldHasUnsupportedType() {
Expand All @@ -492,7 +493,8 @@ public void testDetect_GivenNotIncludedFieldHasUnsupportedType() {

assertFieldSelectionContains(fieldExtraction.v2(),
FieldSelection.excluded("categorical", Collections.singleton("keyword"),
"unsupported type; supported types are [boolean, byte, double, float, half_float, integer, long, scaled_float, short]"),
"unsupported type; supported types are " +
"[boolean, byte, double, float, half_float, integer, long, scaled_float, short, unsigned_long]"),
FieldSelection.included("numeric", Collections.singleton("float"), false, FieldSelection.FeatureType.NUMERICAL)
);
}
Expand All @@ -517,7 +519,7 @@ public void testDetect_GivenIndexContainsResultsField() {
FieldSelection.included("my_field1", Collections.singleton("float"), false, FieldSelection.FeatureType.NUMERICAL),
FieldSelection.included("your_field2", Collections.singleton("float"), false, FieldSelection.FeatureType.NUMERICAL),
FieldSelection.excluded("your_keyword", Collections.singleton("keyword"), "unsupported type; supported types " +
"are [boolean, byte, double, float, half_float, integer, long, scaled_float, short]")
"are [boolean, byte, double, float, half_float, integer, long, scaled_float, short, unsigned_long]")
);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@
- match: { field_selection.2.is_included: false }
- match: { field_selection.2.is_required: false }
- is_false: field_selection.2.feature_type
- match: { field_selection.2.reason: "unsupported type; supported types are [boolean, byte, double, float, half_float, integer, ip, keyword, long, scaled_float, short, text]" }
- match: { field_selection.2.reason: "unsupported type; supported types are [boolean, byte, double, float, half_float, integer, ip, keyword, long, scaled_float, short, text, unsigned_long]" }
- match: { field_selection.3.name: "field_4" }
- match: { field_selection.3.mapping_types: ["text"] }
- match: { field_selection.3.is_included: false }
Expand Down Expand Up @@ -299,7 +299,7 @@
- match: { field_selection.2.is_included: false }
- match: { field_selection.2.is_required: false }
- is_false: field_selection.2.feature_type
- match: { field_selection.2.reason: "unsupported type; supported types are [boolean, byte, double, float, half_float, integer, ip, keyword, long, scaled_float, short, text]" }
- match: { field_selection.2.reason: "unsupported type; supported types are [boolean, byte, double, float, half_float, integer, ip, keyword, long, scaled_float, short, text, unsigned_long]" }
- match: { field_selection.3.name: "field_4" }
- match: { field_selection.3.mapping_types: ["text"] }
- match: { field_selection.3.is_included: false }
Expand Down

0 comments on commit 55fe93e

Please sign in to comment.