From 5ac4d8c71e06880624a9a91bfec4ae310d9cab2f Mon Sep 17 00:00:00 2001 From: Craig Taverner Date: Thu, 29 Aug 2024 14:48:15 +0200 Subject: [PATCH] Fix union-types where one index is missing the field (#111932) * Fix union-types where one index is missing the field When none of the indexes has the field, a validation error is correctly thrown, and when all indexes have the field, union-types works as normal. But when some indexes have the field and some do not, we were getting and internal error. We treat this case similarly to when some documents are missing the field, in which case `null` values are produced. So now a multi-index query where some indexes are missing the field will produce nulls for the documents coming from those indexes. * Update docs/changelog/111932.yaml * Added capability for this fix (missing-field) --- docs/changelog/111932.yaml | 6 ++ .../xpack/esql/CsvTestsDataLoader.java | 6 ++ .../mapping-missing_ip_sample_data.json | 13 ++++ .../main/resources/missing_ip_sample_data.csv | 8 +++ .../src/main/resources/union_types.csv-spec | 68 +++++++++++++++++++ .../xpack/esql/action/EsqlCapabilities.java | 5 ++ .../planner/EsPhysicalOperationProviders.java | 4 +- 7 files changed, 109 insertions(+), 1 deletion(-) create mode 100644 docs/changelog/111932.yaml create mode 100644 x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-missing_ip_sample_data.json create mode 100644 x-pack/plugin/esql/qa/testFixtures/src/main/resources/missing_ip_sample_data.csv diff --git a/docs/changelog/111932.yaml b/docs/changelog/111932.yaml new file mode 100644 index 0000000000000..ce840ecebcff0 --- /dev/null +++ b/docs/changelog/111932.yaml @@ -0,0 +1,6 @@ +pr: 111932 +summary: Fix union-types where one index is missing the field +area: ES|QL +type: bug +issues: + - 111912 diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java index b20e3bb0d5409..9ee22113a4244 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java @@ -68,6 +68,11 @@ public class CsvTestsDataLoader { "mapping-sample_data_ts_long.json", "sample_data_ts_long.csv" ); + private static final TestsDataset MISSING_IP_SAMPLE_DATA = new TestsDataset( + "missing_ip_sample_data", + "mapping-missing_ip_sample_data.json", + "missing_ip_sample_data.csv" + ); private static final TestsDataset CLIENT_IPS = new TestsDataset("clientips", "mapping-clientips.json", "clientips.csv"); private static final TestsDataset CLIENT_CIDR = new TestsDataset("client_cidr", "mapping-client_cidr.json", "client_cidr.csv"); private static final TestsDataset AGES = new TestsDataset("ages", "mapping-ages.json", "ages.csv"); @@ -112,6 +117,7 @@ public class CsvTestsDataLoader { Map.entry(ALERTS.indexName, ALERTS), Map.entry(SAMPLE_DATA_STR.indexName, SAMPLE_DATA_STR), Map.entry(SAMPLE_DATA_TS_LONG.indexName, SAMPLE_DATA_TS_LONG), + Map.entry(MISSING_IP_SAMPLE_DATA.indexName, MISSING_IP_SAMPLE_DATA), Map.entry(CLIENT_IPS.indexName, CLIENT_IPS), Map.entry(CLIENT_CIDR.indexName, CLIENT_CIDR), Map.entry(AGES.indexName, AGES), diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-missing_ip_sample_data.json b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-missing_ip_sample_data.json new file mode 100644 index 0000000000000..6f3796dd7715d --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-missing_ip_sample_data.json @@ -0,0 +1,13 @@ +{ + "properties": { + "@timestamp": { + "type": "date" + }, + "event_duration": { + "type": "long" + }, + "message": { + "type": "keyword" + } + } +} diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/missing_ip_sample_data.csv b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/missing_ip_sample_data.csv new file mode 100644 index 0000000000000..e8e9ddcaee83b --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/missing_ip_sample_data.csv @@ -0,0 +1,8 @@ +@timestamp:date,event_duration:long,message:keyword +2023-10-23T13:55:01.543Z,1756467,Connected to 10.1.0.1 +2023-10-23T13:53:55.832Z,5033755,Connection error +2023-10-23T13:52:55.015Z,8268153,Connection error +2023-10-23T13:51:54.732Z,725448,Connection error +2023-10-23T13:33:34.937Z,1232382,Disconnected +2023-10-23T12:27:28.948Z,2764889,Connected to 10.1.0.2 +2023-10-23T12:15:03.360Z,3450233,Connected to 10.1.0.3 diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/union_types.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/union_types.csv-spec index 6819727be0131..c6a2d47a78dc9 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/union_types.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/union_types.csv-spec @@ -405,6 +405,74 @@ count:long | message:keyword 2 | Connected to 10.1.0.3 ; +multiIndexMissingIpToString +required_capability: union_types +required_capability: union_types_missing_field + +FROM sample_data, sample_data_str, missing_ip_sample_data METADATA _index +| EVAL client_ip = TO_STRING(client_ip) +| KEEP _index, @timestamp, client_ip, event_duration, message +| SORT _index ASC, @timestamp DESC +; + +_index:keyword | @timestamp:date | client_ip:keyword | event_duration:long | message:keyword +missing_ip_sample_data | 2023-10-23T13:55:01.543Z | null | 1756467 | Connected to 10.1.0.1 +missing_ip_sample_data | 2023-10-23T13:53:55.832Z | null | 5033755 | Connection error +missing_ip_sample_data | 2023-10-23T13:52:55.015Z | null | 8268153 | Connection error +missing_ip_sample_data | 2023-10-23T13:51:54.732Z | null | 725448 | Connection error +missing_ip_sample_data | 2023-10-23T13:33:34.937Z | null | 1232382 | Disconnected +missing_ip_sample_data | 2023-10-23T12:27:28.948Z | null | 2764889 | Connected to 10.1.0.2 +missing_ip_sample_data | 2023-10-23T12:15:03.360Z | null | 3450233 | Connected to 10.1.0.3 +sample_data | 2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1 +sample_data | 2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error +sample_data | 2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error +sample_data | 2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error +sample_data | 2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected +sample_data | 2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2 +sample_data | 2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3 +sample_data_str | 2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1 +sample_data_str | 2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error +sample_data_str | 2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error +sample_data_str | 2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error +sample_data_str | 2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected +sample_data_str | 2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2 +sample_data_str | 2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3 +; + +multiIndexMissingIpToIp +required_capability: union_types +required_capability: union_types_missing_field + +FROM sample_data, sample_data_str, missing_ip_sample_data METADATA _index +| EVAL client_ip = TO_IP(client_ip) +| KEEP _index, @timestamp, client_ip, event_duration, message +| SORT _index ASC, @timestamp DESC +; + +_index:keyword | @timestamp:date | client_ip:ip | event_duration:long | message:keyword +missing_ip_sample_data | 2023-10-23T13:55:01.543Z | null | 1756467 | Connected to 10.1.0.1 +missing_ip_sample_data | 2023-10-23T13:53:55.832Z | null | 5033755 | Connection error +missing_ip_sample_data | 2023-10-23T13:52:55.015Z | null | 8268153 | Connection error +missing_ip_sample_data | 2023-10-23T13:51:54.732Z | null | 725448 | Connection error +missing_ip_sample_data | 2023-10-23T13:33:34.937Z | null | 1232382 | Disconnected +missing_ip_sample_data | 2023-10-23T12:27:28.948Z | null | 2764889 | Connected to 10.1.0.2 +missing_ip_sample_data | 2023-10-23T12:15:03.360Z | null | 3450233 | Connected to 10.1.0.3 +sample_data | 2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1 +sample_data | 2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error +sample_data | 2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error +sample_data | 2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error +sample_data | 2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected +sample_data | 2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2 +sample_data | 2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3 +sample_data_str | 2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1 +sample_data_str | 2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error +sample_data_str | 2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error +sample_data_str | 2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error +sample_data_str | 2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected +sample_data_str | 2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2 +sample_data_str | 2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3 +; + multiIndexTsLong required_capability: union_types required_capability: metadata_fields diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index 81b2ba71b8808..120323ebeb7a6 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -183,6 +183,11 @@ public enum Cap { */ UNION_TYPES_FIX_RENAME_RESOLUTION, + /** + * Fix for union-types when some indexes are missing the required field. Done in #111932. + */ + UNION_TYPES_MISSING_FIELD, + /** * Fix a parsing issue where numbers below Long.MIN_VALUE threw an exception instead of parsing as doubles. * see Parsing large numbers is inconsistent #104323 diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java index 8fddb7407a02a..04be731484267 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java @@ -138,7 +138,9 @@ private BlockLoader getBlockLoaderFor( if (unionTypes != null) { String indexName = shardContext.ctx.index().getName(); Expression conversion = unionTypes.getConversionExpressionForIndex(indexName); - return new TypeConvertingBlockLoader(blockLoader, (AbstractConvertFunction) conversion); + return conversion == null + ? BlockLoader.CONSTANT_NULLS + : new TypeConvertingBlockLoader(blockLoader, (AbstractConvertFunction) conversion); } return blockLoader; }