TSDB: Add unsigned_long dimension fields to _tsid #81284
This PR builds on the work done in #80276, which generates the _tsid field for keyword, ip and number dimension fields.

It adds support for unsigned_long dimension fields.
csoulios authored Dec 9, 2021
1 parent 09dc47f commit 524785f
Showing 3 changed files with 154 additions and 17 deletions.
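For background on the diffs below: an unsigned_long value is stored as a signed long after a sign-bit shift (the unsignedToSortableSignedLong call in the mapper change), and DocValueFormat.UnsignedLongShiftedDocValueFormat reverses that shift when the _tsid is decoded. The following is a minimal, self-contained sketch of that mapping in plain Java; it is not code from this PR, and the class and method names are illustrative only.

```java
public class UnsignedLongShiftSketch {
    // Flip the sign bit: maps the unsigned range [0, 2^64) onto the signed range
    // [Long.MIN_VALUE, Long.MAX_VALUE] while preserving the unsigned ordering.
    static long unsignedToSortableSigned(long unsignedBits) {
        return unsignedBits ^ Long.MIN_VALUE;
    }

    // Inverse shift: recover the original unsigned bit pattern.
    static long sortableSignedToUnsigned(long shifted) {
        return shifted ^ Long.MIN_VALUE;
    }

    public static void main(String[] args) {
        // Values taken from the YAML test added in this PR.
        long[] samples = {
            Long.parseUnsignedLong("9223372036854775807"),  // 2^63 - 1 -> shifts to -1
            Long.parseUnsignedLong("9223372036854775808"),  // 2^63     -> shifts to 0
            Long.parseUnsignedLong("18446744073709551615")  // 2^64 - 1 -> shifts to Long.MAX_VALUE
        };
        for (long bits : samples) {
            long shifted = unsignedToSortableSigned(bits);
            System.out.printf("%s -> %d -> %s%n",
                Long.toUnsignedString(bits), shifted,
                Long.toUnsignedString(sortableSignedToUnsigned(shifted)));
        }
    }
}
```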
@@ -187,12 +187,16 @@ public static Map<String, Object> decodeTsid(StreamInput in) {

int type = in.read();
switch (type) {
case (byte) 's':
case (byte) 's': // parse a string
result.put(name, in.readBytesRef().utf8ToString());
break;
case (byte) 'l':
case (byte) 'l': // parse a long
result.put(name, in.readLong());
break;
case (byte) 'u': // parse an unsigned_long
Object ul = DocValueFormat.UnsignedLongShiftedDocValueFormat.INSTANCE.format(in.readLong());
result.put(name, ul);
break;
default:
throw new IllegalArgumentException("Cannot parse [" + name + "]: Unknown type [" + type + "]");
}
@@ -203,7 +207,7 @@ public static Map<String, Object> decodeTsid(StreamInput in) {
}
}

static BytesReference encodeTsidValue(String value) {
public static BytesReference encodeTsidValue(String value) {
try (BytesStreamOutput out = new BytesStreamOutput()) {
out.write((byte) 's');
/*
@@ -224,7 +228,7 @@ static BytesReference encodeTsidValue(String value) {
}
}

static BytesReference encodeTsidValue(long value) {
public static BytesReference encodeTsidValue(long value) {
try (BytesStreamOutput out = new BytesStreamOutput()) {
out.write((byte) 'l');
out.writeLong(value);
@@ -233,4 +237,14 @@ static BytesReference encodeTsidValue(long value) {
throw new IllegalArgumentException("Dimension field cannot be serialized.", e);
}
}

public static BytesReference encodeTsidUnsignedLongValue(long value) {
try (BytesStreamOutput out = new BytesStreamOutput()) {
out.write((byte) 'u');
out.writeLong(value);
return out.bytes();
} catch (IOException e) {
throw new IllegalArgumentException("Dimension field cannot be serialized.", e);
}
}
}
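A rough round-trip sketch of the new 'u' entry defined above: write the one-byte type marker followed by the shifted long, then read it back the way decodeTsid does and undo the shift. It uses plain DataOutputStream/DataInputStream and a hand-rolled shift instead of BytesStreamOutput/StreamInput and DocValueFormat, so treat it as an illustration of the wire format, not the real API.

```java
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

public class TsidUnsignedLongRoundTrip {
    public static void main(String[] args) throws IOException {
        // The mapper hands encodeTsidUnsignedLongValue an already-shifted value.
        long unsignedBits = Long.parseUnsignedLong("18446744073709551614");
        long shifted = unsignedBits ^ Long.MIN_VALUE;

        // Encode: one-byte type marker 'u' followed by the shifted long,
        // mirroring encodeTsidUnsignedLongValue above.
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        try (DataOutputStream out = new DataOutputStream(baos)) {
            out.write((byte) 'u');
            out.writeLong(shifted);
        }

        // Decode: read the marker, then the long, then undo the shift --
        // the role DocValueFormat.UnsignedLongShiftedDocValueFormat plays in decodeTsid.
        try (DataInputStream in = new DataInputStream(new ByteArrayInputStream(baos.toByteArray()))) {
            int type = in.read();
            if (type != 'u') {
                throw new IllegalArgumentException("Unknown type [" + type + "]");
            }
            long decodedShifted = in.readLong();
            System.out.println(Long.toUnsignedString(decodedShifted ^ Long.MIN_VALUE)); // 18446744073709551614
        }
    }
}
```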
@@ -20,6 +20,7 @@
import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.Explicit;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.fielddata.IndexNumericFieldData;
@@ -32,6 +33,7 @@
import org.elasticsearch.index.mapper.SimpleMappedFieldType;
import org.elasticsearch.index.mapper.SourceValueFetcher;
import org.elasticsearch.index.mapper.TextSearchInfo;
import org.elasticsearch.index.mapper.TimeSeriesIdFieldMapper;
import org.elasticsearch.index.mapper.TimeSeriesParams;
import org.elasticsearch.index.mapper.TimeSeriesParams.MetricType;
import org.elasticsearch.index.mapper.ValueFetcher;
@@ -543,6 +545,15 @@ protected void parseCreateField(DocumentParserContext context) throws IOException {
numericValue = unsignedToSortableSignedLong(numericValue);
}

if (dimension && numericValue != null) {
// We encode the tsid part of the dimension field. However, there is no point
// in encoding the tsid value if we do not generate the _tsid field.
BytesReference bytes = context.getMetadataMapper(TimeSeriesIdFieldMapper.NAME) != null
? TimeSeriesIdFieldMapper.encodeTsidUnsignedLongValue(numericValue)
: null;
context.doc().addDimensionBytes(fieldType().name(), bytes);
}

List<Field> fields = new ArrayList<>();
if (indexed) {
fields.add(new LongPoint(fieldType().name(), numericValue));
@@ -555,19 +566,7 @@ protected void parseCreateField(DocumentParserContext context) throws IOException {
String storedValued = isNullValue ? nullValue : Long.toUnsignedString(unsignedToSortableSignedLong(numericValue));
fields.add(new StoredField(fieldType().name(), storedValued));
}

if (dimension && fields.size() > 0) { // dimension == true requires that field is indexed and has doc-values
// Check that a dimension field is single-valued and not an array
if (context.doc().getByKey(fieldType().name()) != null) {
throw new IllegalArgumentException("Dimension field [" + fieldType().name() + "] cannot be a multi-valued field.");
}

// Add the field by key so that we can validate if it has been added
context.doc().addWithKey(fieldType().name(), new LongPoint(fieldType().name(), numericValue));
context.doc().addAll(fields.subList(1, fields.size()));
} else {
context.doc().addAll(fields);
}
context.doc().addAll(fields);

if (hasDocValues == false && (stored || indexed)) {
context.addToFieldNames(fieldType().name());
@@ -0,0 +1,124 @@
setup:
- skip:
version: " - 8.0.99"
reason: _tsid introduced in 8.1.0

- do:
indices.create:
index: test
body:
settings:
index:
mode: time_series
routing_path: [metricset]
time_series:
start_time: 2021-04-28T00:00:00Z
end_time: 2021-04-29T00:00:00Z
mappings:
properties:
"@timestamp":
type: date
metricset:
type: keyword
time_series_dimension: true
ul:
type: unsigned_long
time_series_dimension: true

- do:
bulk:
refresh: true
index: test
body:
- '{"index": {}}'
- '{"@timestamp": "2021-04-28T18:35:24.467Z", "metricset": "aa", "voltage": 7.2, "ul": 9223372036854775807}'
- '{"index": {}}'
- '{"@timestamp": "2021-04-28T18:35:34.467Z", "metricset": "aa", "voltage": 7.6, "ul": 9223372036854775807}'
- '{"index": {}}'
- '{"@timestamp": "2021-04-28T18:35:44.467Z", "metricset": "aa", "voltage": 7.1, "ul": 18446744073709551614}'
- '{"index": {}}'
- '{"@timestamp": "2021-04-28T18:35:54.467Z", "metricset": "aa", "voltage": 7.3, "ul": 18446744073709551615}'
- '{"index": {}}'
- '{"@timestamp": "2021-04-28T18:35:24.467Z", "metricset": "aa", "voltage": 3.2, "ul": 9223372036854775808}'
- '{"index": {}}'
- '{"@timestamp": "2021-04-28T18:35:34.467Z", "metricset": "aa", "voltage": 3.6, "ul": 9223372036854775808}'
- '{"index": {}}'
- '{"@timestamp": "2021-04-28T18:35:44.467Z", "metricset": "aa", "voltage": 3.1, "ul": 9223372036854775808}'
- '{"index": {}}'
- '{"@timestamp": "2021-04-28T18:35:54.467Z", "metricset": "aa", "voltage": 3.3, "ul": 9223372036854775807}'

---
fetch the _tsid:
- skip:
version: " - 8.00.99"
reason: _tsid support introduced in 8.1.0

- do:
search:
index: test
body:
fields: [_tsid, metricset, ul]
query:
query_string:
query: '+@timestamp:"2021-04-28T18:35:24.467Z" +metricset:aa'

- match: {hits.total.value: 2}
- match: {hits.hits.0.fields._tsid: [{metricset: aa, ul: 9223372036854775808}]}
- match: {hits.hits.0.fields.metricset: [aa]}
- match: {hits.hits.0.fields.ul: [9223372036854775808]}
- match: {hits.hits.1.fields._tsid: [{metricset: aa, ul: 9223372036854775807}]}
- match: {hits.hits.1.fields.metricset: [aa]}
- match: {hits.hits.1.fields.ul: [9223372036854775807]}

---
# TODO: Sort order is wrong here. Probably this is caused by the encoding of unsigned_long
aggregate the _tsid:
- skip:
version: " - 8.00.99"
reason: _tsid support introduced in 8.1.0

- do:
search:
index: test
body:
size: 0
aggs:
tsids:
terms:
field: _tsid
order:
_key: asc

- match: {hits.total.value: 8}
- match: {aggregations.tsids.buckets.0.key: {metricset: aa, ul: 9223372036854775808}}
- match: {aggregations.tsids.buckets.0.doc_count: 3}
- match: {aggregations.tsids.buckets.1.key: {metricset: aa, ul: 18446744073709551614}}
- match: {aggregations.tsids.buckets.1.doc_count: 1}
- match: {aggregations.tsids.buckets.2.key: {metricset: aa, ul: 18446744073709551615}}
- match: {aggregations.tsids.buckets.2.doc_count: 1 }
- match: {aggregations.tsids.buckets.3.key: {metricset: aa, ul: 9223372036854775807}}
- match: {aggregations.tsids.buckets.3.doc_count: 3}

---
# The time-series index is sorted by [_tsid, @timestamp] in ascending order by default
# TODO: Sort order is wrong here. Probably this is caused by the encoding of unsigned_long
default sort:
- skip:
version: " - 8.00.99"
reason: _tsid support introduced in 8.1.0

- do:
search:
index: test
body:
fields: [ _tsid ]

- match: {hits.total.value: 8 }
- match: {hits.hits.0.fields._tsid: [{metricset: aa, ul: 9223372036854775808}]}
- match: {hits.hits.1.fields._tsid: [{metricset: aa, ul: 9223372036854775808}]}
- match: {hits.hits.2.fields._tsid: [{metricset: aa, ul: 9223372036854775808}]}
- match: {hits.hits.3.fields._tsid: [{metricset: aa, ul: 18446744073709551614}]}
- match: {hits.hits.4.fields._tsid: [{metricset: aa, ul: 18446744073709551615}]}
- match: {hits.hits.5.fields._tsid: [{metricset: aa, ul: 9223372036854775807}]}
- match: {hits.hits.6.fields._tsid: [{metricset: aa, ul: 9223372036854775807}]}
- match: {hits.hits.7.fields._tsid: [{metricset: aa, ul: 9223372036854775807}]}
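On the sort-order TODOs in the tests above, one plausible reading, consistent with the expected bucket and hit order: the _tsid compares by its raw encoded bytes, and the 'u' entry stores the shifted long in big-endian order, so an unsigned-byte comparison puts values >= 2^63 (whose shifted form is small) before values < 2^63 (whose shifted form has the top byte set). The sketch below reproduces the observed order under those assumptions; it is a hypothesis about the cause, not a statement from this PR.

```java
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.Comparator;

public class TsidSortOrderSketch {
    // Encode an unsigned_long the way the 'u' tsid entry is assumed to here:
    // sign-bit shift, then the big-endian bytes of the shifted long.
    static byte[] encode(String unsignedValue) {
        long shifted = Long.parseUnsignedLong(unsignedValue) ^ Long.MIN_VALUE;
        return ByteBuffer.allocate(Long.BYTES).putLong(shifted).array();
    }

    // Compare byte arrays as unsigned bytes, the way Lucene's BytesRef comparison does.
    static int compareUnsignedBytes(byte[] a, byte[] b) {
        for (int i = 0; i < Math.min(a.length, b.length); i++) {
            int cmp = Integer.compare(a[i] & 0xFF, b[i] & 0xFF);
            if (cmp != 0) return cmp;
        }
        return Integer.compare(a.length, b.length);
    }

    public static void main(String[] args) {
        String[] values = {
            "9223372036854775807", "9223372036854775808",
            "18446744073709551614", "18446744073709551615"
        };
        Arrays.sort(values, Comparator.comparing(TsidSortOrderSketch::encode,
            TsidSortOrderSketch::compareUnsignedBytes));
        // Prints: 9223372036854775808, 18446744073709551614, 18446744073709551615,
        // 9223372036854775807 -- the order asserted in the tests above,
        // not the natural unsigned numeric order.
        System.out.println(Arrays.toString(values));
    }
}
```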
