TSDB: Add unsigned_long dimension fields to _tsid #81284
This PR builds on the work done in #80276, which generates the _tsid field for keyword, ip and number dimension fields.

It adds support for unsigned_long dimension fields.
csoulios authored Dec 9, 2021
1 parent 09dc47f commit 524785f
Showing 3 changed files with 154 additions and 17 deletions.
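For background on the diffs below: an unsigned_long value is stored as a signed long after a sign-bit shift (the unsignedToSortableSignedLong call in the mapper change), and DocValueFormat.UnsignedLongShiftedDocValueFormat reverses that shift when the _tsid is decoded. The following is a minimal, self-contained sketch of that mapping in plain Java; it is not code from this PR, and the class and method names are illustrative only.

```java
public class UnsignedLongShiftSketch {
    // Flip the sign bit: maps the unsigned range [0, 2^64) onto the signed range
    // [Long.MIN_VALUE, Long.MAX_VALUE] while preserving the unsigned ordering.
    static long unsignedToSortableSigned(long unsignedBits) {
        return unsignedBits ^ Long.MIN_VALUE;
    }

    // Inverse shift: recover the original unsigned bit pattern.
    static long sortableSignedToUnsigned(long shifted) {
        return shifted ^ Long.MIN_VALUE;
    }

    public static void main(String[] args) {
        // Values taken from the YAML test added in this PR.
        long[] samples = {
            Long.parseUnsignedLong("9223372036854775807"),  // 2^63 - 1 -> shifts to -1
            Long.parseUnsignedLong("9223372036854775808"),  // 2^63     -> shifts to 0
            Long.parseUnsignedLong("18446744073709551615")  // 2^64 - 1 -> shifts to Long.MAX_VALUE
        };
        for (long bits : samples) {
            long shifted = unsignedToSortableSigned(bits);
            System.out.printf("%s -> %d -> %s%n",
                Long.toUnsignedString(bits), shifted,
                Long.toUnsignedString(sortableSignedToUnsigned(shifted)));
        }
    }
}
```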
@@ -187,12 +187,16 @@ public static Map<String, Object> decodeTsid(StreamInput in) {

int type = in.read();
switch (type) {
case (byte) 's':
case (byte) 's': // parse a string
result.put(name, in.readBytesRef().utf8ToString());
break;
case (byte) 'l':
case (byte) 'l': // parse a long
result.put(name, in.readLong());
break;
case (byte) 'u': // parse an unsigned_long
Object ul = DocValueFormat.UnsignedLongShiftedDocValueFormat.INSTANCE.format(in.readLong());
result.put(name, ul);
break;
default:
throw new IllegalArgumentException("Cannot parse [" + name + "]: Unknown type [" + type + "]");
}
@@ -203,7 +207,7 @@ public static Map<String, Object> decodeTsid(StreamInput in) {
}
}

static BytesReference encodeTsidValue(String value) {
public static BytesReference encodeTsidValue(String value) {
try (BytesStreamOutput out = new BytesStreamOutput()) {
out.write((byte) 's');
/*
@@ -224,7 +228,7 @@ static BytesReference encodeTsidValue(String value) {
}
}

static BytesReference encodeTsidValue(long value) {
public static BytesReference encodeTsidValue(long value) {
try (BytesStreamOutput out = new BytesStreamOutput()) {
out.write((byte) 'l');
out.writeLong(value);
@@ -233,4 +237,14 @@ static BytesReference encodeTsidValue(long value) {
throw new IllegalArgumentException("Dimension field cannot be serialized.", e);
}
}

public static BytesReference encodeTsidUnsignedLongValue(long value) {
try (BytesStreamOutput out = new BytesStreamOutput()) {
out.write((byte) 'u');
out.writeLong(value);
return out.bytes();
} catch (IOException e) {
throw new IllegalArgumentException("Dimension field cannot be serialized.", e);
}
}
}
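A rough round-trip sketch of the new 'u' entry defined above: write the one-byte type marker followed by the shifted long, then read it back the way decodeTsid does and undo the shift. It uses plain DataOutputStream/DataInputStream and a hand-rolled shift instead of BytesStreamOutput/StreamInput and DocValueFormat, so treat it as an illustration of the wire format, not the real API.

```java
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

public class TsidUnsignedLongRoundTrip {
    public static void main(String[] args) throws IOException {
        // The mapper hands encodeTsidUnsignedLongValue an already-shifted value.
        long unsignedBits = Long.parseUnsignedLong("18446744073709551614");
        long shifted = unsignedBits ^ Long.MIN_VALUE;

        // Encode: one-byte type marker 'u' followed by the shifted long,
        // mirroring encodeTsidUnsignedLongValue above.
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        try (DataOutputStream out = new DataOutputStream(baos)) {
            out.write((byte) 'u');
            out.writeLong(shifted);
        }

        // Decode: read the marker, then the long, then undo the shift --
        // the role DocValueFormat.UnsignedLongShiftedDocValueFormat plays in decodeTsid.
        try (DataInputStream in = new DataInputStream(new ByteArrayInputStream(baos.toByteArray()))) {
            int type = in.read();
            if (type != 'u') {
                throw new IllegalArgumentException("Unknown type [" + type + "]");
            }
            long decodedShifted = in.readLong();
            System.out.println(Long.toUnsignedString(decodedShifted ^ Long.MIN_VALUE)); // 18446744073709551614
        }
    }
}
```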
@@ -20,6 +20,7 @@
import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.Explicit;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.fielddata.IndexNumericFieldData;
@@ -32,6 +33,7 @@
import org.elasticsearch.index.mapper.SimpleMappedFieldType;
import org.elasticsearch.index.mapper.SourceValueFetcher;
import org.elasticsearch.index.mapper.TextSearchInfo;
import org.elasticsearch.index.mapper.TimeSeriesIdFieldMapper;
import org.elasticsearch.index.mapper.TimeSeriesParams;
import org.elasticsearch.index.mapper.TimeSeriesParams.MetricType;
import org.elasticsearch.index.mapper.ValueFetcher;
@@ -543,6 +545,15 @@ protected void parseCreateField(DocumentParserContext context) throws IOException {
numericValue = unsignedToSortableSignedLong(numericValue);
}

if (dimension && numericValue != null) {
// We encode the tsid part of the dimension field. However, there is no point
// in encoding the tsid value if we do not generate the _tsid field.
BytesReference bytes = context.getMetadataMapper(TimeSeriesIdFieldMapper.NAME) != null
? TimeSeriesIdFieldMapper.encodeTsidUnsignedLongValue(numericValue)
: null;
context.doc().addDimensionBytes(fieldType().name(), bytes);
}

List<Field> fields = new ArrayList<>();
if (indexed) {
fields.add(new LongPoint(fieldType().name(), numericValue));
@@ -555,19 +566,7 @@ protected void parseCreateField(DocumentParserContext context) throws IOException {
String storedValued = isNullValue ? nullValue : Long.toUnsignedString(unsignedToSortableSignedLong(numericValue));
fields.add(new StoredField(fieldType().name(), storedValued));
}

if (dimension && fields.size() > 0) { // dimension == true requires that field is indexed and has doc-values
// Check that a dimension field is single-valued and not an array
if (context.doc().getByKey(fieldType().name()) != null) {
throw new IllegalArgumentException("Dimension field [" + fieldType().name() + "] cannot be a multi-valued field.");
}

// Add the field by key so that we can validate if it has been added
context.doc().addWithKey(fieldType().name(), new LongPoint(fieldType().name(), numericValue));
context.doc().addAll(fields.subList(1, fields.size()));
} else {
context.doc().addAll(fields);
}
context.doc().addAll(fields);

if (hasDocValues == false && (stored || indexed)) {
context.addToFieldNames(fieldType().name());
@@ -0,0 +1,124 @@
setup:
- skip:
version: " - 8.0.99"
reason: _tsid introduced in 8.1.0

- do:
indices.create:
index: test
body:
settings:
index:
mode: time_series
routing_path: [metricset]
time_series:
start_time: 2021-04-28T00:00:00Z
end_time: 2021-04-29T00:00:00Z
mappings:
properties:
"@timestamp":
type: date
metricset:
type: keyword
time_series_dimension: true
ul:
type: unsigned_long
time_series_dimension: true

- do:
bulk:
refresh: true
index: test
body:
- '{"index": {}}'
- '{"@timestamp": "2021-04-28T18:35:24.467Z", "metricset": "aa", "voltage": 7.2, "ul": 9223372036854775807}'
- '{"index": {}}'
- '{"@timestamp": "2021-04-28T18:35:34.467Z", "metricset": "aa", "voltage": 7.6, "ul": 9223372036854775807}'
- '{"index": {}}'
- '{"@timestamp": "2021-04-28T18:35:44.467Z", "metricset": "aa", "voltage": 7.1, "ul": 18446744073709551614}'
- '{"index": {}}'
- '{"@timestamp": "2021-04-28T18:35:54.467Z", "metricset": "aa", "voltage": 7.3, "ul": 18446744073709551615}'
- '{"index": {}}'
- '{"@timestamp": "2021-04-28T18:35:24.467Z", "metricset": "aa", "voltage": 3.2, "ul": 9223372036854775808}'
- '{"index": {}}'
- '{"@timestamp": "2021-04-28T18:35:34.467Z", "metricset": "aa", "voltage": 3.6, "ul": 9223372036854775808}'
- '{"index": {}}'
- '{"@timestamp": "2021-04-28T18:35:44.467Z", "metricset": "aa", "voltage": 3.1, "ul": 9223372036854775808}'
- '{"index": {}}'
- '{"@timestamp": "2021-04-28T18:35:54.467Z", "metricset": "aa", "voltage": 3.3, "ul": 9223372036854775807}'

---
fetch the _tsid:
- skip:
version: " - 8.00.99"
reason: _tsid support introduced in 8.1.0

- do:
search:
index: test
body:
fields: [_tsid, metricset, ul]
query:
query_string:
query: '+@timestamp:"2021-04-28T18:35:24.467Z" +metricset:aa'

- match: {hits.total.value: 2}
- match: {hits.hits.0.fields._tsid: [{metricset: aa, ul: 9223372036854775808}]}
- match: {hits.hits.0.fields.metricset: [aa]}
- match: {hits.hits.0.fields.ul: [9223372036854775808]}
- match: {hits.hits.1.fields._tsid: [{metricset: aa, ul: 9223372036854775807}]}
- match: {hits.hits.1.fields.metricset: [aa]}
- match: {hits.hits.1.fields.ul: [9223372036854775807]}

---
# TODO: Sort order is wrong here. Probably this is caused by the encoding of unsigned_long
aggregate the _tsid:
- skip:
version: " - 8.00.99"
reason: _tsid support introduced in 8.1.0

- do:
search:
index: test
body:
size: 0
aggs:
tsids:
terms:
field: _tsid
order:
_key: asc

- match: {hits.total.value: 8}
- match: {aggregations.tsids.buckets.0.key: {metricset: aa, ul: 9223372036854775808}}
- match: {aggregations.tsids.buckets.0.doc_count: 3}
- match: {aggregations.tsids.buckets.1.key: {metricset: aa, ul: 18446744073709551614}}
- match: {aggregations.tsids.buckets.1.doc_count: 1}
- match: {aggregations.tsids.buckets.2.key: {metricset: aa, ul: 18446744073709551615}}
- match: {aggregations.tsids.buckets.2.doc_count: 1 }
- match: {aggregations.tsids.buckets.3.key: {metricset: aa, ul: 9223372036854775807}}
- match: {aggregations.tsids.buckets.3.doc_count: 3}

---
# The time-series index is sorted by [_tsid, @timestamp] in ascending order by default
# TODO: Sort order is wrong here. Probably this is caused by the encoding of unsigned_long
default sort:
- skip:
version: " - 8.00.99"
reason: _tsid support introduced in 8.1.0

- do:
search:
index: test
body:
fields: [ _tsid ]

- match: {hits.total.value: 8 }
- match: {hits.hits.0.fields._tsid: [{metricset: aa, ul: 9223372036854775808}]}
- match: {hits.hits.1.fields._tsid: [{metricset: aa, ul: 9223372036854775808}]}
- match: {hits.hits.2.fields._tsid: [{metricset: aa, ul: 9223372036854775808}]}
- match: {hits.hits.3.fields._tsid: [{metricset: aa, ul: 18446744073709551614}]}
- match: {hits.hits.4.fields._tsid: [{metricset: aa, ul: 18446744073709551615}]}
- match: {hits.hits.5.fields._tsid: [{metricset: aa, ul: 9223372036854775807}]}
- match: {hits.hits.6.fields._tsid: [{metricset: aa, ul: 9223372036854775807}]}
- match: {hits.hits.7.fields._tsid: [{metricset: aa, ul: 9223372036854775807}]}
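On the sort-order TODOs in the tests above, one plausible reading, consistent with the expected bucket and hit order: the _tsid compares by its raw encoded bytes, and the 'u' entry stores the shifted long in big-endian order, so an unsigned-byte comparison puts values >= 2^63 (whose shifted form is small) before values < 2^63 (whose shifted form has the top byte set). The sketch below reproduces the observed order under those assumptions; it is a hypothesis about the cause, not a statement from this PR.

```java
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.Comparator;

public class TsidSortOrderSketch {
    // Encode an unsigned_long the way the 'u' tsid entry is assumed to here:
    // sign-bit shift, then the big-endian bytes of the shifted long.
    static byte[] encode(String unsignedValue) {
        long shifted = Long.parseUnsignedLong(unsignedValue) ^ Long.MIN_VALUE;
        return ByteBuffer.allocate(Long.BYTES).putLong(shifted).array();
    }

    // Compare byte arrays as unsigned bytes, the way Lucene's BytesRef comparison does.
    static int compareUnsignedBytes(byte[] a, byte[] b) {
        for (int i = 0; i < Math.min(a.length, b.length); i++) {
            int cmp = Integer.compare(a[i] & 0xFF, b[i] & 0xFF);
            if (cmp != 0) return cmp;
        }
        return Integer.compare(a.length, b.length);
    }

    public static void main(String[] args) {
        String[] values = {
            "9223372036854775807", "9223372036854775808",
            "18446744073709551614", "18446744073709551615"
        };
        Arrays.sort(values, Comparator.comparing(TsidSortOrderSketch::encode,
            TsidSortOrderSketch::compareUnsignedBytes));
        // Prints: 9223372036854775808, 18446744073709551614, 18446744073709551615,
        // 9223372036854775807 -- the order asserted in the tests above,
        // not the natural unsigned numeric order.
        System.out.println(Arrays.toString(values));
    }
}
```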
