Skip to content

Commit

Permalink
All star tree indexing commits as of 24aug (#35)
Browse files Browse the repository at this point in the history
* Changes to handle count and avg metrics as part of star tree mapping

Signed-off-by: Bharathwaj G <bharath78910@gmail.com>

* addressing review comments

Signed-off-by: Bharathwaj G <bharath78910@gmail.com>

* Adding timestamp rounding support in star tree

Signed-off-by: Bharathwaj G <bharath78910@gmail.com>

* removing count as required metric

Signed-off-by: Bharathwaj G <bharath78910@gmail.com>

* addressing comments

Signed-off-by: Bharathwaj G <bharath78910@gmail.com>

* addressing comments

Signed-off-by: Bharathwaj G <bharath78910@gmail.com>

* file formats rebase

Signed-off-by: Sarthak Aggarwal <sarthagg@amazon.com>

* nit fixes

Signed-off-by: Sarthak Aggarwal <sarthagg@amazon.com>

* added file format tests

Signed-off-by: Sarthak Aggarwal <sarthagg@amazon.com>

* writer versioning and addressing comments

Signed-off-by: Sarthak Aggarwal <sarthagg@amazon.com>

* fixes in merge, aggregators, added tests, addressed comments

Signed-off-by: Sarthak Aggarwal <sarthagg@amazon.com>

* doc values assertions

Signed-off-by: Sarthak Aggarwal <sarthagg@amazon.com>

* removing additional sorted numeric method

Signed-off-by: Sarthak Aggarwal <sarthagg@amazon.com>

* rebase fixes

Signed-off-by: Sarthak Aggarwal <sarthagg@amazon.com>

* max doc fixes

Signed-off-by: Sarthak Aggarwal <sarthagg@amazon.com>

* metadata to have total star tree docs

Signed-off-by: Sarthak Aggarwal <sarthagg@amazon.com>

* adding half hour and quarter hour calendar intervals

Signed-off-by: Bharathwaj G <bharath78910@gmail.com>

* adding tests

Signed-off-by: Bharathwaj G <bharath78910@gmail.com>

* separating child star node from children

Signed-off-by: Sarthak Aggarwal <sarthagg@amazon.com>

* merge conflicts for file formats

Signed-off-by: Bharathwaj G <bharath78910@gmail.com>

* Doc count field changes in star tree

Signed-off-by: Bharathwaj G <bharath78910@gmail.com>

* refactoring and addressing comments

Signed-off-by: Bharathwaj G <bharath78910@gmail.com>

* refactoring and fixing bugs

Signed-off-by: Bharathwaj G <bharath78910@gmail.com>

---------

Signed-off-by: Bharathwaj G <bharath78910@gmail.com>
Signed-off-by: Sarthak Aggarwal <sarthagg@amazon.com>
Co-authored-by: Sarthak Aggarwal <sarthagg@amazon.com>
  • Loading branch information
bharath-techie and sarthakaggarwal97 authored Aug 24, 2024
1 parent 2301adf commit fd0d4df
Show file tree
Hide file tree
Showing 80 changed files with 6,965 additions and 943 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@
import org.opensearch.index.compositeindex.datacube.MetricStat;
import org.opensearch.index.compositeindex.datacube.startree.StarTreeFieldConfiguration;
import org.opensearch.index.compositeindex.datacube.startree.StarTreeIndexSettings;
import org.opensearch.index.compositeindex.datacube.startree.utils.date.DateTimeUnitAdapter;
import org.opensearch.index.compositeindex.datacube.startree.utils.date.DateTimeUnitRounding;
import org.opensearch.index.compositeindex.datacube.startree.utils.date.ExtendedDateTimeUnit;
import org.opensearch.indices.IndicesService;
import org.opensearch.test.OpenSearchIntegTestCase;
import org.junit.After;
Expand Down Expand Up @@ -47,10 +50,10 @@ private static XContentBuilder createMinimalTestMapping(boolean invalidDim, bool
.startObject("startree-1")
.field("type", "star_tree")
.startObject("config")
.startArray("ordered_dimensions")
.startObject()
.startObject("date_dimension")
.field("name", "timestamp")
.endObject()
.startArray("ordered_dimensions")
.startObject()
.field("name", getDim(invalidDim, keywordDim))
.endObject()
Expand Down Expand Up @@ -90,21 +93,77 @@ private static XContentBuilder createMinimalTestMapping(boolean invalidDim, bool
}
}

private static XContentBuilder createMaxDimTestMapping() {
/**
 * Builds an index mapping containing a star tree field named {@code startree-1} whose
 * date dimension {@code timestamp} declares three calendar intervals: {@code day},
 * {@code quarter-hour} and a third entry selected by {@code duplicate}.
 *
 * @param duplicate when {@code true} the third calendar interval repeats {@code quarter-hour};
 *                  otherwise it is {@code half-hour}
 * @return the builder holding the composite section and the field properties
 * @throws IllegalStateException if building the JSON content fails with an {@link IOException}
 */
private static XContentBuilder createDateTestMapping(boolean duplicate) {
    // Resolve the only variable part of the mapping up front so the builder chain stays declarative.
    final String thirdInterval = duplicate ? "quarter-hour" : "half-hour";
    try {
        return jsonBuilder().startObject()
            // composite section: star tree definition
            .startObject("composite")
                .startObject("startree-1")
                    .field("type", "star_tree")
                    .startObject("config")
                        .startObject("date_dimension")
                            .field("name", "timestamp")
                            .startArray("calendar_intervals")
                                .value("day")
                                .value("quarter-hour")
                                .value(thirdInterval)
                            .endArray()
                        .endObject()
                        .startArray("ordered_dimensions")
                            .startObject()
                                .field("name", "numeric_dv")
                            .endObject()
                        .endArray()
                        .startArray("metrics")
                            .startObject()
                                .field("name", "numeric_dv")
                            .endObject()
                        .endArray()
                    .endObject()
                .endObject()
            .endObject()
            // regular field mappings referenced by the star tree (plus non-doc-values variants)
            .startObject("properties")
                .startObject("timestamp")
                    .field("type", "date")
                .endObject()
                .startObject("numeric_dv")
                    .field("type", "integer")
                    .field("doc_values", true)
                .endObject()
                .startObject("numeric")
                    .field("type", "integer")
                    .field("doc_values", false)
                .endObject()
                .startObject("keyword_dv")
                    .field("type", "keyword")
                    .field("doc_values", true)
                .endObject()
                .startObject("keyword")
                    .field("type", "keyword")
                    .field("doc_values", false)
                .endObject()
            .endObject()
        .endObject();
    } catch (IOException buildFailure) {
        throw new IllegalStateException(buildFailure);
    }
}

private static XContentBuilder createMaxDimTestMapping() {
try {
return jsonBuilder().startObject()
.startObject("composite")
.startObject("startree-1")
.field("type", "star_tree")
.startObject("config")
.startObject("date_dimension")
.field("name", "timestamp")
.startArray("calendar_intervals")
.value("day")
.value("month")
.value("half-hour")
.endArray()
.endObject()
.startArray("ordered_dimensions")
.startObject()
.field("name", "dim2")
.endObject()
Expand Down Expand Up @@ -139,7 +198,7 @@ private static XContentBuilder createMaxDimTestMapping() {
}
}

private static XContentBuilder createTestMappingWithoutStarTree(boolean invalidDim, boolean invalidMetric, boolean keywordDim) {
private static XContentBuilder createTestMappingWithoutStarTree() {
try {
return jsonBuilder().startObject()
.startObject("properties")
Expand Down Expand Up @@ -176,10 +235,10 @@ private static XContentBuilder createUpdateTestMapping(boolean changeDim, boolea
.startObject(sameStarTree ? "startree-1" : "startree-2")
.field("type", "star_tree")
.startObject("config")
.startArray("ordered_dimensions")
.startObject()
.startObject("date_dimension")
.field("name", "timestamp")
.endObject()
.startArray("ordered_dimensions")
.startObject()
.field("name", changeDim ? "numeric_new" : getDim(false, false))
.endObject()
Expand Down Expand Up @@ -258,11 +317,101 @@ public void testValidCompositeIndex() {
assertEquals("timestamp", starTreeFieldType.getDimensions().get(0).getField());
assertTrue(starTreeFieldType.getDimensions().get(0) instanceof DateDimension);
DateDimension dateDim = (DateDimension) starTreeFieldType.getDimensions().get(0);
List<Rounding.DateTimeUnit> expectedTimeUnits = Arrays.asList(
Rounding.DateTimeUnit.MINUTES_OF_HOUR,
Rounding.DateTimeUnit.HOUR_OF_DAY
List<DateTimeUnitRounding> expectedTimeUnits = Arrays.asList(
new DateTimeUnitAdapter(Rounding.DateTimeUnit.MINUTES_OF_HOUR),
ExtendedDateTimeUnit.HALF_HOUR_OF_DAY
);
assertEquals(expectedTimeUnits, dateDim.getIntervals());
for (int i = 0; i < dateDim.getSortedCalendarIntervals().size(); i++) {
assertEquals(expectedTimeUnits.get(i).shortName(), dateDim.getSortedCalendarIntervals().get(i).shortName());
}
assertEquals("numeric_dv", starTreeFieldType.getDimensions().get(1).getField());
assertEquals(2, starTreeFieldType.getMetrics().size());
assertEquals("numeric_dv", starTreeFieldType.getMetrics().get(0).getField());
List<MetricStat> expectedMetrics = Arrays.asList(
MetricStat.VALUE_COUNT,
MetricStat.SUM,
MetricStat.AVG
);
assertEquals(expectedMetrics, starTreeFieldType.getMetrics().get(0).getMetrics());

assertEquals("_doc_count", starTreeFieldType.getMetrics().get(1).getField());
assertEquals(List.of(MetricStat.DOC_COUNT), starTreeFieldType.getMetrics().get(1).getMetrics());

assertEquals(10000, starTreeFieldType.getStarTreeConfig().maxLeafDocs());
assertEquals(
StarTreeFieldConfiguration.StarTreeBuildMode.OFF_HEAP,
starTreeFieldType.getStarTreeConfig().getBuildMode()
);
assertEquals(Collections.emptySet(), starTreeFieldType.getStarTreeConfig().getSkipStarNodeCreationInDims());
}
}
}
}

/**
 * Creates the test index with the date mapping (day, quarter-hour, half-hour) and checks, on every
 * data node that hosts the index, that the resulting star tree field type exposes the date
 * dimension with its calendar intervals in the expected sorted order, together with the numeric
 * dimension, the metric stats and the star tree configuration values asserted below.
 */
public void testValidCompositeIndexWithDates() {
    prepareCreate(TEST_INDEX).setMapping(createDateTestMapping(false)).get();
    Iterable<IndicesService> dataNodeInstances = internalCluster().getDataNodeInstances(IndicesService.class);
    for (IndicesService service : dataNodeInstances) {
        final Index index = resolveIndex("test");
        if (service.hasIndex(index)) {
            IndexService indexService = service.indexService(index);
            Set<CompositeMappedFieldType> fts = indexService.mapperService().getCompositeFieldTypes();

            for (CompositeMappedFieldType ft : fts) {
                assertTrue(ft instanceof StarTreeMapper.StarTreeFieldType);
                StarTreeMapper.StarTreeFieldType starTreeFieldType = (StarTreeMapper.StarTreeFieldType) ft;
                assertEquals("timestamp", starTreeFieldType.getDimensions().get(0).getField());
                assertTrue(starTreeFieldType.getDimensions().get(0) instanceof DateDimension);
                DateDimension dateDim = (DateDimension) starTreeFieldType.getDimensions().get(0);
                List<DateTimeUnitRounding> expectedTimeUnits = Arrays.asList(
                    ExtendedDateTimeUnit.QUARTER_HOUR_OF_DAY,
                    ExtendedDateTimeUnit.HALF_HOUR_OF_DAY,
                    new DateTimeUnitAdapter(Rounding.DateTimeUnit.DAY_OF_MONTH)
                );
                // Pin the number of intervals first: without this, a dimension exposing fewer
                // (or no) intervals would let the element-wise loop below pass vacuously.
                assertEquals(expectedTimeUnits.size(), dateDim.getSortedCalendarIntervals().size());
                // Drive the loop from the expected list so every expected interval is compared.
                for (int i = 0; i < expectedTimeUnits.size(); i++) {
                    assertEquals(expectedTimeUnits.get(i).shortName(), dateDim.getSortedCalendarIntervals().get(i).shortName());
                }
                assertEquals("numeric_dv", starTreeFieldType.getDimensions().get(1).getField());
                assertEquals("numeric_dv", starTreeFieldType.getMetrics().get(0).getField());
                List<MetricStat> expectedMetrics = Arrays.asList(
                    MetricStat.VALUE_COUNT,
                    MetricStat.SUM,
                    MetricStat.AVG
                );
                assertEquals(expectedMetrics, starTreeFieldType.getMetrics().get(0).getMetrics());
                assertEquals(10000, starTreeFieldType.getStarTreeConfig().maxLeafDocs());
                assertEquals(
                    StarTreeFieldConfiguration.StarTreeBuildMode.OFF_HEAP,
                    starTreeFieldType.getStarTreeConfig().getBuildMode()
                );
                assertEquals(Collections.emptySet(), starTreeFieldType.getStarTreeConfig().getSkipStarNodeCreationInDims());
            }
        }
    }
}

public void testValidCompositeIndexWithDuplicateDates() {
prepareCreate(TEST_INDEX).setMapping(createDateTestMapping(true)).get();
Iterable<IndicesService> dataNodeInstances = internalCluster().getDataNodeInstances(IndicesService.class);
for (IndicesService service : dataNodeInstances) {
final Index index = resolveIndex("test");
if (service.hasIndex(index)) {
IndexService indexService = service.indexService(index);
Set<CompositeMappedFieldType> fts = indexService.mapperService().getCompositeFieldTypes();

for (CompositeMappedFieldType ft : fts) {
assertTrue(ft instanceof StarTreeMapper.StarTreeFieldType);
StarTreeMapper.StarTreeFieldType starTreeFieldType = (StarTreeMapper.StarTreeFieldType) ft;
assertEquals("timestamp", starTreeFieldType.getDimensions().get(0).getField());
assertTrue(starTreeFieldType.getDimensions().get(0) instanceof DateDimension);
DateDimension dateDim = (DateDimension) starTreeFieldType.getDimensions().get(0);
List<DateTimeUnitRounding> expectedTimeUnits = Arrays.asList(
ExtendedDateTimeUnit.QUARTER_HOUR_OF_DAY,
new DateTimeUnitAdapter(Rounding.DateTimeUnit.DAY_OF_MONTH)
);
for (int i = 0; i < dateDim.getIntervals().size(); i++) {
assertEquals(expectedTimeUnits.get(i).shortName(), dateDim.getSortedCalendarIntervals().get(i).shortName());
}
assertEquals("numeric_dv", starTreeFieldType.getDimensions().get(1).getField());
assertEquals("numeric_dv", starTreeFieldType.getMetrics().get(0).getField());

Expand Down Expand Up @@ -291,7 +440,7 @@ public void testUpdateIndexWithAdditionOfStarTree() {
}

public void testUpdateIndexWithNewerStarTree() {
prepareCreate(TEST_INDEX).setMapping(createTestMappingWithoutStarTree(false, false, false)).get();
prepareCreate(TEST_INDEX).setMapping(createTestMappingWithoutStarTree()).get();

IllegalArgumentException ex = expectThrows(
IllegalArgumentException.class,
Expand Down Expand Up @@ -338,11 +487,14 @@ public void testUpdateIndexWhenMappingIsSame() {
assertEquals("timestamp", starTreeFieldType.getDimensions().get(0).getField());
assertTrue(starTreeFieldType.getDimensions().get(0) instanceof DateDimension);
DateDimension dateDim = (DateDimension) starTreeFieldType.getDimensions().get(0);
List<Rounding.DateTimeUnit> expectedTimeUnits = Arrays.asList(
Rounding.DateTimeUnit.MINUTES_OF_HOUR,
Rounding.DateTimeUnit.HOUR_OF_DAY
List<DateTimeUnitRounding> expectedTimeUnits = Arrays.asList(
new DateTimeUnitAdapter(Rounding.DateTimeUnit.MINUTES_OF_HOUR),
ExtendedDateTimeUnit.HALF_HOUR_OF_DAY
);
assertEquals(expectedTimeUnits, dateDim.getIntervals());
for (int i = 0; i < expectedTimeUnits.size(); i++) {
assertEquals(expectedTimeUnits.get(i).shortName(), dateDim.getIntervals().get(i).shortName());
}

assertEquals("numeric_dv", starTreeFieldType.getDimensions().get(1).getField());
assertEquals("numeric_dv", starTreeFieldType.getMetrics().get(0).getField());

Expand Down Expand Up @@ -375,6 +527,7 @@ public void testMaxDimsCompositeIndex() {
MapperParsingException ex = expectThrows(
MapperParsingException.class,
() -> prepareCreate(TEST_INDEX).setMapping(createMaxDimTestMapping())
// Date dimension is considered as one dimension regardless of number of actual calendar intervals
.setSettings(Settings.builder().put(StarTreeIndexSettings.STAR_TREE_MAX_DIMENSIONS_SETTING.getKey(), 2))
.get()
);
Expand Down
18 changes: 9 additions & 9 deletions server/src/main/java/org/opensearch/common/Rounding.java
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ public enum DateTimeUnit {
WEEK_OF_WEEKYEAR((byte) 1, "week", IsoFields.WEEK_OF_WEEK_BASED_YEAR, true, TimeUnit.DAYS.toMillis(7)) {
private final long extraLocalOffsetLookup = TimeUnit.DAYS.toMillis(7);

long roundFloor(long utcMillis) {
public long roundFloor(long utcMillis) {
return DateUtils.roundWeekOfWeekYear(utcMillis);
}

Expand All @@ -107,7 +107,7 @@ long extraLocalOffsetLookup() {
YEAR_OF_CENTURY((byte) 2, "year", ChronoField.YEAR_OF_ERA, false, 12) {
private final long extraLocalOffsetLookup = TimeUnit.DAYS.toMillis(366);

long roundFloor(long utcMillis) {
public long roundFloor(long utcMillis) {
return DateUtils.roundYear(utcMillis);
}

Expand All @@ -118,7 +118,7 @@ long extraLocalOffsetLookup() {
QUARTER_OF_YEAR((byte) 3, "quarter", IsoFields.QUARTER_OF_YEAR, false, 3) {
private final long extraLocalOffsetLookup = TimeUnit.DAYS.toMillis(92);

long roundFloor(long utcMillis) {
public long roundFloor(long utcMillis) {
return DateUtils.roundQuarterOfYear(utcMillis);
}

Expand All @@ -129,7 +129,7 @@ long extraLocalOffsetLookup() {
MONTH_OF_YEAR((byte) 4, "month", ChronoField.MONTH_OF_YEAR, false, 1) {
private final long extraLocalOffsetLookup = TimeUnit.DAYS.toMillis(31);

long roundFloor(long utcMillis) {
public long roundFloor(long utcMillis) {
return DateUtils.roundMonthOfYear(utcMillis);
}

Expand All @@ -138,7 +138,7 @@ long extraLocalOffsetLookup() {
}
},
DAY_OF_MONTH((byte) 5, "day", ChronoField.DAY_OF_MONTH, true, ChronoField.DAY_OF_MONTH.getBaseUnit().getDuration().toMillis()) {
long roundFloor(long utcMillis) {
public long roundFloor(long utcMillis) {
return DateUtils.roundFloor(utcMillis, this.ratio);
}

Expand All @@ -147,7 +147,7 @@ long extraLocalOffsetLookup() {
}
},
HOUR_OF_DAY((byte) 6, "hour", ChronoField.HOUR_OF_DAY, true, ChronoField.HOUR_OF_DAY.getBaseUnit().getDuration().toMillis()) {
long roundFloor(long utcMillis) {
public long roundFloor(long utcMillis) {
return DateUtils.roundFloor(utcMillis, ratio);
}

Expand All @@ -162,7 +162,7 @@ long extraLocalOffsetLookup() {
true,
ChronoField.MINUTE_OF_HOUR.getBaseUnit().getDuration().toMillis()
) {
long roundFloor(long utcMillis) {
public long roundFloor(long utcMillis) {
return DateUtils.roundFloor(utcMillis, ratio);
}

Expand All @@ -177,7 +177,7 @@ long extraLocalOffsetLookup() {
true,
ChronoField.SECOND_OF_MINUTE.getBaseUnit().getDuration().toMillis()
) {
long roundFloor(long utcMillis) {
public long roundFloor(long utcMillis) {
return DateUtils.roundFloor(utcMillis, ratio);
}

Expand Down Expand Up @@ -210,7 +210,7 @@ public long extraLocalOffsetLookup() {
* @param utcMillis the milliseconds since the epoch
* @return the rounded down milliseconds since the epoch
*/
abstract long roundFloor(long utcMillis);
public abstract long roundFloor(long utcMillis);

/**
* When looking up {@link LocalTimeOffset} go this many milliseconds
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
package org.opensearch.index.codec.composite;

import org.opensearch.common.annotation.ExperimentalApi;
import org.opensearch.index.compositeindex.datacube.startree.index.CompositeIndexValues;

import java.io.IOException;
import java.util.List;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.index.codec.composite;

import org.apache.lucene.codecs.DocValuesProducer;

/**
* An interface that exposes the {@link DocValuesProducer} backing an implementation, through
* which callers can obtain document values.
*
* @opensearch.experimental
*/
public interface DocValuesProvider {

// NOTE(review): the per-field accessor below is commented out and nothing in this interface
// references it; it looks like leftover dead code — confirm and delete if no longer planned.
// /**
// * Returns the sorted numeric document values for the specified field.
// *
// * @param fieldName The name of the field for which to retrieve the sorted numeric document values.
// * @return The sorted numeric document values for the specified field.
// * @throws IOException If an error occurs while retrieving the sorted numeric document values.
// */
// SortedNumericDocValues getSortedNumeric(String fieldName) throws IOException;

/**
* Returns the {@link DocValuesProducer} instance held by this provider.
*
* @return The DocValuesProducer instance.
*/
DocValuesProducer getDocValuesProducer();
}
Loading

0 comments on commit fd0d4df

Please sign in to comment.