diff --git a/server/src/main/java/org/opensearch/common/util/CollectionUtils.java b/server/src/main/java/org/opensearch/common/util/CollectionUtils.java index 12587bfc350b1..b37d7422652f8 100644 --- a/server/src/main/java/org/opensearch/common/util/CollectionUtils.java +++ b/server/src/main/java/org/opensearch/common/util/CollectionUtils.java @@ -32,6 +32,8 @@ package org.opensearch.common.util; +import com.carrotsearch.hppc.ObjectArrayList; +import org.apache.lucene.util.IntroSorter; import org.opensearch.common.Strings; import org.opensearch.common.collect.Iterators; @@ -41,12 +43,9 @@ import java.util.Arrays; import java.util.Collection; import java.util.Collections; -import java.util.Comparator; import java.util.HashMap; import java.util.IdentityHashMap; -import java.util.Iterator; import java.util.List; -import java.util.ListIterator; import java.util.Locale; import java.util.Map; import java.util.Objects; @@ -91,29 +90,59 @@ public static List rotate(final List list, int distance) { return new RotatedList<>(list, d); } - /** - * in place de-duplicates items in a list - */ - public static void sortAndDedup(final List array, Comparator comparator) { - // base case: one item - if (array.size() <= 1) { - return; + public static void sortAndDedup(final ObjectArrayList array) { + int len = array.size(); + if (len > 1) { + sort(array); + int uniqueCount = 1; + for (int i = 1; i < len; ++i) { + if (!Arrays.equals(array.get(i), array.get(i - 1))) { + array.set(uniqueCount++, array.get(i)); + } + } + array.elementsCount = uniqueCount; } - array.sort(comparator); - ListIterator deduped = array.listIterator(); - T cmp = deduped.next(); // return the first item and advance - Iterator oldArray = array.iterator(); - oldArray.next(); // advance to the old to the second item (advanced to third below) - - do { - T old = oldArray.next(); // get the next item and advance iter - if (comparator.compare(cmp, old) != 0 && (cmp = deduped.next()) != old) { - deduped.set(old); + } + + public static void sort(final ObjectArrayList array) { + new IntroSorter() { + + byte[] pivot; + + @Override + protected void swap(int i, int j) { + final byte[] tmp = array.get(i); + array.set(i, array.get(j)); + array.set(j, tmp); + } + + @Override + protected int compare(int i, int j) { + return compare(array.get(i), array.get(j)); + } + + @Override + protected void setPivot(int i) { + pivot = array.get(i); + } + + @Override + protected int comparePivot(int j) { + return compare(pivot, array.get(j)); + } + + private int compare(byte[] left, byte[] right) { + for (int i = 0, j = 0; i < left.length && j < right.length; i++, j++) { + int a = left[i] & 0xFF; + int b = right[j] & 0xFF; + if (a != b) { + return a - b; + } + } + return left.length - right.length; } - } while (oldArray.hasNext()); - // in place update - array.subList(deduped.nextIndex(), array.size()).clear(); + }.sort(0, array.size()); } public static int[] toArray(Collection ints) { diff --git a/server/src/main/java/org/opensearch/index/mapper/BinaryFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/BinaryFieldMapper.java index 9c60641e61258..6965f3bf664ed 100644 --- a/server/src/main/java/org/opensearch/index/mapper/BinaryFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/BinaryFieldMapper.java @@ -32,6 +32,7 @@ package org.opensearch.index.mapper; +import com.carrotsearch.hppc.ObjectArrayList; import org.apache.lucene.document.StoredField; import org.apache.lucene.search.Query; import org.apache.lucene.util.BytesRef; @@ -51,7 +52,6 @@ import java.io.IOException; import java.time.ZoneId; -import java.util.ArrayList; import java.util.Arrays; import java.util.Base64; import java.util.Collections; @@ -240,28 +240,30 @@ protected String contentType() { */ public static class CustomBinaryDocValuesField extends CustomDocValuesField { - private final ArrayList bytesList; + private final ObjectArrayList bytesList; + + private int totalSize = 0; public CustomBinaryDocValuesField(String name, byte[] bytes) { super(name); - bytesList = new ArrayList<>(); + bytesList = new ObjectArrayList<>(); add(bytes); } public void add(byte[] bytes) { bytesList.add(bytes); + totalSize += bytes.length; } @Override public BytesRef binaryValue() { try { - // sort and dedup in place - CollectionUtils.sortAndDedup(bytesList, Arrays::compareUnsigned); - int size = bytesList.stream().map(b -> b.length).reduce(0, Integer::sum); - int length = bytesList.size(); - BytesStreamOutput out = new BytesStreamOutput(size + (length + 1) * 5); - out.writeVInt(length); // write total number of values - for (byte[] value : bytesList) { + CollectionUtils.sortAndDedup(bytesList); + int size = bytesList.size(); + BytesStreamOutput out = new BytesStreamOutput(totalSize + (size + 1) * 5); + out.writeVInt(size); // write total number of values + for (int i = 0; i < size; i++) { + final byte[] value = bytesList.get(i); int valueLength = value.length; out.writeVInt(valueLength); out.writeBytes(value, 0, valueLength); diff --git a/server/src/test/java/org/opensearch/common/util/CollectionUtilsTests.java b/server/src/test/java/org/opensearch/common/util/CollectionUtilsTests.java index 837cc49b34aab..33053d3b9e499 100644 --- a/server/src/test/java/org/opensearch/common/util/CollectionUtilsTests.java +++ b/server/src/test/java/org/opensearch/common/util/CollectionUtilsTests.java @@ -37,10 +37,8 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; -import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; -import java.util.LinkedList; import java.util.List; import java.util.Map; @@ -78,32 +76,6 @@ public void testRotate() { } } - private void assertDeduped(List array, Comparator cmp, int expectedLength) { - // test the dedup w/ ArrayLists and LinkedLists - List> types = List.of(new ArrayList(array), new LinkedList(array)); - for (List clone : types) { - // dedup the list - CollectionUtils.sortAndDedup(clone, cmp); - // verify unique elements - for (int i = 0; i < clone.size() - 1; ++i) { - assertNotEquals(cmp.compare(clone.get(i), clone.get(i + 1)), 0); - } - assertEquals(expectedLength, clone.size()); - } - } - - public void testSortAndDedup() { - // test no elements in a string array - assertDeduped(List.of(), Comparator.naturalOrder(), 0); - // test no elements in an integer array - assertDeduped(List.of(), Comparator.naturalOrder(), 0); - // test unsorted array - assertDeduped(List.of(-1, 0, 2, 1, -1, 19, -1), Comparator.naturalOrder(), 5); - // test sorted array - assertDeduped(List.of(-1, 0, 1, 2, 19, 19), Comparator.naturalOrder(), 5); - // test sorted - } - public void testEmptyPartition() { assertEquals(Collections.emptyList(), eagerPartition(Collections.emptyList(), 1)); }