Skip to content

Commit

Permalink
Check UTF16 string size before converting to String to avoid OOME (op…
Browse files Browse the repository at this point in the history
…ensearch-project#7963) (opensearch-project#8344)

* Check UTF16 string size before converting to string to avoid OOME

Signed-off-by: Rishab Nahata <rnnahata@amazon.com>
  • Loading branch information
imRishN authored Jun 29, 2023
1 parent f90725d commit 9ee8fe6
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 1 deletion.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- [Refactor] Metadata members from ImmutableOpenMap to j.u.Map ([#7165](https://github.com/opensearch-project/OpenSearch/pull/7165))
- [Refactor] more ImmutableOpenMap to jdk Map in cluster package ([#7301](https://github.com/opensearch-project/OpenSearch/pull/7301))
- [Refactor] ImmutableOpenMap to j.u.Map in IndexMetadata ([#7306](https://github.com/opensearch-project/OpenSearch/pull/7306))
- Check UTF16 string size before converting to String to avoid OOME ([#7963](https://github.com/opensearch-project/OpenSearch/pull/7963))

### Deprecated

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefIterator;
import org.apache.lucene.util.UnicodeUtil;
import org.opensearch.common.io.stream.StreamInput;
import org.opensearch.core.xcontent.XContentBuilder;

Expand All @@ -49,6 +50,7 @@
public abstract class AbstractBytesReference implements BytesReference {

private Integer hash = null; // we cache the hash of this reference since it can be quite costly to re-calculated it
private static final int MAX_UTF16_LENGTH = Integer.MAX_VALUE >> 1;

@Override
public int getInt(int index) {
Expand Down Expand Up @@ -80,9 +82,19 @@ public void writeTo(OutputStream os) throws IOException {
}
}

protected int getMaxUTF16Length() {
return MAX_UTF16_LENGTH;
}

@Override
public String utf8ToString() {
return toBytesRef().utf8ToString();
BytesRef bytesRef = toBytesRef();
final char[] ref = new char[bytesRef.length];
final int len = UnicodeUtil.UTF8toUTF16(bytesRef, ref);
if (len > getMaxUTF16Length()) {
throw new IllegalArgumentException("UTF16 String size is " + len + ", should be less than " + getMaxUTF16Length());
}
return new String(ref, 0, len);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -478,6 +478,45 @@ public void testToUtf8() throws IOException {
// TODO: good way to test?
}

public void testUTF8toString_ExceedsMaxLength() {
AbstractBytesReference abr = new TestAbstractBytesReference();
IllegalArgumentException e = assertThrows(IllegalArgumentException.class, abr::utf8ToString);
assertTrue(e.getMessage().contains("UTF16 String size is"));
assertTrue(e.getMessage().contains("should be less than"));
}

static class TestAbstractBytesReference extends AbstractBytesReference {
@Override
public byte get(int index) {
return 0;
}

@Override
public int length() {
return 0;
}

@Override
public BytesReference slice(int from, int length) {
return null;
}

@Override
public long ramBytesUsed() {
return 0;
}

@Override
public BytesRef toBytesRef() {
return new BytesRef("UTF16 length exceed test");
}

@Override
public int getMaxUTF16Length() {
return 1;
}
}

public void testToBytesRef() throws IOException {
int length = randomIntBetween(0, PAGE_SIZE);
BytesReference pbr = newBytesReference(length);
Expand Down

0 comments on commit 9ee8fe6

Please sign in to comment.