From a91294ff92370b9720c8ad44d2aa7c5bac43b140 Mon Sep 17 00:00:00 2001 From: akolarkunnu Date: Mon, 3 Jun 2024 18:38:32 +0530 Subject: [PATCH 01/15] COMPAT locale provider will be removed in a future release Description: From JDK21 onwards a new warning has started to come, "WARNING: COMPAT locale provider will be removed in a future release". So, we have to avoid usage of COMPAT provider. We were setting exlpicitly to COMPAT locale provider in couple of places, this change is to convert COMPAT to CLDR locale provider. After this change, couple of tests started to fail becasue some locale data has minor changes in CLDR compared to COMPAT. For example, day and month short names of GERMAN "de" locale are different in CLDR and COMPAT, just need to add a . in the end for CLDR. Resolves #11550 Signed-off-by: Abdul Muneer Kolarkunnu --- CHANGELOG.md | 1 + .../gradle/OpenSearchTestBasePlugin.java | 2 +- .../tools/launchers/SystemJvmOptions.java | 7 +------ gradle/ide.gradle | 2 +- .../opensearch/search/query/SearchQueryIT.java | 18 ++++++++---------- .../index/mapper/DateFieldMapperTests.java | 2 +- 6 files changed, 13 insertions(+), 19 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 78d294c8b4c97..aaefa13510cac 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -33,6 +33,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Bump `org.apache.xmlbeans:xmlbeans` from 5.2.0 to 5.2.1 ([#13839](https://github.com/opensearch-project/OpenSearch/pull/13839)) ### Changed +- Changed locale provider from COMPAT to CLDR ([#11550]()) - Add ability for Boolean and date field queries to run when only doc_values are enabled ([#11650](https://github.com/opensearch-project/OpenSearch/pull/11650)) - Refactor implementations of query phase searcher, allow QueryCollectorContext to have zero collectors ([#13481](https://github.com/opensearch-project/OpenSearch/pull/13481)) - Adds support to inject telemetry instances to plugins ([#13636](https://github.com/opensearch-project/OpenSearch/pull/13636)) diff --git a/buildSrc/src/main/java/org/opensearch/gradle/OpenSearchTestBasePlugin.java b/buildSrc/src/main/java/org/opensearch/gradle/OpenSearchTestBasePlugin.java index 2ea8c2d015ecc..d0cb2da9c1dd3 100644 --- a/buildSrc/src/main/java/org/opensearch/gradle/OpenSearchTestBasePlugin.java +++ b/buildSrc/src/main/java/org/opensearch/gradle/OpenSearchTestBasePlugin.java @@ -110,7 +110,7 @@ public void execute(Task t) { if (BuildParams.getRuntimeJavaVersion() == JavaVersion.VERSION_1_8) { test.systemProperty("java.locale.providers", "SPI,JRE"); } else { - test.systemProperty("java.locale.providers", "SPI,COMPAT"); + test.systemProperty("java.locale.providers", "SPI,CLDR"); if (test.getJavaVersion().compareTo(JavaVersion.VERSION_17) < 0) { test.jvmArgs("--illegal-access=warn"); } diff --git a/distribution/tools/launchers/src/main/java/org/opensearch/tools/launchers/SystemJvmOptions.java b/distribution/tools/launchers/src/main/java/org/opensearch/tools/launchers/SystemJvmOptions.java index 726c381db09f6..af7138569972a 100644 --- a/distribution/tools/launchers/src/main/java/org/opensearch/tools/launchers/SystemJvmOptions.java +++ b/distribution/tools/launchers/src/main/java/org/opensearch/tools/launchers/SystemJvmOptions.java @@ -105,13 +105,8 @@ private static String javaLocaleProviders() { SPI setting is used to allow loading custom CalendarDataProvider in jdk8 it has to be loaded from jre/lib/ext, in jdk9+ it is already within ES project and on a classpath - - Due to internationalization enhancements in JDK 9 OpenSearch need to set the provider to COMPAT otherwise time/date - parsing will break in an incompatible way for some date patterns and locales. - //TODO COMPAT will be deprecated in at some point, see please https://bugs.openjdk.java.net/browse/JDK-8232906 - See also: documentation in server/org.opensearch.common.time.IsoCalendarDataProvider */ - return "-Djava.locale.providers=SPI,COMPAT"; + return "-Djava.locale.providers=SPI,CLDR"; } } diff --git a/gradle/ide.gradle b/gradle/ide.gradle index 14d6b2982ccd0..4c4f3b07836c5 100644 --- a/gradle/ide.gradle +++ b/gradle/ide.gradle @@ -81,7 +81,7 @@ if (System.getProperty('idea.active') == 'true') { } runConfigurations { defaults(JUnit) { - vmParameters = '-ea -Djava.locale.providers=SPI,COMPAT' + vmParameters = '-ea -Djava.locale.providers=SPI,CLDR' if (BuildParams.runtimeJavaVersion > JavaVersion.VERSION_17) { vmParameters += ' -Djava.security.manager=allow' } diff --git a/server/src/internalClusterTest/java/org/opensearch/search/query/SearchQueryIT.java b/server/src/internalClusterTest/java/org/opensearch/search/query/SearchQueryIT.java index a58db51780826..01ad06757640c 100644 --- a/server/src/internalClusterTest/java/org/opensearch/search/query/SearchQueryIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/search/query/SearchQueryIT.java @@ -1914,14 +1914,8 @@ public void testRangeQueryWithTimeZone() throws Exception { * Test range with a custom locale, e.g. "de" in this case. Documents here mention the day of week * as "Mi" for "Mittwoch (Wednesday" and "Do" for "Donnerstag (Thursday)" and the month in the query * as "Dez" for "Dezember (December)". - * Note: this test currently needs the JVM arg `-Djava.locale.providers=SPI,COMPAT` to be set. - * When running with gradle this is done implicitly through the BuildPlugin, but when running from - * an IDE this might need to be set manually in the run configuration. See also CONTRIBUTING.md section - * on "Configuring IDEs And Running Tests". */ public void testRangeQueryWithLocaleMapping() throws Exception { - assert ("SPI,COMPAT".equals(System.getProperty("java.locale.providers"))) : "`-Djava.locale.providers=SPI,COMPAT` needs to be set"; - assertAcked( prepareCreate("test").setMapping( jsonBuilder().startObject() @@ -1938,17 +1932,21 @@ public void testRangeQueryWithLocaleMapping() throws Exception { indexRandom( true, - client().prepareIndex("test").setId("1").setSource("date_field", "Mi, 06 Dez 2000 02:55:00 -0800"), - client().prepareIndex("test").setId("2").setSource("date_field", "Do, 07 Dez 2000 02:55:00 -0800") + client().prepareIndex("test").setId("1").setSource("date_field", "Mi., 06 Dez. 2000 02:55:00 -0800"), + client().prepareIndex("test").setId("2").setSource("date_field", "Do., 07 Dez. 2000 02:55:00 -0800") ); SearchResponse searchResponse = client().prepareSearch("test") - .setQuery(QueryBuilders.rangeQuery("date_field").gte("Di, 05 Dez 2000 02:55:00 -0800").lte("Do, 07 Dez 2000 00:00:00 -0800")) + .setQuery( + QueryBuilders.rangeQuery("date_field").gte("Di., 05 Dez. 2000 02:55:00 -0800").lte("Do., 07 Dez. 2000 00:00:00 -0800") + ) .get(); assertHitCount(searchResponse, 1L); searchResponse = client().prepareSearch("test") - .setQuery(QueryBuilders.rangeQuery("date_field").gte("Di, 05 Dez 2000 02:55:00 -0800").lte("Fr, 08 Dez 2000 00:00:00 -0800")) + .setQuery( + QueryBuilders.rangeQuery("date_field").gte("Di., 05 Dez. 2000 02:55:00 -0800").lte("Fr., 08 Dez. 2000 00:00:00 -0800") + ) .get(); assertHitCount(searchResponse, 2L); } diff --git a/server/src/test/java/org/opensearch/index/mapper/DateFieldMapperTests.java b/server/src/test/java/org/opensearch/index/mapper/DateFieldMapperTests.java index 2aa310ae959d9..98bcaa3a1a46b 100644 --- a/server/src/test/java/org/opensearch/index/mapper/DateFieldMapperTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/DateFieldMapperTests.java @@ -208,7 +208,7 @@ public void testChangeLocale() throws IOException { fieldMapping(b -> b.field("type", "date").field("format", "E, d MMM yyyy HH:mm:ss Z").field("locale", "de")) ); - mapper.parse(source(b -> b.field("field", "Mi, 06 Dez 2000 02:55:00 -0800"))); + mapper.parse(source(b -> b.field("field", "Mi., 06 Dez. 2000 02:55:00 -0800"))); } public void testNullValue() throws IOException { From 4efe07963ceb26b143b7111c6b4e5cd1ebddcfbf Mon Sep 17 00:00:00 2001 From: akolarkunnu Date: Mon, 3 Jun 2024 18:43:18 +0530 Subject: [PATCH 02/15] COMPAT locale provider will be removed in a future release Description: From JDK21 onwards a new warning has started to come, "WARNING: COMPAT locale provider will be removed in a future release". So, we have to avoid usage of COMPAT provider. We were setting exlpicitly to COMPAT locale provider in couple of places, this change is to convert COMPAT to CLDR locale provider. After this change, couple of tests started to fail becasue some locale data has minor changes in CLDR compared to COMPAT. For example, day and month short names of GERMAN "de" locale are different in CLDR and COMPAT, just need to add a . in the end for CLDR. Resolves #11550 Signed-off-by: Abdul Muneer Kolarkunnu muneer.kolarkunnu@netapp.com --- CHANGELOG-3.0.md | 1 + CHANGELOG.md | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG-3.0.md b/CHANGELOG-3.0.md index 964383078c38d..5d137fd5b14a7 100644 --- a/CHANGELOG-3.0.md +++ b/CHANGELOG-3.0.md @@ -17,6 +17,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ### Dependencies ### Changed +- Changed locale provider from COMPAT to CLDR ([#11550](https://github.com/opensearch-project/OpenSearch/pull/13934)) - Migrate client transports to Apache HttpClient / Core 5.x ([#4459](https://github.com/opensearch-project/OpenSearch/pull/4459)) - Change http code on create index API with bad input raising NotXContentException from 500 to 400 ([#4773](https://github.com/opensearch-project/OpenSearch/pull/4773)) - Improve summary error message for invalid setting updates ([#4792](https://github.com/opensearch-project/OpenSearch/pull/4792)) diff --git a/CHANGELOG.md b/CHANGELOG.md index aaefa13510cac..78d294c8b4c97 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -33,7 +33,6 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Bump `org.apache.xmlbeans:xmlbeans` from 5.2.0 to 5.2.1 ([#13839](https://github.com/opensearch-project/OpenSearch/pull/13839)) ### Changed -- Changed locale provider from COMPAT to CLDR ([#11550]()) - Add ability for Boolean and date field queries to run when only doc_values are enabled ([#11650](https://github.com/opensearch-project/OpenSearch/pull/11650)) - Refactor implementations of query phase searcher, allow QueryCollectorContext to have zero collectors ([#13481](https://github.com/opensearch-project/OpenSearch/pull/13481)) - Adds support to inject telemetry instances to plugins ([#13636](https://github.com/opensearch-project/OpenSearch/pull/13636)) From 6af317b4740207a9f67a8aab38dc093528ae28b8 Mon Sep 17 00:00:00 2001 From: Liyun Xiu Date: Mon, 3 Jun 2024 05:51:24 -0700 Subject: [PATCH 03/15] Add comment to XContentBuilder.toString() (#13809) * Add comment to XContentBuilder.toString Signed-off-by: Liyun Xiu * Fix typo Signed-off-by: Liyun Xiu --------- Signed-off-by: Liyun Xiu --- .../java/org/opensearch/core/xcontent/XContentBuilder.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/libs/core/src/main/java/org/opensearch/core/xcontent/XContentBuilder.java b/libs/core/src/main/java/org/opensearch/core/xcontent/XContentBuilder.java index 976f353100c55..552945d085884 100644 --- a/libs/core/src/main/java/org/opensearch/core/xcontent/XContentBuilder.java +++ b/libs/core/src/main/java/org/opensearch/core/xcontent/XContentBuilder.java @@ -157,6 +157,9 @@ public static XContentBuilder builder(XContent xContent, Set includes, S /** * Returns a string representation of the builder (only applicable for text based xcontent). + * Note: explicitly or implicitly (from debugger) calling toString() could cause XContentBuilder + * to close which is a side effect done by @see BytesReference#bytes(). + * Trying to write more contents after toString() will cause NPE. Use it with caution. */ @Override public String toString() { From ecfd1e83d463c0c230b67070f2ab41011f348874 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 3 Jun 2024 12:50:02 -0400 Subject: [PATCH 04/15] Bump com.netflix.nebula.ospackage-base from 11.9.0 to 11.9.1 in /distribution/packages (#13933) * Bump com.netflix.nebula.ospackage-base in /distribution/packages Bumps com.netflix.nebula.ospackage-base from 11.9.0 to 11.9.1. --- updated-dependencies: - dependency-name: com.netflix.nebula.ospackage-base dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] * Update changelog Signed-off-by: dependabot[bot] --------- Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: dependabot[bot] --- CHANGELOG.md | 1 + distribution/packages/build.gradle | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 78d294c8b4c97..788b09928a8bc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -31,6 +31,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Bump `reactor-netty` from 1.1.17 to 1.1.19 ([#13825](https://github.com/opensearch-project/OpenSearch/pull/13825)) - Bump `commons-cli:commons-cli` from 1.7.0 to 1.8.0 ([#13840](https://github.com/opensearch-project/OpenSearch/pull/13840)) - Bump `org.apache.xmlbeans:xmlbeans` from 5.2.0 to 5.2.1 ([#13839](https://github.com/opensearch-project/OpenSearch/pull/13839)) +- Bump `com.netflix.nebula.ospackage-base` from 11.9.0 to 11.9.1 ([#13933](https://github.com/opensearch-project/OpenSearch/pull/13933)) ### Changed - Add ability for Boolean and date field queries to run when only doc_values are enabled ([#11650](https://github.com/opensearch-project/OpenSearch/pull/11650)) diff --git a/distribution/packages/build.gradle b/distribution/packages/build.gradle index fbd13f03af814..211b3bd55da60 100644 --- a/distribution/packages/build.gradle +++ b/distribution/packages/build.gradle @@ -63,7 +63,7 @@ import java.util.regex.Pattern */ plugins { - id "com.netflix.nebula.ospackage-base" version "11.9.0" + id "com.netflix.nebula.ospackage-base" version "11.9.1" } void addProcessFilesTask(String type, boolean jdk) { From 4e700ed1807442ee75c8704e852eeb6d8d4d0d72 Mon Sep 17 00:00:00 2001 From: Rohit Ashiwal Date: Mon, 3 Jun 2024 23:01:52 +0530 Subject: [PATCH 05/15] Painless: ensure type "UnmodifiableMap" for params (#13885) Signed-off-by: Rohit Ashiwal --- CHANGELOG.md | 1 + .../painless/WhenThingsGoWrongTests.java | 3 ++ .../test/painless/17_update_error.yml | 47 +++++++++++++++++++ .../java/org/opensearch/script/Script.java | 2 +- .../org/opensearch/script/UpdateScript.java | 3 +- 5 files changed, 54 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 788b09928a8bc..3554565e737e8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -48,6 +48,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Allow clearing `remote_store.compatibility_mode` setting ([#13646](https://github.com/opensearch-project/OpenSearch/pull/13646)) - Fix ReplicaShardBatchAllocator to batch shards without duplicates ([#13710](https://github.com/opensearch-project/OpenSearch/pull/13710)) - Don't return negative scores from `multi_match` query with `cross_fields` type ([#13829](https://github.com/opensearch-project/OpenSearch/pull/13829)) +- Painless: ensure type "UnmodifiableMap" for params ([#13885](https://github.com/opensearch-project/OpenSearch/pull/13885)) - Pass parent filter to inner hit query ([#13903](https://github.com/opensearch-project/OpenSearch/pull/13903)) - Fix NPE on restore searchable snapshot ([#13911](https://github.com/opensearch-project/OpenSearch/pull/13911)) diff --git a/modules/lang-painless/src/test/java/org/opensearch/painless/WhenThingsGoWrongTests.java b/modules/lang-painless/src/test/java/org/opensearch/painless/WhenThingsGoWrongTests.java index 0d498e16154c8..3d48e96117a1c 100644 --- a/modules/lang-painless/src/test/java/org/opensearch/painless/WhenThingsGoWrongTests.java +++ b/modules/lang-painless/src/test/java/org/opensearch/painless/WhenThingsGoWrongTests.java @@ -354,6 +354,9 @@ public void testInvalidAssignment() { assertEquals(iae.getMessage(), "invalid assignment: cannot assign a value to addition operation [+]"); iae = expectScriptThrows(IllegalArgumentException.class, () -> exec("Double.x() = 1;")); assertEquals(iae.getMessage(), "invalid assignment: cannot assign a value to method call [x/0]"); + + expectScriptThrows(UnsupportedOperationException.class, () -> exec("params['modifyingParamsMap'] = 2;")); + expectScriptThrows(UnsupportedOperationException.class, () -> exec("params.modifyingParamsMap = 2;")); } public void testCannotResolveSymbol() { diff --git a/modules/lang-painless/src/yamlRestTest/resources/rest-api-spec/test/painless/17_update_error.yml b/modules/lang-painless/src/yamlRestTest/resources/rest-api-spec/test/painless/17_update_error.yml index 3d6db1b781caf..fdbc6de37e3ea 100644 --- a/modules/lang-painless/src/yamlRestTest/resources/rest-api-spec/test/painless/17_update_error.yml +++ b/modules/lang-painless/src/yamlRestTest/resources/rest-api-spec/test/painless/17_update_error.yml @@ -13,3 +13,50 @@ - match: { error.root_cause.0.position.offset: 13 } - match: { error.root_cause.0.position.start: 0 } - match: { error.root_cause.0.position.end: 38 } + +--- +"Test modifying params map from script leads to exception": + - skip: + features: "node_selector" + + - do: + put_script: + id: "except" + body: {"script": {"lang": "painless", "source": "params.that = 3"}} + + - do: + indices.create: + index: "test" + body: + settings: + index: + number_of_shards: 1 + number_of_replicas: 0 + mappings: + properties: + this: + type: "integer" + that: + type: "integer" + + - do: + index: + index: "test" + id: 1 + body: {"this": 1, "that": 2} + + - do: + catch: /unsupported_operation_exception/ + node_selector: + version: "2.15.0 - " + update: + index: "test" + id: 1 + body: + script: + id: "except" + params: {"this": 2} + + - match: { error.caused_by.position.offset: 6 } + - match: { error.caused_by.position.start: 0 } + - match: { error.caused_by.position.end: 15 } diff --git a/server/src/main/java/org/opensearch/script/Script.java b/server/src/main/java/org/opensearch/script/Script.java index 9e74314c281cd..f18bd992cb00d 100644 --- a/server/src/main/java/org/opensearch/script/Script.java +++ b/server/src/main/java/org/opensearch/script/Script.java @@ -589,7 +589,7 @@ public Script(StreamInput in) throws IOException { @SuppressWarnings("unchecked") Map options = (Map) (Map) in.readMap(); this.options = options; - this.params = in.readMap(); + this.params = Collections.unmodifiableMap(in.readMap()); } @Override diff --git a/server/src/main/java/org/opensearch/script/UpdateScript.java b/server/src/main/java/org/opensearch/script/UpdateScript.java index 86697e9ae550e..f6355fe24817b 100644 --- a/server/src/main/java/org/opensearch/script/UpdateScript.java +++ b/server/src/main/java/org/opensearch/script/UpdateScript.java @@ -32,6 +32,7 @@ package org.opensearch.script; +import java.util.Collections; import java.util.Map; /** @@ -53,7 +54,7 @@ public abstract class UpdateScript { private final Map ctx; public UpdateScript(Map params, Map ctx) { - this.params = params; + this.params = Collections.unmodifiableMap(params); this.ctx = ctx; } From 4dffd96ee8d2313f38b50f0e4437e5276efda679 Mon Sep 17 00:00:00 2001 From: Shivansh Arora <31575408+shiv0408@users.noreply.github.com> Date: Mon, 3 Jun 2024 23:08:20 +0530 Subject: [PATCH 06/15] Bump japicmp plugin version (#13940) Signed-off-by: Shivansh Arora --- server/build.gradle | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/build.gradle b/server/build.gradle index 9714f13ec67d6..15301e68fca3d 100644 --- a/server/build.gradle +++ b/server/build.gradle @@ -36,7 +36,7 @@ plugins { id('opensearch.publish') id('opensearch.internal-cluster-test') id('opensearch.optional-dependencies') - id('me.champeau.gradle.japicmp') version '0.4.2' + id('me.champeau.gradle.japicmp') version '0.4.3' } publishing { From bcdca37536492c91e0acf4d452546fbf2b086bfd Mon Sep 17 00:00:00 2001 From: Rishabh Maurya Date: Mon, 3 Jun 2024 13:42:15 -0700 Subject: [PATCH 07/15] [Derived Field] Dynamic FieldType inference based on random sampling of documents (#13592) --------- Signed-off-by: Rishabh Maurya --- .../index/mapper/FieldTypeInference.java | 181 +++++++++++++++ .../index/mapper/FieldTypeInferenceTests.java | 210 ++++++++++++++++++ 2 files changed, 391 insertions(+) create mode 100644 server/src/main/java/org/opensearch/index/mapper/FieldTypeInference.java create mode 100644 server/src/test/java/org/opensearch/index/mapper/FieldTypeInferenceTests.java diff --git a/server/src/main/java/org/opensearch/index/mapper/FieldTypeInference.java b/server/src/main/java/org/opensearch/index/mapper/FieldTypeInference.java new file mode 100644 index 0000000000000..713bdc4e691cd --- /dev/null +++ b/server/src/main/java/org/opensearch/index/mapper/FieldTypeInference.java @@ -0,0 +1,181 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.mapper; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.ReaderUtil; +import org.opensearch.common.Randomness; +import org.opensearch.common.xcontent.XContentFactory; +import org.opensearch.common.xcontent.json.JsonXContent; +import org.opensearch.core.common.bytes.BytesReference; +import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.search.lookup.SourceLookup; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; +import java.util.Random; +import java.util.Set; +import java.util.TreeSet; + +/** + * This class performs type inference by analyzing the _source documents. It uses a random sample of documents to infer the field type, similar to dynamic mapping type guessing logic. + * Unlike guessing based on the first document, where field could be missing, this method generates a random sample to make a more accurate inference. + * This approach is especially useful for handling missing fields, which is common in nested fields within derived fields of object types. + * + *

The sample size should be chosen carefully to ensure a high probability of selecting at least one document where the field is present. + * However, it's essential to strike a balance because a large sample size can lead to performance issues since each sample document's _source field is loaded and examined until the field is found. + * + *

Determining the sample size ({@code S}) is akin to deciding how many balls to draw from a bin, ensuring a high probability ({@code >=P}) of drawing at least one green ball (documents with the field) from a mixture of {@code R } red balls (documents without the field) and {@code G } green balls: + *

{@code
+ * P >= 1 - C(R, S) / C(R + G, S)
+ * }
+ * Here, {@code C()} represents the binomial coefficient. + * For a high confidence level, we aim for {@code P >= 0.95 }. For example, with {@code 10^7 } documents where the field is present in {@code 2% } of them, the sample size {@code S } should be around 149 to achieve a probability of {@code 0.95}. + */ +public class FieldTypeInference { + private final IndexReader indexReader; + private final String indexName; + private final MapperService mapperService; + // TODO expose using a index setting + private int sampleSize; + private static final int DEFAULT_SAMPLE_SIZE = 150; + private static final int MAX_SAMPLE_SIZE_ALLOWED = 1000; + + public FieldTypeInference(String indexName, MapperService mapperService, IndexReader indexReader) { + this.indexName = indexName; + this.mapperService = mapperService; + this.indexReader = indexReader; + this.sampleSize = DEFAULT_SAMPLE_SIZE; + } + + public void setSampleSize(int sampleSize) { + if (sampleSize > MAX_SAMPLE_SIZE_ALLOWED) { + throw new IllegalArgumentException("sample_size should be less than " + MAX_SAMPLE_SIZE_ALLOWED); + } + this.sampleSize = sampleSize; + } + + public int getSampleSize() { + return sampleSize; + } + + public Mapper infer(ValueFetcher valueFetcher) throws IOException { + RandomSourceValuesGenerator valuesGenerator = new RandomSourceValuesGenerator(sampleSize, indexReader, valueFetcher); + Mapper inferredMapper = null; + while (inferredMapper == null && valuesGenerator.hasNext()) { + List values = valuesGenerator.next(); + if (values == null || values.isEmpty()) { + continue; + } + // always use first value in case of multi value field to infer type + inferredMapper = inferTypeFromObject(values.get(0)); + } + return inferredMapper; + } + + private Mapper inferTypeFromObject(Object o) throws IOException { + if (o == null) { + return null; + } + DocumentMapper mapper = mapperService.documentMapper(); + XContentBuilder builder = XContentFactory.jsonBuilder().startObject().field("field", o).endObject(); + BytesReference bytesReference = BytesReference.bytes(builder); + SourceToParse sourceToParse = new SourceToParse(indexName, "_id", bytesReference, JsonXContent.jsonXContent.mediaType()); + ParsedDocument parsedDocument = mapper.parse(sourceToParse); + Mapping mapping = parsedDocument.dynamicMappingsUpdate(); + return mapping.root.getMapper("field"); + } + + private static class RandomSourceValuesGenerator implements Iterator> { + private final ValueFetcher valueFetcher; + private final IndexReader indexReader; + private final SourceLookup sourceLookup; + private final int[] docs; + private int iter; + private int leaf; + private final int MAX_ATTEMPTS_TO_GENERATE_RANDOM_SAMPLES = 10000; + + public RandomSourceValuesGenerator(int sampleSize, IndexReader indexReader, ValueFetcher valueFetcher) { + this.valueFetcher = valueFetcher; + this.indexReader = indexReader; + sampleSize = Math.min(sampleSize, indexReader.numDocs()); + this.docs = getSortedRandomNum( + sampleSize, + indexReader.numDocs(), + Math.max(sampleSize, MAX_ATTEMPTS_TO_GENERATE_RANDOM_SAMPLES) + ); + this.iter = 0; + this.leaf = -1; + this.sourceLookup = new SourceLookup(); + if (hasNext()) { + setNextLeaf(); + } + } + + @Override + public boolean hasNext() { + return iter < docs.length && leaf < indexReader.leaves().size(); + } + + /** + * Ensure hasNext() is called before calling next() + */ + @Override + public List next() { + int docID = docs[iter] - indexReader.leaves().get(leaf).docBase; + if (docID >= indexReader.leaves().get(leaf).reader().numDocs()) { + setNextLeaf(); + } + // deleted docs are getting used to infer type, which should be okay? + sourceLookup.setSegmentAndDocument(indexReader.leaves().get(leaf), docs[iter] - indexReader.leaves().get(leaf).docBase); + try { + iter++; + return valueFetcher.fetchValues(sourceLookup); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + private void setNextLeaf() { + int readerIndex = ReaderUtil.subIndex(docs[iter], indexReader.leaves()); + if (readerIndex != leaf) { + leaf = readerIndex; + } else { + // this will only happen when leaves are exhausted and readerIndex will be indexReader.leaves()-1. + leaf++; + } + if (leaf < indexReader.leaves().size()) { + valueFetcher.setNextReader(indexReader.leaves().get(leaf)); + } + } + + private static int[] getSortedRandomNum(int sampleSize, int upperBound, int attempts) { + Set generatedNumbers = new TreeSet<>(); + Random random = Randomness.get(); + int itr = 0; + if (upperBound <= 10 * sampleSize) { + List numberList = new ArrayList<>(); + for (int i = 0; i < upperBound; i++) { + numberList.add(i); + } + Collections.shuffle(numberList, random); + generatedNumbers.addAll(numberList.subList(0, sampleSize)); + } else { + while (generatedNumbers.size() < sampleSize && itr++ < attempts) { + int randomNumber = random.nextInt(upperBound); + generatedNumbers.add(randomNumber); + } + } + return generatedNumbers.stream().mapToInt(Integer::valueOf).toArray(); + } + } +} diff --git a/server/src/test/java/org/opensearch/index/mapper/FieldTypeInferenceTests.java b/server/src/test/java/org/opensearch/index/mapper/FieldTypeInferenceTests.java new file mode 100644 index 0000000000000..807d0e96ce5e3 --- /dev/null +++ b/server/src/test/java/org/opensearch/index/mapper/FieldTypeInferenceTests.java @@ -0,0 +1,210 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.mapper; + +import org.apache.lucene.document.Document; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.store.Directory; +import org.opensearch.common.lucene.Lucene; +import org.opensearch.core.index.Index; +import org.opensearch.index.query.QueryShardContext; +import org.opensearch.search.lookup.SourceLookup; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import static org.mockito.Mockito.when; + +public class FieldTypeInferenceTests extends MapperServiceTestCase { + + private static final Map> documentMap; + static { + List listWithNull = new ArrayList<>(); + listWithNull.add(null); + documentMap = new HashMap<>(); + documentMap.put("text_field", List.of("The quick brown fox jumps over the lazy dog.")); + documentMap.put("int_field", List.of(789)); + documentMap.put("float_field", List.of(123.45)); + documentMap.put("date_field_1", List.of("2024-05-12T15:45:00Z")); + documentMap.put("date_field_2", List.of("2024-05-12")); + documentMap.put("boolean_field", List.of(true)); + documentMap.put("null_field", listWithNull); + documentMap.put("array_field_int", List.of(100, 200, 300, 400, 500)); + documentMap.put("array_field_text", List.of("100", "200")); + documentMap.put("object_type", List.of(Map.of("foo", Map.of("bar", 10)))); + } + + public void testJsonSupportedTypes() throws IOException { + MapperService mapperService = createMapperService(topMapping(b -> {})); + QueryShardContext queryShardContext = createQueryShardContext(mapperService); + when(queryShardContext.index()).thenReturn(new Index("test_index", "uuid")); + int totalDocs = 10000; + int docsPerLeafCount = 1000; + try (Directory dir = newDirectory()) { + IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER)); + Document d = new Document(); + for (int i = 0; i < totalDocs; i++) { + iw.addDocument(d); + if ((i + 1) % docsPerLeafCount == 0) { + iw.commit(); + } + } + try (IndexReader reader = DirectoryReader.open(iw)) { + iw.close(); + FieldTypeInference typeInference = new FieldTypeInference("test_index", queryShardContext.getMapperService(), reader); + String[] fieldName = { "text_field" }; + Mapper mapper = typeInference.infer(lookup -> documentMap.get(fieldName[0])); + assertEquals("text", mapper.typeName()); + + fieldName[0] = "int_field"; + mapper = typeInference.infer(lookup -> documentMap.get(fieldName[0])); + assertEquals("long", mapper.typeName()); + + fieldName[0] = "float_field"; + mapper = typeInference.infer(lookup -> documentMap.get(fieldName[0])); + assertEquals("float", mapper.typeName()); + + fieldName[0] = "date_field_1"; + mapper = typeInference.infer(lookup -> documentMap.get(fieldName[0])); + assertEquals("date", mapper.typeName()); + + fieldName[0] = "date_field_2"; + mapper = typeInference.infer(lookup -> documentMap.get(fieldName[0])); + assertEquals("date", mapper.typeName()); + + fieldName[0] = "boolean_field"; + mapper = typeInference.infer(lookup -> documentMap.get(fieldName[0])); + assertEquals("boolean", mapper.typeName()); + + fieldName[0] = "array_field_int"; + mapper = typeInference.infer(lookup -> documentMap.get(fieldName[0])); + assertEquals("long", mapper.typeName()); + + fieldName[0] = "array_field_text"; + mapper = typeInference.infer(lookup -> documentMap.get(fieldName[0])); + assertEquals("text", mapper.typeName()); + + fieldName[0] = "object_type"; + mapper = typeInference.infer(lookup -> documentMap.get(fieldName[0])); + assertEquals("object", mapper.typeName()); + + fieldName[0] = "null_field"; + mapper = typeInference.infer(lookup -> documentMap.get(fieldName[0])); + assertNull(mapper); + + // If field is missing ensure that sample docIDs generated for inference are ordered and are in bounds + fieldName[0] = "missing_field"; + List> docsEvaluated = new ArrayList<>(); + int[] totalDocsEvaluated = { 0 }; + typeInference.setSampleSize(50); + mapper = typeInference.infer(new ValueFetcher() { + @Override + public List fetchValues(SourceLookup lookup) throws IOException { + docsEvaluated.get(docsEvaluated.size() - 1).add(lookup.docId()); + totalDocsEvaluated[0]++; + return documentMap.get(fieldName[0]); + } + + @Override + public void setNextReader(LeafReaderContext leafReaderContext) { + docsEvaluated.add(new ArrayList<>()); + } + }); + assertNull(mapper); + assertEquals(typeInference.getSampleSize(), totalDocsEvaluated[0]); + for (List docsPerLeaf : docsEvaluated) { + for (int j = 0; j < docsPerLeaf.size() - 1; j++) { + assertTrue(docsPerLeaf.get(j) < docsPerLeaf.get(j + 1)); + } + if (!docsPerLeaf.isEmpty()) { + assertTrue(docsPerLeaf.get(0) >= 0 && docsPerLeaf.get(docsPerLeaf.size() - 1) < docsPerLeafCount); + } + } + } + } + } + + public void testDeleteAllDocs() throws IOException { + MapperService mapperService = createMapperService(topMapping(b -> {})); + QueryShardContext queryShardContext = createQueryShardContext(mapperService); + when(queryShardContext.index()).thenReturn(new Index("test_index", "uuid")); + int totalDocs = 10000; + int docsPerLeafCount = 1000; + try (Directory dir = newDirectory()) { + IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER)); + Document d = new Document(); + for (int i = 0; i < totalDocs; i++) { + iw.addDocument(d); + if ((i + 1) % docsPerLeafCount == 0) { + iw.commit(); + } + } + iw.deleteAll(); + iw.commit(); + + try (IndexReader reader = DirectoryReader.open(iw)) { + iw.close(); + FieldTypeInference typeInference = new FieldTypeInference("test_index", queryShardContext.getMapperService(), reader); + String[] fieldName = { "text_field" }; + Mapper mapper = typeInference.infer(lookup -> documentMap.get(fieldName[0])); + assertNull(mapper); + } + } + } + + public void testZeroDoc() throws IOException { + MapperService mapperService = createMapperService(topMapping(b -> {})); + QueryShardContext queryShardContext = createQueryShardContext(mapperService); + when(queryShardContext.index()).thenReturn(new Index("test_index", "uuid")); + try (Directory dir = newDirectory()) { + IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER)); + try (IndexReader reader = DirectoryReader.open(iw)) { + iw.close(); + FieldTypeInference typeInference = new FieldTypeInference("test_index", queryShardContext.getMapperService(), reader); + String[] fieldName = { "text_field" }; + Mapper mapper = typeInference.infer(lookup -> documentMap.get(fieldName[0])); + assertNull(mapper); + } + } + } + + public void testSampleGeneration() throws IOException { + MapperService mapperService = createMapperService(topMapping(b -> {})); + QueryShardContext queryShardContext = createQueryShardContext(mapperService); + when(queryShardContext.index()).thenReturn(new Index("test_index", "uuid")); + int totalDocs = 10000; + int docsPerLeafCount = 1000; + try (Directory dir = newDirectory()) { + IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER)); + Document d = new Document(); + for (int i = 0; i < totalDocs; i++) { + iw.addDocument(d); + if ((i + 1) % docsPerLeafCount == 0) { + iw.commit(); + } + } + try (IndexReader reader = DirectoryReader.open(iw)) { + iw.close(); + FieldTypeInference typeInference = new FieldTypeInference("test_index", queryShardContext.getMapperService(), reader); + typeInference.setSampleSize(1000 - 1); + typeInference.infer(lookup -> documentMap.get("unknown_field")); + assertThrows(IllegalArgumentException.class, () -> typeInference.setSampleSize(1000 + 1)); + typeInference.setSampleSize(1000); + typeInference.infer(lookup -> documentMap.get("unknown_field")); + } + } + } +} From 0843699ce4c1b6227bdd992dc7ac7c87e598abeb Mon Sep 17 00:00:00 2001 From: Rishabh Maurya Date: Mon, 3 Jun 2024 13:47:08 -0700 Subject: [PATCH 08/15] [DerivedField] object type support in mappings and new settings for derived field (#13717) --------- Signed-off-by: Rishabh Maurya --- .../opensearch/index/mapper/DerivedField.java | 81 ++++++++++++++++++- .../search/builder/SearchSourceBuilder.java | 31 +++++++ .../builder/SearchSourceBuilderTests.java | 64 +++++++++++++++ 3 files changed, 172 insertions(+), 4 deletions(-) diff --git a/server/src/main/java/org/opensearch/index/mapper/DerivedField.java b/server/src/main/java/org/opensearch/index/mapper/DerivedField.java index 7ebe4e5f0b0e8..b502e41cbb97b 100644 --- a/server/src/main/java/org/opensearch/index/mapper/DerivedField.java +++ b/server/src/main/java/org/opensearch/index/mapper/DerivedField.java @@ -8,6 +8,7 @@ package org.opensearch.index.mapper; +import org.opensearch.Version; import org.opensearch.common.annotation.PublicApi; import org.opensearch.core.common.io.stream.StreamInput; import org.opensearch.core.common.io.stream.StreamOutput; @@ -18,6 +19,7 @@ import org.opensearch.script.Script; import java.io.IOException; +import java.util.Map; import java.util.Objects; /** @@ -25,10 +27,13 @@ */ @PublicApi(since = "2.14.0") public class DerivedField implements Writeable, ToXContentFragment { - private final String name; private final String type; private final Script script; + private String sourceIndexedField; + private Map properties; + private Boolean ignoreMalformed; + private String format; public DerivedField(String name, String type, Script script) { this.name = name; @@ -40,6 +45,14 @@ public DerivedField(StreamInput in) throws IOException { name = in.readString(); type = in.readString(); script = new Script(in); + if (in.getVersion().onOrAfter(Version.V_2_15_0)) { + if (in.readBoolean()) { + properties = in.readMap(); + } + sourceIndexedField = in.readOptionalString(); + format = in.readOptionalString(); + ignoreMalformed = in.readOptionalBoolean(); + } } @Override @@ -47,6 +60,17 @@ public void writeTo(StreamOutput out) throws IOException { out.writeString(name); out.writeString(type); script.writeTo(out); + if (out.getVersion().onOrAfter(Version.V_2_15_0)) { + if (properties == null) { + out.writeBoolean(false); + } else { + out.writeBoolean(true); + out.writeMap(properties); + } + out.writeOptionalString(sourceIndexedField); + out.writeOptionalString(format); + out.writeOptionalBoolean(ignoreMalformed); + } } @Override @@ -54,6 +78,18 @@ public XContentBuilder toXContent(XContentBuilder builder, ToXContent.Params par builder.startObject(name); builder.field("type", type); builder.field("script", script); + if (properties != null) { + builder.field("properties", properties); + } + if (sourceIndexedField != null) { + builder.field("source_indexed_field", sourceIndexedField); + } + if (format != null) { + builder.field("format", format); + } + if (ignoreMalformed != null) { + builder.field("ignore_malformed", ignoreMalformed); + } builder.endObject(); return builder; } @@ -70,9 +106,41 @@ public Script getScript() { return script; } + public Map getProperties() { + return properties; + } + + public String getSourceIndexedField() { + return sourceIndexedField; + } + + public String getFormat() { + return format; + } + + public boolean getIgnoreMalformed() { + return Boolean.TRUE.equals(ignoreMalformed); + } + + public void setProperties(Map properties) { + this.properties = properties; + } + + public void setSourceIndexedField(String sourceIndexedField) { + this.sourceIndexedField = sourceIndexedField; + } + + public void setFormat(String format) { + this.format = format; + } + + public void setIgnoreMalformed(boolean ignoreMalformed) { + this.ignoreMalformed = ignoreMalformed; + } + @Override public int hashCode() { - return Objects.hash(name, type, script); + return Objects.hash(name, type, script, sourceIndexedField, properties, ignoreMalformed, format); } @Override @@ -84,7 +152,12 @@ public boolean equals(Object obj) { return false; } DerivedField other = (DerivedField) obj; - return Objects.equals(name, other.name) && Objects.equals(type, other.type) && Objects.equals(script, other.script); + return Objects.equals(name, other.name) + && Objects.equals(type, other.type) + && Objects.equals(script, other.script) + && Objects.equals(sourceIndexedField, other.sourceIndexedField) + && Objects.equals(properties, other.properties) + && Objects.equals(ignoreMalformed, other.ignoreMalformed) + && Objects.equals(format, other.format); } - } diff --git a/server/src/main/java/org/opensearch/search/builder/SearchSourceBuilder.java b/server/src/main/java/org/opensearch/search/builder/SearchSourceBuilder.java index 07248a0719c3a..6c22567d8cf0d 100644 --- a/server/src/main/java/org/opensearch/search/builder/SearchSourceBuilder.java +++ b/server/src/main/java/org/opensearch/search/builder/SearchSourceBuilder.java @@ -1004,6 +1004,37 @@ public SearchSourceBuilder derivedField(String name, String type, Script script) return this; } + /** + * Adds a derived field with the given name with provided type, script and other parameters + * @param name name of the derived field + * @param type type of the derived field + * @param script script associated with derived field + * @param properties map of field name and type of field for nested fields within object derived field + * @param sourceIndexedField source text field which is indexed to filter documents for better performance + * @param format date format + * @param ignoreMalformed ignores malformed fields instead of failing search request + */ + public SearchSourceBuilder derivedField( + String name, + String type, + Script script, + Map properties, + String sourceIndexedField, + String format, + Boolean ignoreMalformed + ) { + if (derivedFields == null) { + derivedFields = new ArrayList<>(); + } + DerivedField derivedField = new DerivedField(name, type, script); + derivedField.setProperties(properties); + derivedField.setSourceIndexedField(sourceIndexedField); + derivedField.setFormat(format); + derivedField.setIgnoreMalformed(ignoreMalformed); + derivedFields.add(derivedField); + return this; + } + /** * Sets the boost a specific index or alias will receive when the query is executed * against it. diff --git a/server/src/test/java/org/opensearch/search/builder/SearchSourceBuilderTests.java b/server/src/test/java/org/opensearch/search/builder/SearchSourceBuilderTests.java index 5b1035e24185d..fd3dd8c12e84e 100644 --- a/server/src/test/java/org/opensearch/search/builder/SearchSourceBuilderTests.java +++ b/server/src/test/java/org/opensearch/search/builder/SearchSourceBuilderTests.java @@ -357,6 +357,70 @@ public void testDerivedFieldsParsingAndSerialization() throws IOException { } + public void testDerivedFieldsParsingAndSerializationObjectType() throws IOException { + { + String restContent = "{\n" + + " \"derived\": {\n" + + " \"duration\": {\n" + + " \"type\": \"long\",\n" + + " \"script\": \"emit(doc['test'])\"\n" + + " },\n" + + " \"ip_from_message\": {\n" + + " \"type\": \"keyword\",\n" + + " \"script\": \"emit(doc['message'])\"\n" + + " },\n" + + " \"object\": {\n" + + " \"type\": \"object\",\n" + + " \"script\": \"emit(doc['test'])\",\n" + + " \"format\": \"dd-MM-yyyy\",\n" + + " \"source_indexed_field\": \"test\",\n" + + " \"ignore_malformed\": true,\n" + + " \"properties\": {\n" + + " \"sub_field\": \"text\"\n" + + " }\n" + + " }\n" + + " },\n" + + " \"query\" : {\n" + + " \"match\": { \"content\": { \"query\": \"foo bar\" }}\n" + + " }\n" + + "}"; + + String expectedContent = + "{\"query\":{\"match\":{\"content\":{\"query\":\"foo bar\",\"operator\":\"OR\",\"prefix_length\":0,\"max_expansions\":50,\"fuzzy_transpositions\":true,\"lenient\":false,\"zero_terms_query\":\"NONE\",\"auto_generate_synonyms_phrase_query\":true,\"boost\":1.0}}},\"derived\":{\"duration\":{\"type\":\"long\",\"script\":\"emit(doc['test'])\"},\"ip_from_message\":{\"type\":\"keyword\",\"script\":\"emit(doc['message'])\"},\"object\":{\"format\":\"dd-MM-yyyy\",\"source_indexed_field\":\"test\",\"ignore_malformed\":true,\"type\":\"object\",\"script\":\"emit(doc['test'])\",\"properties\":{\"sub_field\":\"text\"}},\"derived_field\":{\"type\":\"object\",\"script\":{\"source\":\"emit(doc['message']\",\"lang\":\"painless\"},\"properties\":{\"sub_field_2\":\"keyword\"},\"source_indexed_field\":\"message\",\"format\":\"dd-MM-yyyy\",\"ignore_malformed\":true}}}"; + + try (XContentParser parser = createParser(JsonXContent.jsonXContent, restContent)) { + SearchSourceBuilder searchSourceBuilder = SearchSourceBuilder.fromXContent(parser); + searchSourceBuilder.derivedField( + "derived_field", + "object", + new Script("emit(doc['message']"), + Map.of("sub_field_2", "keyword"), + "message", + "dd-MM-yyyy", + true + ); + searchSourceBuilder = rewrite(searchSourceBuilder); + assertEquals(3, searchSourceBuilder.getDerivedFieldsObject().size()); + assertEquals(1, searchSourceBuilder.getDerivedFields().size()); + assertEquals(1, searchSourceBuilder.getDerivedFields().get(0).getProperties().size()); + assertEquals("message", searchSourceBuilder.getDerivedFields().get(0).getSourceIndexedField()); + assertEquals("dd-MM-yyyy", searchSourceBuilder.getDerivedFields().get(0).getFormat()); + assertTrue(searchSourceBuilder.getDerivedFields().get(0).getIgnoreMalformed()); + + try (BytesStreamOutput output = new BytesStreamOutput()) { + searchSourceBuilder.writeTo(output); + try (StreamInput in = new NamedWriteableAwareStreamInput(output.bytes().streamInput(), namedWriteableRegistry)) { + SearchSourceBuilder deserializedBuilder = new SearchSourceBuilder(in); + String actualContent = deserializedBuilder.toString(); + assertEquals(expectedContent, actualContent); + assertEquals(searchSourceBuilder.hashCode(), deserializedBuilder.hashCode()); + assertNotSame(searchSourceBuilder, deserializedBuilder); + } + } + } + } + } + public void testAggsParsing() throws IOException { { String restContent = "{\n" From 8fce7e68bb9ca076ece6ed6e779ae117a7ca11ee Mon Sep 17 00:00:00 2001 From: Andrew Ross Date: Mon, 3 Jun 2024 15:49:16 -0700 Subject: [PATCH 09/15] Update gradle config for develocity rebranding (#13942) The Gradle Enterprise plugin has been rebranded as the Develocity Plugin, resulting in a number of settings being deprecated. This commit updates the settings per [this migration guide][1] and removes all warnings from the build. [1]: https://docs.gradle.com/develocity/gradle-plugin/legacy/#develocity_migration Signed-off-by: Andrew Ross --- build.gradle | 11 ++++++++--- gradle.properties | 2 +- settings.gradle | 2 +- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/build.gradle b/build.gradle index e92f396e006f5..55b31ca816214 100644 --- a/build.gradle +++ b/build.gradle @@ -55,7 +55,6 @@ plugins { id 'opensearch.docker-support' id 'opensearch.global-build-info' id "com.diffplug.spotless" version "6.25.0" apply false - id "org.gradle.test-retry" version "1.5.9" apply false id "test-report-aggregation" id 'jacoco-report-aggregation' } @@ -71,6 +70,13 @@ apply from: 'gradle/run.gradle' apply from: 'gradle/missing-javadoc.gradle' apply from: 'gradle/code-coverage.gradle' +// Disable unconditional publishing of build scans +develocity { + buildScan { + publishing.onlyIf { false } + } +} + // common maven publishing configuration allprojects { group = 'org.opensearch' @@ -462,9 +468,8 @@ gradle.projectsEvaluated { // test retry configuration subprojects { - apply plugin: "org.gradle.test-retry" tasks.withType(Test).configureEach { - retry { + develocity.testRetry { if (BuildParams.isCi()) { maxRetries = 3 maxFailures = 10 diff --git a/gradle.properties b/gradle.properties index 7c359ed2b652c..4e8c5b98116c1 100644 --- a/gradle.properties +++ b/gradle.properties @@ -22,7 +22,7 @@ org.gradle.jvmargs=-Xmx3g -XX:+HeapDumpOnOutOfMemoryError -Xss2m \ options.forkOptions.memoryMaximumSize=3g # Disable Gradle Enterprise Gradle plugin's test retry -systemProp.gradle.enterprise.testretry.enabled=false +systemProp.develocity.testretry.enabled.enabled=false # Disable duplicate project id detection # See https://docs.gradle.org/current/userguide/upgrading_version_6.html#duplicate_project_names_may_cause_publication_to_fail diff --git a/settings.gradle b/settings.gradle index f58a8b35b10eb..ca8538a967ef7 100644 --- a/settings.gradle +++ b/settings.gradle @@ -10,7 +10,7 @@ */ plugins { - id "com.gradle.enterprise" version "3.17.4" + id "com.gradle.develocity" version "3.17.4" } ext.disableBuildCache = hasProperty('DISABLE_BUILD_CACHE') || System.getenv().containsKey('DISABLE_BUILD_CACHE') From 74cadc074dd57557bcf07956b7adf4ec7b81ac01 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 3 Jun 2024 18:50:01 -0400 Subject: [PATCH 10/15] Bump actions/checkout from 3 to 4 (#13935) * Bump actions/checkout from 3 to 4 Bumps [actions/checkout](https://github.com/actions/checkout) from 3 to 4. - [Release notes](https://github.com/actions/checkout/releases) - [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md) - [Commits](https://github.com/actions/checkout/compare/v3...v4) --- updated-dependencies: - dependency-name: actions/checkout dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] * Update changelog Signed-off-by: dependabot[bot] --------- Signed-off-by: dependabot[bot] Signed-off-by: Peter Nied Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: dependabot[bot] Co-authored-by: Peter Nied --- .github/workflows/gradle-check.yml | 2 +- CHANGELOG.md | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/gradle-check.yml b/.github/workflows/gradle-check.yml index 0ba4dbe374fdd..07185ef4c65e3 100644 --- a/.github/workflows/gradle-check.yml +++ b/.github/workflows/gradle-check.yml @@ -17,7 +17,7 @@ jobs: outputs: RUN_GRADLE_CHECK: ${{ steps.changed-files-specific.outputs.any_changed }} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Get changed files id: changed-files-specific uses: tj-actions/changed-files@v44 diff --git a/CHANGELOG.md b/CHANGELOG.md index 3554565e737e8..5ce11dabb6e9c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -31,6 +31,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Bump `reactor-netty` from 1.1.17 to 1.1.19 ([#13825](https://github.com/opensearch-project/OpenSearch/pull/13825)) - Bump `commons-cli:commons-cli` from 1.7.0 to 1.8.0 ([#13840](https://github.com/opensearch-project/OpenSearch/pull/13840)) - Bump `org.apache.xmlbeans:xmlbeans` from 5.2.0 to 5.2.1 ([#13839](https://github.com/opensearch-project/OpenSearch/pull/13839)) +- Bump `actions/checkout` from 3 to 4 ([#13935](https://github.com/opensearch-project/OpenSearch/pull/13935)) - Bump `com.netflix.nebula.ospackage-base` from 11.9.0 to 11.9.1 ([#13933](https://github.com/opensearch-project/OpenSearch/pull/13933)) ### Changed From e9834452505256f598f2247664c3be2c725e09f9 Mon Sep 17 00:00:00 2001 From: Prudhvi Godithi Date: Tue, 4 Jun 2024 13:02:17 -0700 Subject: [PATCH 11/15] Add DCO workflow (#13970) Signed-off-by: Prudhvi Godithi --- .github/workflows/dco.yml | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 .github/workflows/dco.yml diff --git a/.github/workflows/dco.yml b/.github/workflows/dco.yml new file mode 100644 index 0000000000000..9580d510fd108 --- /dev/null +++ b/.github/workflows/dco.yml @@ -0,0 +1,19 @@ +name: Developer Certificate of Origin Check + +on: [pull_request] + +jobs: + dco-check: + runs-on: ubuntu-latest + + steps: + - name: Get PR Commits + id: 'get-pr-commits' + uses: tim-actions/get-pr-commits@v1.1.0 + with: + token: ${{ secrets.GITHUB_TOKEN }} + - name: DCO Check + uses: tim-actions/dco@v1.1.0 + with: + commits: ${{ steps.get-pr-commits.outputs.commits }} + From 867013b821e2287cf90f548b707e2d8dca3f487c Mon Sep 17 00:00:00 2001 From: Andriy Redko Date: Tue, 4 Jun 2024 17:56:12 -0400 Subject: [PATCH 12/15] Revert "Optimize remote state stale file deletion (#13131)" (#13969) This reverts commit b9befaa12524744ffe71d3f12699cb27cb8fca5e. Signed-off-by: Andriy Redko --- .../RemoteClusterStateCleanupManagerIT.java | 145 ------ .../remote/RemoteClusterStateServiceIT.java | 63 +++ .../RemoteStoreBaseIntegTestCase.java | 10 - .../common/settings/ClusterSettings.java | 2 - .../RemoteClusterStateCleanupManager.java | 408 ---------------- .../remote/RemoteClusterStateService.java | 275 +++++++++-- .../main/java/org/opensearch/node/Node.java | 7 +- .../GatewayMetaStatePersistedStateTests.java | 6 +- ...RemoteClusterStateCleanupManagerTests.java | 446 ------------------ .../RemoteClusterStateServiceTests.java | 136 +++++- 10 files changed, 442 insertions(+), 1056 deletions(-) delete mode 100644 server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteClusterStateCleanupManagerIT.java delete mode 100644 server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateCleanupManager.java delete mode 100644 server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateCleanupManagerTests.java diff --git a/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteClusterStateCleanupManagerIT.java b/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteClusterStateCleanupManagerIT.java deleted file mode 100644 index 0f441fe01a368..0000000000000 --- a/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteClusterStateCleanupManagerIT.java +++ /dev/null @@ -1,145 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - */ - -package org.opensearch.gateway.remote; - -import org.opensearch.action.admin.cluster.settings.ClusterUpdateSettingsResponse; -import org.opensearch.common.blobstore.BlobPath; -import org.opensearch.common.settings.Settings; -import org.opensearch.remotestore.RemoteStoreBaseIntegTestCase; -import org.opensearch.repositories.RepositoriesService; -import org.opensearch.repositories.blobstore.BlobStoreRepository; -import org.opensearch.test.OpenSearchIntegTestCase; -import org.junit.Before; - -import java.nio.charset.StandardCharsets; -import java.util.Base64; -import java.util.Map; -import java.util.concurrent.TimeUnit; - -import static org.opensearch.gateway.remote.RemoteClusterStateCleanupManager.CLUSTER_STATE_CLEANUP_INTERVAL_DEFAULT; -import static org.opensearch.gateway.remote.RemoteClusterStateCleanupManager.REMOTE_CLUSTER_STATE_CLEANUP_INTERVAL_SETTING; -import static org.opensearch.gateway.remote.RemoteClusterStateCleanupManager.RETAINED_MANIFESTS; -import static org.opensearch.gateway.remote.RemoteClusterStateCleanupManager.SKIP_CLEANUP_STATE_CHANGES; -import static org.opensearch.gateway.remote.RemoteClusterStateService.REMOTE_CLUSTER_STATE_ENABLED_SETTING; -import static org.opensearch.indices.IndicesService.CLUSTER_DEFAULT_INDEX_REFRESH_INTERVAL_SETTING; - -@OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 0) -public class RemoteClusterStateCleanupManagerIT extends RemoteStoreBaseIntegTestCase { - - private static final String INDEX_NAME = "test-index"; - - @Before - public void setup() { - asyncUploadMockFsRepo = false; - } - - @Override - protected Settings nodeSettings(int nodeOrdinal) { - return Settings.builder().put(super.nodeSettings(nodeOrdinal)).put(REMOTE_CLUSTER_STATE_ENABLED_SETTING.getKey(), true).build(); - } - - private Map initialTestSetup(int shardCount, int replicaCount, int dataNodeCount, int clusterManagerNodeCount) { - prepareCluster(clusterManagerNodeCount, dataNodeCount, INDEX_NAME, replicaCount, shardCount); - Map indexStats = indexData(1, false, INDEX_NAME); - assertEquals(shardCount * (replicaCount + 1), getNumShards(INDEX_NAME).totalNumShards); - ensureGreen(INDEX_NAME); - return indexStats; - } - - public void testRemoteCleanupTaskUpdated() { - int shardCount = randomIntBetween(1, 2); - int replicaCount = 1; - int dataNodeCount = shardCount * (replicaCount + 1); - int clusterManagerNodeCount = 1; - - initialTestSetup(shardCount, replicaCount, dataNodeCount, clusterManagerNodeCount); - RemoteClusterStateCleanupManager remoteClusterStateCleanupManager = internalCluster().getClusterManagerNodeInstance( - RemoteClusterStateCleanupManager.class - ); - - assertEquals(CLUSTER_STATE_CLEANUP_INTERVAL_DEFAULT, remoteClusterStateCleanupManager.getStaleFileDeletionTask().getInterval()); - assertTrue(remoteClusterStateCleanupManager.getStaleFileDeletionTask().isScheduled()); - - // now disable - client().admin() - .cluster() - .prepareUpdateSettings() - .setPersistentSettings(Settings.builder().put(REMOTE_CLUSTER_STATE_CLEANUP_INTERVAL_SETTING.getKey(), -1)) - .get(); - - assertEquals(-1, remoteClusterStateCleanupManager.getStaleFileDeletionTask().getInterval().getMillis()); - assertFalse(remoteClusterStateCleanupManager.getStaleFileDeletionTask().isScheduled()); - - // now set Clean up interval to 1 min - client().admin() - .cluster() - .prepareUpdateSettings() - .setPersistentSettings(Settings.builder().put(REMOTE_CLUSTER_STATE_CLEANUP_INTERVAL_SETTING.getKey(), "1m")) - .get(); - assertEquals(1, remoteClusterStateCleanupManager.getStaleFileDeletionTask().getInterval().getMinutes()); - } - - public void testRemoteCleanupDeleteStale() throws Exception { - int shardCount = randomIntBetween(1, 2); - int replicaCount = 1; - int dataNodeCount = shardCount * (replicaCount + 1); - int clusterManagerNodeCount = 1; - - initialTestSetup(shardCount, replicaCount, dataNodeCount, clusterManagerNodeCount); - - // set cleanup interval to 100 ms to make the test faster - ClusterUpdateSettingsResponse response = client().admin() - .cluster() - .prepareUpdateSettings() - .setPersistentSettings(Settings.builder().put(REMOTE_CLUSTER_STATE_CLEANUP_INTERVAL_SETTING.getKey(), "100ms")) - .get(); - - assertTrue(response.isAcknowledged()); - - // update cluster state 21 times to ensure that clean up has run after this will upload 42 manifest files - // to repository, if manifest files are less than that it means clean up has run - updateClusterStateNTimes(RETAINED_MANIFESTS + SKIP_CLEANUP_STATE_CHANGES + 1); - - RepositoriesService repositoriesService = internalCluster().getClusterManagerNodeInstance(RepositoriesService.class); - BlobStoreRepository repository = (BlobStoreRepository) repositoriesService.repository(REPOSITORY_NAME); - BlobPath baseMetadataPath = repository.basePath() - .add( - Base64.getUrlEncoder() - .withoutPadding() - .encodeToString(getClusterState().getClusterName().value().getBytes(StandardCharsets.UTF_8)) - ) - .add("cluster-state") - .add(getClusterState().metadata().clusterUUID()); - BlobPath manifestContainerPath = baseMetadataPath.add("manifest"); - - assertBusy(() -> { - int manifestFiles = repository.blobStore().blobContainer(manifestContainerPath).listBlobsByPrefix("manifest").size(); - logger.info("number of current manifest file: {}", manifestFiles); - // we can't guarantee that we have same number of manifest as Retained manifest in our repo as there can be other queued task - // other than replica count change which can upload new manifest files, that's why we check that number of manifests is between - // Retained manifests and Retained manifests + 2 * Skip cleanup state changes (each cluster state update uploads 2 manifests) - assertTrue( - "Current number of manifest files: " + manifestFiles, - manifestFiles >= RETAINED_MANIFESTS && manifestFiles < RETAINED_MANIFESTS + 2 * SKIP_CLEANUP_STATE_CHANGES - ); - }, 500, TimeUnit.MILLISECONDS); - } - - private void updateClusterStateNTimes(int n) { - int newReplicaCount = randomIntBetween(0, 3); - for (int i = n; i > 0; i--) { - ClusterUpdateSettingsResponse response = client().admin() - .cluster() - .prepareUpdateSettings() - .setPersistentSettings(Settings.builder().put(CLUSTER_DEFAULT_INDEX_REFRESH_INTERVAL_SETTING.getKey(), i, TimeUnit.SECONDS)) - .get(); - assertTrue(response.isAcknowledged()); - } - } -} diff --git a/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteClusterStateServiceIT.java b/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteClusterStateServiceIT.java index ab2f0f0080566..42120aa32eb47 100644 --- a/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteClusterStateServiceIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteClusterStateServiceIT.java @@ -10,6 +10,7 @@ import org.opensearch.action.admin.cluster.node.stats.NodesStatsRequest; import org.opensearch.action.admin.cluster.node.stats.NodesStatsResponse; +import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.common.blobstore.BlobPath; import org.opensearch.common.settings.Settings; import org.opensearch.discovery.DiscoveryStats; @@ -26,6 +27,7 @@ import java.util.function.Function; import java.util.stream.Collectors; +import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_REPLICAS; import static org.opensearch.gateway.remote.RemoteClusterStateService.COORDINATION_METADATA; import static org.opensearch.gateway.remote.RemoteClusterStateService.CUSTOM_METADATA; import static org.opensearch.gateway.remote.RemoteClusterStateService.DELIMITER; @@ -49,6 +51,16 @@ protected Settings nodeSettings(int nodeOrdinal) { return Settings.builder().put(super.nodeSettings(nodeOrdinal)).put(REMOTE_CLUSTER_STATE_ENABLED_SETTING.getKey(), true).build(); } + private void prepareCluster(int numClusterManagerNodes, int numDataOnlyNodes, String indices, int replicaCount, int shardCount) { + internalCluster().startClusterManagerOnlyNodes(numClusterManagerNodes); + internalCluster().startDataOnlyNodes(numDataOnlyNodes); + for (String index : indices.split(",")) { + createIndex(index, remoteStoreIndexSettings(replicaCount, shardCount)); + ensureYellowAndNoInitializingShards(index); + ensureGreen(index); + } + } + private Map initialTestSetup(int shardCount, int replicaCount, int dataNodeCount, int clusterManagerNodeCount) { prepareCluster(clusterManagerNodeCount, dataNodeCount, INDEX_NAME, replicaCount, shardCount); Map indexStats = indexData(1, false, INDEX_NAME); @@ -57,6 +69,49 @@ private Map initialTestSetup(int shardCount, int replicaCount, int return indexStats; } + public void testFullClusterRestoreStaleDelete() throws Exception { + int shardCount = randomIntBetween(1, 2); + int replicaCount = 1; + int dataNodeCount = shardCount * (replicaCount + 1); + int clusterManagerNodeCount = 1; + + initialTestSetup(shardCount, replicaCount, dataNodeCount, clusterManagerNodeCount); + setReplicaCount(0); + setReplicaCount(2); + setReplicaCount(0); + setReplicaCount(1); + setReplicaCount(0); + setReplicaCount(1); + setReplicaCount(0); + setReplicaCount(2); + setReplicaCount(0); + + RemoteClusterStateService remoteClusterStateService = internalCluster().getClusterManagerNodeInstance( + RemoteClusterStateService.class + ); + + RepositoriesService repositoriesService = internalCluster().getClusterManagerNodeInstance(RepositoriesService.class); + + BlobStoreRepository repository = (BlobStoreRepository) repositoriesService.repository(REPOSITORY_NAME); + BlobPath baseMetadataPath = repository.basePath() + .add( + Base64.getUrlEncoder() + .withoutPadding() + .encodeToString(getClusterState().getClusterName().value().getBytes(StandardCharsets.UTF_8)) + ) + .add("cluster-state") + .add(getClusterState().metadata().clusterUUID()); + + assertEquals(10, repository.blobStore().blobContainer(baseMetadataPath.add("manifest")).listBlobsByPrefix("manifest").size()); + + Map indexMetadataMap = remoteClusterStateService.getLatestClusterState( + cluster().getClusterName(), + getClusterState().metadata().clusterUUID() + ).getMetadata().getIndices(); + assertEquals(0, indexMetadataMap.values().stream().findFirst().get().getNumberOfReplicas()); + assertEquals(shardCount, indexMetadataMap.values().stream().findFirst().get().getNumberOfShards()); + } + public void testRemoteStateStats() { int shardCount = randomIntBetween(1, 2); int replicaCount = 1; @@ -186,4 +241,12 @@ private void validateNodesStatsResponse(NodesStatsResponse nodesStatsResponse) { assertNotNull(nodesStatsResponse.getNodes().get(0)); assertNotNull(nodesStatsResponse.getNodes().get(0).getDiscoveryStats()); } + + private void setReplicaCount(int replicaCount) { + client().admin() + .indices() + .prepareUpdateSettings(INDEX_NAME) + .setSettings(Settings.builder().put(SETTING_NUMBER_OF_REPLICAS, replicaCount)) + .get(); + } } diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBaseIntegTestCase.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBaseIntegTestCase.java index 64efcee6ef1b5..740aee69f7d80 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBaseIntegTestCase.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBaseIntegTestCase.java @@ -350,14 +350,4 @@ protected void restore(boolean restoreAllShards, String... indices) { PlainActionFuture.newFuture() ); } - - protected void prepareCluster(int numClusterManagerNodes, int numDataOnlyNodes, String indices, int replicaCount, int shardCount) { - internalCluster().startClusterManagerOnlyNodes(numClusterManagerNodes); - internalCluster().startDataOnlyNodes(numDataOnlyNodes); - for (String index : indices.split(",")) { - createIndex(index, remoteStoreIndexSettings(replicaCount, shardCount)); - ensureYellowAndNoInitializingShards(index); - ensureGreen(index); - } - } } diff --git a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java index 297fc98764d07..7814518af471b 100644 --- a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java +++ b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java @@ -104,7 +104,6 @@ import org.opensearch.gateway.GatewayService; import org.opensearch.gateway.PersistedClusterStateService; import org.opensearch.gateway.ShardsBatchGatewayAllocator; -import org.opensearch.gateway.remote.RemoteClusterStateCleanupManager; import org.opensearch.gateway.remote.RemoteClusterStateService; import org.opensearch.http.HttpTransportSettings; import org.opensearch.index.IndexModule; @@ -712,7 +711,6 @@ public void apply(Settings value, Settings current, Settings previous) { SearchRequestSlowLog.CLUSTER_SEARCH_REQUEST_SLOWLOG_LEVEL, // Remote cluster state settings - RemoteClusterStateCleanupManager.REMOTE_CLUSTER_STATE_CLEANUP_INTERVAL_SETTING, RemoteClusterStateService.REMOTE_CLUSTER_STATE_ENABLED_SETTING, RemoteClusterStateService.INDEX_METADATA_UPLOAD_TIMEOUT_SETTING, RemoteClusterStateService.GLOBAL_METADATA_UPLOAD_TIMEOUT_SETTING, diff --git a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateCleanupManager.java b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateCleanupManager.java deleted file mode 100644 index 8a106a25e5630..0000000000000 --- a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateCleanupManager.java +++ /dev/null @@ -1,408 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - */ - -package org.opensearch.gateway.remote; - -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; -import org.apache.logging.log4j.message.ParameterizedMessage; -import org.apache.logging.log4j.util.Strings; -import org.opensearch.cluster.ClusterState; -import org.opensearch.cluster.service.ClusterApplierService; -import org.opensearch.cluster.service.ClusterService; -import org.opensearch.common.blobstore.BlobMetadata; -import org.opensearch.common.blobstore.BlobPath; -import org.opensearch.common.settings.ClusterSettings; -import org.opensearch.common.settings.Setting; -import org.opensearch.common.unit.TimeValue; -import org.opensearch.common.util.concurrent.AbstractAsyncTask; -import org.opensearch.core.action.ActionListener; -import org.opensearch.index.translog.transfer.BlobStoreTransferService; -import org.opensearch.threadpool.ThreadPool; - -import java.io.Closeable; -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashSet; -import java.util.List; -import java.util.Locale; -import java.util.Set; -import java.util.concurrent.atomic.AtomicBoolean; - -import static org.opensearch.gateway.remote.RemoteClusterStateService.GLOBAL_METADATA_FORMAT; -import static org.opensearch.gateway.remote.RemoteClusterStateService.GLOBAL_METADATA_PATH_TOKEN; -import static org.opensearch.gateway.remote.RemoteClusterStateService.INDEX_METADATA_FORMAT; -import static org.opensearch.gateway.remote.RemoteClusterStateService.INDEX_PATH_TOKEN; -import static org.opensearch.gateway.remote.RemoteClusterStateService.MANIFEST_FILE_PREFIX; -import static org.opensearch.gateway.remote.RemoteClusterStateService.MANIFEST_PATH_TOKEN; - -/** - * A Manager which provides APIs to clean up stale cluster state files and runs an async stale cleanup task - * - * @opensearch.internal - */ -public class RemoteClusterStateCleanupManager implements Closeable { - - public static final int RETAINED_MANIFESTS = 10; - public static final int SKIP_CLEANUP_STATE_CHANGES = 10; - public static final TimeValue CLUSTER_STATE_CLEANUP_INTERVAL_DEFAULT = TimeValue.timeValueMinutes(5); - public static final TimeValue CLUSTER_STATE_CLEANUP_INTERVAL_MINIMUM = TimeValue.MINUS_ONE; - - /** - * Setting to specify the interval to do run stale file cleanup job - * Min value -1 indicates that the stale file cleanup job should be disabled - */ - public static final Setting REMOTE_CLUSTER_STATE_CLEANUP_INTERVAL_SETTING = Setting.timeSetting( - "cluster.remote_store.state.cleanup_interval", - CLUSTER_STATE_CLEANUP_INTERVAL_DEFAULT, - CLUSTER_STATE_CLEANUP_INTERVAL_MINIMUM, - Setting.Property.NodeScope, - Setting.Property.Dynamic - ); - private static final Logger logger = LogManager.getLogger(RemoteClusterStateCleanupManager.class); - private final RemoteClusterStateService remoteClusterStateService; - private final RemotePersistenceStats remoteStateStats; - private BlobStoreTransferService blobStoreTransferService; - private TimeValue staleFileCleanupInterval; - private final AtomicBoolean deleteStaleMetadataRunning = new AtomicBoolean(false); - private AsyncStaleFileDeletion staleFileDeletionTask; - private long lastCleanupAttemptStateVersion; - private final ThreadPool threadpool; - private final ClusterApplierService clusterApplierService; - - public RemoteClusterStateCleanupManager(RemoteClusterStateService remoteClusterStateService, ClusterService clusterService) { - this.remoteClusterStateService = remoteClusterStateService; - this.remoteStateStats = remoteClusterStateService.getStats(); - ClusterSettings clusterSettings = clusterService.getClusterSettings(); - this.clusterApplierService = clusterService.getClusterApplierService(); - this.staleFileCleanupInterval = clusterSettings.get(REMOTE_CLUSTER_STATE_CLEANUP_INTERVAL_SETTING); - this.threadpool = remoteClusterStateService.getThreadpool(); - // initialize with 0, a cleanup will be done when this node is elected master node and version is incremented more than threshold - this.lastCleanupAttemptStateVersion = 0; - clusterSettings.addSettingsUpdateConsumer(REMOTE_CLUSTER_STATE_CLEANUP_INTERVAL_SETTING, this::updateCleanupInterval); - } - - void start() { - staleFileDeletionTask = new AsyncStaleFileDeletion(this); - } - - @Override - public void close() throws IOException { - if (staleFileDeletionTask != null) { - staleFileDeletionTask.close(); - } - } - - private BlobStoreTransferService getBlobStoreTransferService() { - if (blobStoreTransferService == null) { - blobStoreTransferService = new BlobStoreTransferService(remoteClusterStateService.getBlobStore(), threadpool); - } - return blobStoreTransferService; - } - - private void updateCleanupInterval(TimeValue updatedInterval) { - this.staleFileCleanupInterval = updatedInterval; - logger.info("updated remote state cleanup interval to {}", updatedInterval); - // After updating the interval, we need to close the current task and create a new one which will run with updated interval - if (staleFileDeletionTask != null && !staleFileDeletionTask.getInterval().equals(updatedInterval)) { - staleFileDeletionTask.setInterval(updatedInterval); - } - } - - // visible for testing - void cleanUpStaleFiles() { - ClusterState currentAppliedState = clusterApplierService.state(); - if (currentAppliedState.nodes().isLocalNodeElectedClusterManager()) { - long cleanUpAttemptStateVersion = currentAppliedState.version(); - assert Strings.isNotEmpty(currentAppliedState.getClusterName().value()) : "cluster name is not set"; - assert Strings.isNotEmpty(currentAppliedState.metadata().clusterUUID()) : "cluster uuid is not set"; - if (cleanUpAttemptStateVersion - lastCleanupAttemptStateVersion > SKIP_CLEANUP_STATE_CHANGES) { - logger.info( - "Cleaning up stale remote state files for cluster [{}] with uuid [{}]. Last clean was done before {} updates", - currentAppliedState.getClusterName().value(), - currentAppliedState.metadata().clusterUUID(), - cleanUpAttemptStateVersion - lastCleanupAttemptStateVersion - ); - this.deleteStaleClusterMetadata( - currentAppliedState.getClusterName().value(), - currentAppliedState.metadata().clusterUUID(), - RETAINED_MANIFESTS - ); - lastCleanupAttemptStateVersion = cleanUpAttemptStateVersion; - } else { - logger.debug( - "Skipping cleanup of stale remote state files for cluster [{}] with uuid [{}]. Last clean was done before {} updates, which is less than threshold {}", - currentAppliedState.getClusterName().value(), - currentAppliedState.metadata().clusterUUID(), - cleanUpAttemptStateVersion - lastCleanupAttemptStateVersion, - SKIP_CLEANUP_STATE_CHANGES - ); - } - } else { - logger.debug("Skipping cleanup task as local node is not elected Cluster Manager"); - } - } - - private void addStaleGlobalMetadataPath(String fileName, Set filesToKeep, Set staleGlobalMetadataPaths) { - if (!filesToKeep.contains(fileName)) { - String[] splitPath = fileName.split("/"); - staleGlobalMetadataPaths.add( - new BlobPath().add(GLOBAL_METADATA_PATH_TOKEN).buildAsString() + GLOBAL_METADATA_FORMAT.blobName( - splitPath[splitPath.length - 1] - ) - ); - } - } - - // visible for testing - void deleteClusterMetadata( - String clusterName, - String clusterUUID, - List activeManifestBlobMetadata, - List staleManifestBlobMetadata - ) { - try { - Set filesToKeep = new HashSet<>(); - Set staleManifestPaths = new HashSet<>(); - Set staleIndexMetadataPaths = new HashSet<>(); - Set staleGlobalMetadataPaths = new HashSet<>(); - activeManifestBlobMetadata.forEach(blobMetadata -> { - ClusterMetadataManifest clusterMetadataManifest = remoteClusterStateService.fetchRemoteClusterMetadataManifest( - clusterName, - clusterUUID, - blobMetadata.name() - ); - clusterMetadataManifest.getIndices() - .forEach(uploadedIndexMetadata -> filesToKeep.add(uploadedIndexMetadata.getUploadedFilename())); - if (clusterMetadataManifest.getCodecVersion() == ClusterMetadataManifest.CODEC_V1) { - filesToKeep.add(clusterMetadataManifest.getGlobalMetadataFileName()); - } else if (clusterMetadataManifest.getCodecVersion() >= ClusterMetadataManifest.CODEC_V2) { - filesToKeep.add(clusterMetadataManifest.getCoordinationMetadata().getUploadedFilename()); - filesToKeep.add(clusterMetadataManifest.getSettingsMetadata().getUploadedFilename()); - filesToKeep.add(clusterMetadataManifest.getTemplatesMetadata().getUploadedFilename()); - clusterMetadataManifest.getCustomMetadataMap() - .values() - .forEach(attribute -> filesToKeep.add(attribute.getUploadedFilename())); - } - }); - staleManifestBlobMetadata.forEach(blobMetadata -> { - ClusterMetadataManifest clusterMetadataManifest = remoteClusterStateService.fetchRemoteClusterMetadataManifest( - clusterName, - clusterUUID, - blobMetadata.name() - ); - staleManifestPaths.add(new BlobPath().add(MANIFEST_PATH_TOKEN).buildAsString() + blobMetadata.name()); - if (clusterMetadataManifest.getCodecVersion() == ClusterMetadataManifest.CODEC_V1) { - addStaleGlobalMetadataPath(clusterMetadataManifest.getGlobalMetadataFileName(), filesToKeep, staleGlobalMetadataPaths); - } else if (clusterMetadataManifest.getCodecVersion() >= ClusterMetadataManifest.CODEC_V2) { - addStaleGlobalMetadataPath( - clusterMetadataManifest.getCoordinationMetadata().getUploadedFilename(), - filesToKeep, - staleGlobalMetadataPaths - ); - addStaleGlobalMetadataPath( - clusterMetadataManifest.getSettingsMetadata().getUploadedFilename(), - filesToKeep, - staleGlobalMetadataPaths - ); - addStaleGlobalMetadataPath( - clusterMetadataManifest.getTemplatesMetadata().getUploadedFilename(), - filesToKeep, - staleGlobalMetadataPaths - ); - clusterMetadataManifest.getCustomMetadataMap() - .values() - .forEach( - attribute -> addStaleGlobalMetadataPath(attribute.getUploadedFilename(), filesToKeep, staleGlobalMetadataPaths) - ); - } - - clusterMetadataManifest.getIndices().forEach(uploadedIndexMetadata -> { - if (filesToKeep.contains(uploadedIndexMetadata.getUploadedFilename()) == false) { - staleIndexMetadataPaths.add( - new BlobPath().add(INDEX_PATH_TOKEN).add(uploadedIndexMetadata.getIndexUUID()).buildAsString() - + INDEX_METADATA_FORMAT.blobName(uploadedIndexMetadata.getUploadedFilename()) - ); - } - }); - }); - - if (staleManifestPaths.isEmpty()) { - logger.debug("No stale Remote Cluster Metadata files found"); - return; - } - - deleteStalePaths(clusterName, clusterUUID, new ArrayList<>(staleGlobalMetadataPaths)); - deleteStalePaths(clusterName, clusterUUID, new ArrayList<>(staleIndexMetadataPaths)); - deleteStalePaths(clusterName, clusterUUID, new ArrayList<>(staleManifestPaths)); - } catch (IllegalStateException e) { - logger.error("Error while fetching Remote Cluster Metadata manifests", e); - } catch (IOException e) { - logger.error("Error while deleting stale Remote Cluster Metadata files", e); - remoteStateStats.cleanUpAttemptFailed(); - } catch (Exception e) { - logger.error("Unexpected error while deleting stale Remote Cluster Metadata files", e); - remoteStateStats.cleanUpAttemptFailed(); - } - } - - /** - * Deletes older than last {@code versionsToRetain} manifests. Also cleans up unreferenced IndexMetadata associated with older manifests - * - * @param clusterName name of the cluster - * @param clusterUUID uuid of cluster state to refer to in remote - * @param manifestsToRetain no of latest manifest files to keep in remote - */ - // package private for testing - void deleteStaleClusterMetadata(String clusterName, String clusterUUID, int manifestsToRetain) { - if (deleteStaleMetadataRunning.compareAndSet(false, true) == false) { - logger.info("Delete stale cluster metadata task is already in progress."); - return; - } - try { - getBlobStoreTransferService().listAllInSortedOrderAsync( - ThreadPool.Names.REMOTE_PURGE, - remoteClusterStateService.getManifestFolderPath(clusterName, clusterUUID), - MANIFEST_FILE_PREFIX, - Integer.MAX_VALUE, - new ActionListener<>() { - @Override - public void onResponse(List blobMetadata) { - if (blobMetadata.size() > manifestsToRetain) { - deleteClusterMetadata( - clusterName, - clusterUUID, - blobMetadata.subList(0, manifestsToRetain), - blobMetadata.subList(manifestsToRetain, blobMetadata.size()) - ); - } - deleteStaleMetadataRunning.set(false); - } - - @Override - public void onFailure(Exception e) { - logger.error( - new ParameterizedMessage( - "Exception occurred while deleting Remote Cluster Metadata for clusterUUIDs {}", - clusterUUID - ) - ); - deleteStaleMetadataRunning.set(false); - } - } - ); - } catch (Exception e) { - deleteStaleMetadataRunning.set(false); - throw e; - } - } - - /** - * Purges all remote cluster state against provided cluster UUIDs - * - * @param clusterName name of the cluster - * @param clusterUUIDs clusteUUIDs for which the remote state needs to be purged - */ - void deleteStaleUUIDsClusterMetadata(String clusterName, List clusterUUIDs) { - clusterUUIDs.forEach( - clusterUUID -> getBlobStoreTransferService().deleteAsync( - ThreadPool.Names.REMOTE_PURGE, - remoteClusterStateService.getCusterMetadataBasePath(clusterName, clusterUUID), - new ActionListener<>() { - @Override - public void onResponse(Void unused) { - logger.info("Deleted all remote cluster metadata for cluster UUID - {}", clusterUUID); - } - - @Override - public void onFailure(Exception e) { - logger.error( - new ParameterizedMessage( - "Exception occurred while deleting all remote cluster metadata for cluster UUID {}", - clusterUUID - ), - e - ); - remoteStateStats.cleanUpAttemptFailed(); - } - } - ) - ); - } - - // package private for testing - void deleteStalePaths(String clusterName, String clusterUUID, List stalePaths) throws IOException { - logger.debug(String.format(Locale.ROOT, "Deleting stale files from remote - %s", stalePaths)); - getBlobStoreTransferService().deleteBlobs( - remoteClusterStateService.getCusterMetadataBasePath(clusterName, clusterUUID), - stalePaths - ); - } - - /** - * Purges all remote cluster state against provided cluster UUIDs - * @param clusterState current state of the cluster - * @param committedManifest last committed ClusterMetadataManifest - */ - public void deleteStaleClusterUUIDs(ClusterState clusterState, ClusterMetadataManifest committedManifest) { - threadpool.executor(ThreadPool.Names.REMOTE_PURGE).execute(() -> { - String clusterName = clusterState.getClusterName().value(); - logger.debug("Deleting stale cluster UUIDs data from remote [{}]", clusterName); - Set allClustersUUIDsInRemote; - try { - allClustersUUIDsInRemote = new HashSet<>( - remoteClusterStateService.getAllClusterUUIDs(clusterState.getClusterName().value()) - ); - } catch (IOException e) { - logger.info(String.format(Locale.ROOT, "Error while fetching all cluster UUIDs for [%s]", clusterName)); - return; - } - // Retain last 2 cluster uuids data - allClustersUUIDsInRemote.remove(committedManifest.getClusterUUID()); - allClustersUUIDsInRemote.remove(committedManifest.getPreviousClusterUUID()); - deleteStaleUUIDsClusterMetadata(clusterName, new ArrayList<>(allClustersUUIDsInRemote)); - }); - } - - public TimeValue getStaleFileCleanupInterval() { - return this.staleFileCleanupInterval; - } - - AsyncStaleFileDeletion getStaleFileDeletionTask() { // for testing - return this.staleFileDeletionTask; - } - - RemotePersistenceStats getStats() { - return this.remoteStateStats; - } - - static final class AsyncStaleFileDeletion extends AbstractAsyncTask { - private final RemoteClusterStateCleanupManager remoteClusterStateCleanupManager; - - AsyncStaleFileDeletion(RemoteClusterStateCleanupManager remoteClusterStateCleanupManager) { - super( - logger, - remoteClusterStateCleanupManager.threadpool, - remoteClusterStateCleanupManager.getStaleFileCleanupInterval(), - true - ); - this.remoteClusterStateCleanupManager = remoteClusterStateCleanupManager; - rescheduleIfNecessary(); - } - - @Override - protected boolean mustReschedule() { - return true; - } - - @Override - protected void runInternal() { - remoteClusterStateCleanupManager.cleanUpStaleFiles(); - } - } -} diff --git a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java index 01ffd8f1cca46..0f862d1b68820 100644 --- a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java +++ b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java @@ -18,13 +18,11 @@ import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.cluster.metadata.Metadata; import org.opensearch.cluster.metadata.TemplatesMetadata; -import org.opensearch.cluster.service.ClusterService; import org.opensearch.common.CheckedRunnable; import org.opensearch.common.Nullable; import org.opensearch.common.blobstore.BlobContainer; import org.opensearch.common.blobstore.BlobMetadata; import org.opensearch.common.blobstore.BlobPath; -import org.opensearch.common.blobstore.BlobStore; import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Setting; import org.opensearch.common.settings.Setting.Property; @@ -61,6 +59,7 @@ import java.util.Set; import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicReference; import java.util.function.Function; import java.util.function.LongSupplier; @@ -82,6 +81,8 @@ public class RemoteClusterStateService implements Closeable { public static final String METADATA_MANIFEST_NAME_FORMAT = "%s"; + public static final int RETAINED_MANIFESTS = 10; + public static final String DELIMITER = "__"; public static final String CUSTOM_DELIMITER = "--"; @@ -206,7 +207,8 @@ public class RemoteClusterStateService implements Closeable { private volatile TimeValue indexMetadataUploadTimeout; private volatile TimeValue globalMetadataUploadTimeout; private volatile TimeValue metadataManifestUploadTimeout; - private RemoteClusterStateCleanupManager remoteClusterStateCleanupManager; + + private final AtomicBoolean deleteStaleMetadataRunning = new AtomicBoolean(false); private final RemotePersistenceStats remoteStateStats; private final String CLUSTER_STATE_UPLOAD_TIME_LOG_STRING = "writing cluster state for version [{}] took [{}ms]"; private final String METADATA_UPDATE_LOG_STRING = "wrote metadata for [{}] indices and skipped [{}] unchanged " @@ -230,7 +232,7 @@ public RemoteClusterStateService( String nodeId, Supplier repositoriesService, Settings settings, - ClusterService clusterService, + ClusterSettings clusterSettings, LongSupplier relativeTimeNanosSupplier, ThreadPool threadPool, List indexMetadataUploadListeners @@ -241,7 +243,6 @@ public RemoteClusterStateService( this.settings = settings; this.relativeTimeNanosSupplier = relativeTimeNanosSupplier; this.threadpool = threadPool; - ClusterSettings clusterSettings = clusterService.getClusterSettings(); this.slowWriteLoggingThreshold = clusterSettings.get(SLOW_WRITE_LOGGING_THRESHOLD); this.indexMetadataUploadTimeout = clusterSettings.get(INDEX_METADATA_UPLOAD_TIMEOUT_SETTING); this.globalMetadataUploadTimeout = clusterSettings.get(GLOBAL_METADATA_UPLOAD_TIMEOUT_SETTING); @@ -251,10 +252,16 @@ public RemoteClusterStateService( clusterSettings.addSettingsUpdateConsumer(GLOBAL_METADATA_UPLOAD_TIMEOUT_SETTING, this::setGlobalMetadataUploadTimeout); clusterSettings.addSettingsUpdateConsumer(METADATA_MANIFEST_UPLOAD_TIMEOUT_SETTING, this::setMetadataManifestUploadTimeout); this.remoteStateStats = new RemotePersistenceStats(); - this.remoteClusterStateCleanupManager = new RemoteClusterStateCleanupManager(this, clusterService); this.indexMetadataUploadListeners = indexMetadataUploadListeners; } + private BlobStoreTransferService getBlobStoreTransferService() { + if (blobStoreTransferService == null) { + blobStoreTransferService = new BlobStoreTransferService(blobStoreRepository.blobStore(), threadpool); + } + return blobStoreTransferService; + } + /** * This method uploads entire cluster state metadata to the configured blob store. For now only index metadata upload is supported. This method should be * invoked by the elected cluster manager when the remote cluster state is enabled. @@ -410,6 +417,7 @@ public ClusterMetadataManifest writeIncrementalMetadata( : previousManifest.getCustomMetadataMap(), false ); + deleteStaleClusterMetadata(clusterState.getClusterName().value(), clusterState.metadata().clusterUUID(), RETAINED_MANIFESTS); final long durationMillis = TimeValue.nsecToMSec(relativeTimeNanosSupplier.getAsLong() - startTimeNanos); remoteStateStats.stateSucceeded(); @@ -713,10 +721,6 @@ private CheckedRunnable getAsyncMetadataWriteAction( ); } - public RemoteClusterStateCleanupManager getCleanupManager() { - return remoteClusterStateCleanupManager; - } - @Nullable public ClusterMetadataManifest markLastStateAsCommitted(ClusterState clusterState, ClusterMetadataManifest previousManifest) throws IOException { @@ -736,16 +740,12 @@ public ClusterMetadataManifest markLastStateAsCommitted(ClusterState clusterStat previousManifest.getCustomMetadataMap(), true ); - if (!previousManifest.isClusterUUIDCommitted() && committedManifest.isClusterUUIDCommitted()) { - remoteClusterStateCleanupManager.deleteStaleClusterUUIDs(clusterState, committedManifest); - } - + deleteStaleClusterUUIDs(clusterState, committedManifest); return committedManifest; } @Override public void close() throws IOException { - remoteClusterStateCleanupManager.close(); if (blobStoreRepository != null) { IOUtils.close(blobStoreRepository); } @@ -760,7 +760,6 @@ public void start() { final Repository repository = repositoriesService.get().repository(remoteStoreRepo); assert repository instanceof BlobStoreRepository : "Repository should be instance of BlobStoreRepository"; blobStoreRepository = (BlobStoreRepository) repository; - remoteClusterStateCleanupManager.start(); } private ClusterMetadataManifest uploadManifest( @@ -851,14 +850,6 @@ private void writeMetadataManifest(String clusterName, String clusterUUID, Clust ); } - ThreadPool getThreadpool() { - return threadpool; - } - - BlobStore getBlobStore() { - return blobStoreRepository.blobStore(); - } - private BlobContainer indexMetadataContainer(String clusterName, String clusterUUID, String indexUUID) { // 123456789012_test-cluster/cluster-state/dsgYj10Nkso7/index/ftqsCnn9TgOX return blobStoreRepository.blobStore() @@ -876,7 +867,7 @@ private BlobContainer manifestContainer(String clusterName, String clusterUUID) return blobStoreRepository.blobStore().blobContainer(getManifestFolderPath(clusterName, clusterUUID)); } - BlobPath getCusterMetadataBasePath(String clusterName, String clusterUUID) { + private BlobPath getCusterMetadataBasePath(String clusterName, String clusterUUID) { return blobStoreRepository.basePath().add(encodeString(clusterName)).add(CLUSTER_STATE_PATH_TOKEN).add(clusterUUID); } @@ -991,7 +982,7 @@ private static String metadataAttributeFileName(String componentPrefix, Long met ); } - BlobPath getManifestFolderPath(String clusterName, String clusterUUID) { + private BlobPath getManifestFolderPath(String clusterName, String clusterUUID) { return getCusterMetadataBasePath(clusterName, clusterUUID).add(MANIFEST_PATH_TOKEN); } @@ -1244,7 +1235,7 @@ public String getLastKnownUUIDFromRemote(String clusterName) { } } - Set getAllClusterUUIDs(String clusterName) throws IOException { + private Set getAllClusterUUIDs(String clusterName) throws IOException { Map clusterUUIDMetadata = clusterUUIDContainer(clusterName).children(); if (clusterUUIDMetadata == null) { return Collections.emptySet(); @@ -1435,7 +1426,7 @@ private Optional getLatestManifestFileName(String clusterName, String cl * @param clusterName name of the cluster * @return ClusterMetadataManifest */ - ClusterMetadataManifest fetchRemoteClusterMetadataManifest(String clusterName, String clusterUUID, String filename) + private ClusterMetadataManifest fetchRemoteClusterMetadataManifest(String clusterName, String clusterUUID, String filename) throws IllegalStateException { try { return getClusterMetadataManifestBlobStoreFormat(filename).read( @@ -1495,6 +1486,234 @@ public RemoteStateTransferException(String errorDesc, Throwable cause) { } } + /** + * Purges all remote cluster state against provided cluster UUIDs + * + * @param clusterName name of the cluster + * @param clusterUUIDs clusteUUIDs for which the remote state needs to be purged + */ + void deleteStaleUUIDsClusterMetadata(String clusterName, List clusterUUIDs) { + clusterUUIDs.forEach(clusterUUID -> { + getBlobStoreTransferService().deleteAsync( + ThreadPool.Names.REMOTE_PURGE, + getCusterMetadataBasePath(clusterName, clusterUUID), + new ActionListener<>() { + @Override + public void onResponse(Void unused) { + logger.info("Deleted all remote cluster metadata for cluster UUID - {}", clusterUUID); + } + + @Override + public void onFailure(Exception e) { + logger.error( + new ParameterizedMessage( + "Exception occurred while deleting all remote cluster metadata for cluster UUID {}", + clusterUUID + ), + e + ); + remoteStateStats.cleanUpAttemptFailed(); + } + } + ); + }); + } + + /** + * Deletes older than last {@code versionsToRetain} manifests. Also cleans up unreferenced IndexMetadata associated with older manifests + * + * @param clusterName name of the cluster + * @param clusterUUID uuid of cluster state to refer to in remote + * @param manifestsToRetain no of latest manifest files to keep in remote + */ + // package private for testing + void deleteStaleClusterMetadata(String clusterName, String clusterUUID, int manifestsToRetain) { + if (deleteStaleMetadataRunning.compareAndSet(false, true) == false) { + logger.info("Delete stale cluster metadata task is already in progress."); + return; + } + try { + getBlobStoreTransferService().listAllInSortedOrderAsync( + ThreadPool.Names.REMOTE_PURGE, + getManifestFolderPath(clusterName, clusterUUID), + "manifest", + Integer.MAX_VALUE, + new ActionListener<>() { + @Override + public void onResponse(List blobMetadata) { + if (blobMetadata.size() > manifestsToRetain) { + deleteClusterMetadata( + clusterName, + clusterUUID, + blobMetadata.subList(0, manifestsToRetain - 1), + blobMetadata.subList(manifestsToRetain - 1, blobMetadata.size()) + ); + } + deleteStaleMetadataRunning.set(false); + } + + @Override + public void onFailure(Exception e) { + logger.error( + new ParameterizedMessage( + "Exception occurred while deleting Remote Cluster Metadata for clusterUUIDs {}", + clusterUUID + ) + ); + deleteStaleMetadataRunning.set(false); + } + } + ); + } catch (Exception e) { + deleteStaleMetadataRunning.set(false); + throw e; + } + } + + private void deleteClusterMetadata( + String clusterName, + String clusterUUID, + List activeManifestBlobMetadata, + List staleManifestBlobMetadata + ) { + try { + Set filesToKeep = new HashSet<>(); + Set staleManifestPaths = new HashSet<>(); + Set staleIndexMetadataPaths = new HashSet<>(); + Set staleGlobalMetadataPaths = new HashSet<>(); + activeManifestBlobMetadata.forEach(blobMetadata -> { + ClusterMetadataManifest clusterMetadataManifest = fetchRemoteClusterMetadataManifest( + clusterName, + clusterUUID, + blobMetadata.name() + ); + clusterMetadataManifest.getIndices() + .forEach(uploadedIndexMetadata -> filesToKeep.add(uploadedIndexMetadata.getUploadedFilename())); + if (clusterMetadataManifest.getGlobalMetadataFileName() != null) { + filesToKeep.add(clusterMetadataManifest.getGlobalMetadataFileName()); + } else { + filesToKeep.add(clusterMetadataManifest.getCoordinationMetadata().getUploadedFilename()); + filesToKeep.add(clusterMetadataManifest.getTemplatesMetadata().getUploadedFilename()); + filesToKeep.add(clusterMetadataManifest.getSettingsMetadata().getUploadedFilename()); + clusterMetadataManifest.getCustomMetadataMap() + .forEach((key, value) -> { filesToKeep.add(value.getUploadedFilename()); }); + } + }); + staleManifestBlobMetadata.forEach(blobMetadata -> { + ClusterMetadataManifest clusterMetadataManifest = fetchRemoteClusterMetadataManifest( + clusterName, + clusterUUID, + blobMetadata.name() + ); + staleManifestPaths.add(new BlobPath().add(MANIFEST_PATH_TOKEN).buildAsString() + blobMetadata.name()); + if (clusterMetadataManifest.getGlobalMetadataFileName() != null) { + if (filesToKeep.contains(clusterMetadataManifest.getGlobalMetadataFileName()) == false) { + String[] globalMetadataSplitPath = clusterMetadataManifest.getGlobalMetadataFileName().split("/"); + staleGlobalMetadataPaths.add( + new BlobPath().add(GLOBAL_METADATA_PATH_TOKEN).buildAsString() + GLOBAL_METADATA_FORMAT.blobName( + globalMetadataSplitPath[globalMetadataSplitPath.length - 1] + ) + ); + } + } else { + if (filesToKeep.contains(clusterMetadataManifest.getCoordinationMetadata().getUploadedFilename()) == false) { + String[] coordinationMetadataSplitPath = clusterMetadataManifest.getCoordinationMetadata() + .getUploadedFilename() + .split("/"); + staleGlobalMetadataPaths.add( + new BlobPath().add(GLOBAL_METADATA_PATH_TOKEN).buildAsString() + GLOBAL_METADATA_FORMAT.blobName( + coordinationMetadataSplitPath[coordinationMetadataSplitPath.length - 1] + ) + ); + } + if (filesToKeep.contains(clusterMetadataManifest.getTemplatesMetadata().getUploadedFilename()) == false) { + String[] templatesMetadataSplitPath = clusterMetadataManifest.getTemplatesMetadata() + .getUploadedFilename() + .split("/"); + staleGlobalMetadataPaths.add( + new BlobPath().add(GLOBAL_METADATA_PATH_TOKEN).buildAsString() + GLOBAL_METADATA_FORMAT.blobName( + templatesMetadataSplitPath[templatesMetadataSplitPath.length - 1] + ) + ); + } + if (filesToKeep.contains(clusterMetadataManifest.getSettingsMetadata().getUploadedFilename()) == false) { + String[] settingsMetadataSplitPath = clusterMetadataManifest.getSettingsMetadata().getUploadedFilename().split("/"); + staleGlobalMetadataPaths.add( + new BlobPath().add(GLOBAL_METADATA_PATH_TOKEN).buildAsString() + GLOBAL_METADATA_FORMAT.blobName( + settingsMetadataSplitPath[settingsMetadataSplitPath.length - 1] + ) + ); + } + clusterMetadataManifest.getCustomMetadataMap().forEach((key, value) -> { + if (filesToKeep.contains(value.getUploadedFilename()) == false) { + String[] customMetadataSplitPath = value.getUploadedFilename().split("/"); + staleGlobalMetadataPaths.add( + new BlobPath().add(GLOBAL_METADATA_PATH_TOKEN).buildAsString() + GLOBAL_METADATA_FORMAT.blobName( + customMetadataSplitPath[customMetadataSplitPath.length - 1] + ) + ); + } + }); + } + + clusterMetadataManifest.getIndices().forEach(uploadedIndexMetadata -> { + if (filesToKeep.contains(uploadedIndexMetadata.getUploadedFilename()) == false) { + staleIndexMetadataPaths.add( + new BlobPath().add(INDEX_PATH_TOKEN).add(uploadedIndexMetadata.getIndexUUID()).buildAsString() + + INDEX_METADATA_FORMAT.blobName(uploadedIndexMetadata.getUploadedFilename()) + ); + } + }); + }); + + if (staleManifestPaths.isEmpty()) { + logger.debug("No stale Remote Cluster Metadata files found"); + return; + } + + deleteStalePaths(clusterName, clusterUUID, new ArrayList<>(staleGlobalMetadataPaths)); + deleteStalePaths(clusterName, clusterUUID, new ArrayList<>(staleIndexMetadataPaths)); + deleteStalePaths(clusterName, clusterUUID, new ArrayList<>(staleManifestPaths)); + } catch (IllegalStateException e) { + logger.error("Error while fetching Remote Cluster Metadata manifests", e); + } catch (IOException e) { + logger.error("Error while deleting stale Remote Cluster Metadata files", e); + remoteStateStats.cleanUpAttemptFailed(); + } catch (Exception e) { + logger.error("Unexpected error while deleting stale Remote Cluster Metadata files", e); + remoteStateStats.cleanUpAttemptFailed(); + } + } + + private void deleteStalePaths(String clusterName, String clusterUUID, List stalePaths) throws IOException { + logger.debug(String.format(Locale.ROOT, "Deleting stale files from remote - %s", stalePaths)); + getBlobStoreTransferService().deleteBlobs(getCusterMetadataBasePath(clusterName, clusterUUID), stalePaths); + } + + /** + * Purges all remote cluster state against provided cluster UUIDs + * + * @param clusterState current state of the cluster + * @param committedManifest last committed ClusterMetadataManifest + */ + public void deleteStaleClusterUUIDs(ClusterState clusterState, ClusterMetadataManifest committedManifest) { + threadpool.executor(ThreadPool.Names.REMOTE_PURGE).execute(() -> { + String clusterName = clusterState.getClusterName().value(); + logger.debug("Deleting stale cluster UUIDs data from remote [{}]", clusterName); + Set allClustersUUIDsInRemote; + try { + allClustersUUIDsInRemote = new HashSet<>(getAllClusterUUIDs(clusterState.getClusterName().value())); + } catch (IOException e) { + logger.info(String.format(Locale.ROOT, "Error while fetching all cluster UUIDs for [%s]", clusterName)); + return; + } + // Retain last 2 cluster uuids data + allClustersUUIDsInRemote.remove(committedManifest.getClusterUUID()); + allClustersUUIDsInRemote.remove(committedManifest.getPreviousClusterUUID()); + deleteStaleUUIDsClusterMetadata(clusterName, new ArrayList<>(allClustersUUIDsInRemote)); + }); + } + public RemotePersistenceStats getStats() { return remoteStateStats; } diff --git a/server/src/main/java/org/opensearch/node/Node.java b/server/src/main/java/org/opensearch/node/Node.java index 49545fa8a0c8b..76109ba10624a 100644 --- a/server/src/main/java/org/opensearch/node/Node.java +++ b/server/src/main/java/org/opensearch/node/Node.java @@ -138,7 +138,6 @@ import org.opensearch.gateway.MetaStateService; import org.opensearch.gateway.PersistedClusterStateService; import org.opensearch.gateway.ShardsBatchGatewayAllocator; -import org.opensearch.gateway.remote.RemoteClusterStateCleanupManager; import org.opensearch.gateway.remote.RemoteClusterStateService; import org.opensearch.http.HttpServerTransport; import org.opensearch.identity.IdentityService; @@ -753,7 +752,6 @@ protected Node( threadPool::relativeTimeInMillis ); final RemoteClusterStateService remoteClusterStateService; - final RemoteClusterStateCleanupManager remoteClusterStateCleanupManager; final RemoteIndexPathUploader remoteIndexPathUploader; if (isRemoteStoreClusterStateEnabled(settings)) { remoteIndexPathUploader = new RemoteIndexPathUploader( @@ -766,16 +764,14 @@ protected Node( nodeEnvironment.nodeId(), repositoriesServiceReference::get, settings, - clusterService, + clusterService.getClusterSettings(), threadPool::preciseRelativeTimeInNanos, threadPool, List.of(remoteIndexPathUploader) ); - remoteClusterStateCleanupManager = remoteClusterStateService.getCleanupManager(); } else { remoteClusterStateService = null; remoteIndexPathUploader = null; - remoteClusterStateCleanupManager = null; } // collect engine factory providers from plugins @@ -1380,7 +1376,6 @@ protected Node( b.bind(MetricsRegistry.class).toInstance(metricsRegistry); b.bind(RemoteClusterStateService.class).toProvider(() -> remoteClusterStateService); b.bind(RemoteIndexPathUploader.class).toProvider(() -> remoteIndexPathUploader); - b.bind(RemoteClusterStateCleanupManager.class).toProvider(() -> remoteClusterStateCleanupManager); b.bind(PersistedStateRegistry.class).toInstance(persistedStateRegistry); b.bind(SegmentReplicationStatsTracker.class).toInstance(segmentReplicationStatsTracker); b.bind(SearchRequestOperationsCompositeListenerFactory.class).toInstance(searchRequestOperationsCompositeListenerFactory); diff --git a/server/src/test/java/org/opensearch/gateway/GatewayMetaStatePersistedStateTests.java b/server/src/test/java/org/opensearch/gateway/GatewayMetaStatePersistedStateTests.java index 418e6d8de6adb..3ba98c44f8d3e 100644 --- a/server/src/test/java/org/opensearch/gateway/GatewayMetaStatePersistedStateTests.java +++ b/server/src/test/java/org/opensearch/gateway/GatewayMetaStatePersistedStateTests.java @@ -462,7 +462,9 @@ public void testDataOnlyNodePersistence() throws Exception { }); when(transportService.getThreadPool()).thenReturn(threadPool); ClusterService clusterService = mock(ClusterService.class); - when(clusterService.getClusterSettings()).thenReturn(new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS)); + when(clusterService.getClusterSettings()).thenReturn( + new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS) + ); final PersistedClusterStateService persistedClusterStateService = new PersistedClusterStateService( nodeEnvironment, xContentRegistry(), @@ -485,7 +487,7 @@ public void testDataOnlyNodePersistence() throws Exception { nodeEnvironment.nodeId(), repositoriesServiceSupplier, settings, - clusterService, + clusterSettings, () -> 0L, threadPool, List.of(new RemoteIndexPathUploader(threadPool, settings, repositoriesServiceSupplier, clusterSettings)) diff --git a/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateCleanupManagerTests.java b/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateCleanupManagerTests.java deleted file mode 100644 index 24fd1b164a4ff..0000000000000 --- a/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateCleanupManagerTests.java +++ /dev/null @@ -1,446 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - */ - -package org.opensearch.gateway.remote; - -import org.opensearch.cluster.ClusterName; -import org.opensearch.cluster.ClusterState; -import org.opensearch.cluster.metadata.Metadata; -import org.opensearch.cluster.node.DiscoveryNodes; -import org.opensearch.cluster.service.ClusterApplierService; -import org.opensearch.cluster.service.ClusterService; -import org.opensearch.common.blobstore.BlobContainer; -import org.opensearch.common.blobstore.BlobMetadata; -import org.opensearch.common.blobstore.BlobPath; -import org.opensearch.common.blobstore.BlobStore; -import org.opensearch.common.blobstore.support.PlainBlobMetadata; -import org.opensearch.common.settings.ClusterSettings; -import org.opensearch.common.settings.Settings; -import org.opensearch.common.util.concurrent.AbstractAsyncTask; -import org.opensearch.core.action.ActionListener; -import org.opensearch.repositories.RepositoriesService; -import org.opensearch.repositories.blobstore.BlobStoreRepository; -import org.opensearch.repositories.fs.FsRepository; -import org.opensearch.test.OpenSearchTestCase; -import org.opensearch.test.VersionUtils; -import org.opensearch.threadpool.TestThreadPool; -import org.opensearch.threadpool.ThreadPool; -import org.junit.After; -import org.junit.Before; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashSet; -import java.util.List; -import java.util.Locale; -import java.util.Set; -import java.util.concurrent.CountDownLatch; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.function.Supplier; - -import static org.opensearch.gateway.remote.ClusterMetadataManifest.CODEC_V1; -import static org.opensearch.gateway.remote.ClusterMetadataManifest.CODEC_V2; -import static org.opensearch.gateway.remote.ClusterMetadataManifest.UploadedIndexMetadata; -import static org.opensearch.gateway.remote.ClusterMetadataManifest.UploadedMetadataAttribute; -import static org.opensearch.gateway.remote.RemoteClusterStateCleanupManager.AsyncStaleFileDeletion; -import static org.opensearch.gateway.remote.RemoteClusterStateCleanupManager.CLUSTER_STATE_CLEANUP_INTERVAL_DEFAULT; -import static org.opensearch.gateway.remote.RemoteClusterStateCleanupManager.REMOTE_CLUSTER_STATE_CLEANUP_INTERVAL_SETTING; -import static org.opensearch.gateway.remote.RemoteClusterStateCleanupManager.RETAINED_MANIFESTS; -import static org.opensearch.gateway.remote.RemoteClusterStateCleanupManager.SKIP_CLEANUP_STATE_CHANGES; -import static org.opensearch.gateway.remote.RemoteClusterStateService.CLUSTER_STATE_PATH_TOKEN; -import static org.opensearch.gateway.remote.RemoteClusterStateService.COORDINATION_METADATA; -import static org.opensearch.gateway.remote.RemoteClusterStateService.DELIMITER; -import static org.opensearch.gateway.remote.RemoteClusterStateService.GLOBAL_METADATA_PATH_TOKEN; -import static org.opensearch.gateway.remote.RemoteClusterStateService.INDEX_PATH_TOKEN; -import static org.opensearch.gateway.remote.RemoteClusterStateService.MANIFEST_FILE_PREFIX; -import static org.opensearch.gateway.remote.RemoteClusterStateService.MANIFEST_PATH_TOKEN; -import static org.opensearch.gateway.remote.RemoteClusterStateService.SETTING_METADATA; -import static org.opensearch.gateway.remote.RemoteClusterStateService.TEMPLATES_METADATA; -import static org.opensearch.gateway.remote.RemoteClusterStateService.encodeString; -import static org.opensearch.gateway.remote.RemoteClusterStateServiceTests.generateClusterStateWithOneIndex; -import static org.opensearch.gateway.remote.RemoteClusterStateServiceTests.nodesWithLocalNodeClusterManager; -import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY; -import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_REPOSITORY_SETTINGS_ATTRIBUTE_KEY_PREFIX; -import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_REPOSITORY_TYPE_ATTRIBUTE_KEY_FORMAT; -import static org.mockito.ArgumentMatchers.any; -import static org.mockito.ArgumentMatchers.anyInt; -import static org.mockito.ArgumentMatchers.eq; -import static org.mockito.Mockito.doAnswer; -import static org.mockito.Mockito.doNothing; -import static org.mockito.Mockito.doThrow; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.spy; -import static org.mockito.Mockito.times; -import static org.mockito.Mockito.verify; -import static org.mockito.Mockito.when; - -public class RemoteClusterStateCleanupManagerTests extends OpenSearchTestCase { - private RemoteClusterStateCleanupManager remoteClusterStateCleanupManager; - private Supplier repositoriesServiceSupplier; - private RepositoriesService repositoriesService; - private BlobStoreRepository blobStoreRepository; - private BlobStore blobStore; - private ClusterSettings clusterSettings; - private ClusterApplierService clusterApplierService; - private ClusterState clusterState; - private Metadata metadata; - private RemoteClusterStateService remoteClusterStateService; - private final ThreadPool threadPool = new TestThreadPool(getClass().getName()); - - @Before - public void setup() { - repositoriesServiceSupplier = mock(Supplier.class); - repositoriesService = mock(RepositoriesService.class); - when(repositoriesServiceSupplier.get()).thenReturn(repositoriesService); - - String stateRepoTypeAttributeKey = String.format( - Locale.getDefault(), - "node.attr." + REMOTE_STORE_REPOSITORY_TYPE_ATTRIBUTE_KEY_FORMAT, - "remote_store_repository" - ); - String stateRepoSettingsAttributeKeyPrefix = String.format( - Locale.getDefault(), - "node.attr." + REMOTE_STORE_REPOSITORY_SETTINGS_ATTRIBUTE_KEY_PREFIX, - "remote_store_repository" - ); - - Settings settings = Settings.builder() - .put("node.attr." + REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY, "remote_store_repository") - .put(stateRepoTypeAttributeKey, FsRepository.TYPE) - .put(stateRepoSettingsAttributeKeyPrefix + "location", "randomRepoPath") - .put(RemoteClusterStateService.REMOTE_CLUSTER_STATE_ENABLED_SETTING.getKey(), true) - .build(); - - clusterSettings = new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); - clusterApplierService = mock(ClusterApplierService.class); - clusterState = mock(ClusterState.class); - metadata = mock(Metadata.class); - ClusterService clusterService = mock(ClusterService.class); - when(clusterService.getClusterSettings()).thenReturn(clusterSettings); - when(clusterState.getClusterName()).thenReturn(new ClusterName("test")); - when(metadata.clusterUUID()).thenReturn("testUUID"); - when(clusterState.metadata()).thenReturn(metadata); - when(clusterApplierService.state()).thenReturn(clusterState); - when(clusterService.getClusterApplierService()).thenReturn(clusterApplierService); - - blobStoreRepository = mock(BlobStoreRepository.class); - blobStore = mock(BlobStore.class); - when(blobStoreRepository.blobStore()).thenReturn(blobStore); - when(repositoriesService.repository("remote_store_repository")).thenReturn(blobStoreRepository); - - remoteClusterStateService = mock(RemoteClusterStateService.class); - when(remoteClusterStateService.getStats()).thenReturn(new RemotePersistenceStats()); - when(remoteClusterStateService.getThreadpool()).thenReturn(threadPool); - when(remoteClusterStateService.getBlobStore()).thenReturn(blobStore); - remoteClusterStateCleanupManager = new RemoteClusterStateCleanupManager(remoteClusterStateService, clusterService); - } - - @After - public void teardown() throws Exception { - super.tearDown(); - remoteClusterStateCleanupManager.close(); - threadPool.shutdown(); - } - - public void testDeleteClusterMetadata() throws IOException { - String clusterUUID = "clusterUUID"; - String clusterName = "test-cluster"; - List inactiveBlobs = Arrays.asList( - new PlainBlobMetadata("manifest1.dat", 1L), - new PlainBlobMetadata("manifest2.dat", 1L), - new PlainBlobMetadata("manifest3.dat", 1L) - ); - List activeBlobs = Arrays.asList( - new PlainBlobMetadata("manifest4.dat", 1L), - new PlainBlobMetadata("manifest5.dat", 1L) - ); - UploadedIndexMetadata index1Metadata = new UploadedIndexMetadata("index1", "indexUUID1", "index_metadata1"); - UploadedIndexMetadata index2Metadata = new UploadedIndexMetadata("index2", "indexUUID2", "index_metadata2"); - UploadedIndexMetadata index1UpdatedMetadata = new UploadedIndexMetadata("index1", "indexUUID1", "index_metadata1_updated"); - UploadedMetadataAttribute coordinationMetadata = new UploadedMetadataAttribute(COORDINATION_METADATA, "coordination_metadata"); - UploadedMetadataAttribute templateMetadata = new UploadedMetadataAttribute(TEMPLATES_METADATA, "template_metadata"); - UploadedMetadataAttribute settingMetadata = new UploadedMetadataAttribute(SETTING_METADATA, "settings_metadata"); - UploadedMetadataAttribute coordinationMetadataUpdated = new UploadedMetadataAttribute( - COORDINATION_METADATA, - "coordination_metadata_updated" - ); - UploadedMetadataAttribute templateMetadataUpdated = new UploadedMetadataAttribute(TEMPLATES_METADATA, "template_metadata_updated"); - UploadedMetadataAttribute settingMetadataUpdated = new UploadedMetadataAttribute(SETTING_METADATA, "settings_metadata_updated"); - ClusterMetadataManifest manifest1 = ClusterMetadataManifest.builder() - .indices(List.of(index1Metadata)) - .globalMetadataFileName("global_metadata") - .clusterTerm(1L) - .stateVersion(1L) - .codecVersion(CODEC_V1) - .stateUUID(randomAlphaOfLength(10)) - .clusterUUID(clusterUUID) - .nodeId("nodeA") - .opensearchVersion(VersionUtils.randomOpenSearchVersion(random())) - .previousClusterUUID(ClusterState.UNKNOWN_UUID) - .committed(true) - .build(); - ClusterMetadataManifest manifest2 = ClusterMetadataManifest.builder(manifest1) - .indices(List.of(index1Metadata, index2Metadata)) - .codecVersion(CODEC_V2) - .globalMetadataFileName(null) - .coordinationMetadata(coordinationMetadata) - .templatesMetadata(templateMetadata) - .settingMetadata(settingMetadata) - .build(); - ClusterMetadataManifest manifest3 = ClusterMetadataManifest.builder(manifest2) - .indices(List.of(index1UpdatedMetadata, index2Metadata)) - .settingMetadata(settingMetadataUpdated) - .build(); - - // active manifest have reference to index1Updated, index2, settingsUpdated, coordinationUpdated, templates, templatesUpdated - ClusterMetadataManifest manifest4 = ClusterMetadataManifest.builder(manifest3) - .coordinationMetadata(coordinationMetadataUpdated) - .build(); - ClusterMetadataManifest manifest5 = ClusterMetadataManifest.builder(manifest4).templatesMetadata(templateMetadataUpdated).build(); - - when(remoteClusterStateService.fetchRemoteClusterMetadataManifest(eq(clusterName), eq(clusterUUID), any())).thenReturn( - manifest4, - manifest5, - manifest1, - manifest2, - manifest3 - ); - BlobContainer container = mock(BlobContainer.class); - when(blobStore.blobContainer(any())).thenReturn(container); - doNothing().when(container).deleteBlobsIgnoringIfNotExists(any()); - - remoteClusterStateCleanupManager.deleteClusterMetadata(clusterName, clusterUUID, activeBlobs, inactiveBlobs); - verify(container).deleteBlobsIgnoringIfNotExists( - List.of( - new BlobPath().add(GLOBAL_METADATA_PATH_TOKEN).buildAsString() + coordinationMetadata.getUploadedFilename() + ".dat", - new BlobPath().add(GLOBAL_METADATA_PATH_TOKEN).buildAsString() + settingMetadata.getUploadedFilename() + ".dat", - new BlobPath().add(GLOBAL_METADATA_PATH_TOKEN).buildAsString() + "global_metadata.dat" - ) - ); - verify(container).deleteBlobsIgnoringIfNotExists( - List.of( - new BlobPath().add(INDEX_PATH_TOKEN).add(index1Metadata.getIndexUUID()).buildAsString() - + index1Metadata.getUploadedFilePath() - + ".dat" - ) - ); - Set staleManifest = new HashSet<>(); - inactiveBlobs.forEach(blob -> staleManifest.add(new BlobPath().add(MANIFEST_PATH_TOKEN).buildAsString() + blob.name())); - verify(container).deleteBlobsIgnoringIfNotExists(new ArrayList<>(staleManifest)); - } - - public void testDeleteStaleClusterUUIDs() throws IOException { - final ClusterState clusterState = generateClusterStateWithOneIndex().nodes(nodesWithLocalNodeClusterManager()).build(); - ClusterMetadataManifest clusterMetadataManifest = ClusterMetadataManifest.builder() - .indices(List.of()) - .clusterTerm(1L) - .stateVersion(1L) - .stateUUID(randomAlphaOfLength(10)) - .clusterUUID("cluster-uuid1") - .nodeId("nodeA") - .opensearchVersion(VersionUtils.randomOpenSearchVersion(random())) - .previousClusterUUID(ClusterState.UNKNOWN_UUID) - .committed(true) - .build(); - - BlobPath blobPath = new BlobPath().add("random-path"); - BlobContainer uuidContainerContainer = mock(BlobContainer.class); - BlobContainer manifest2Container = mock(BlobContainer.class); - BlobContainer manifest3Container = mock(BlobContainer.class); - when(blobStore.blobContainer(any())).then(invocation -> { - BlobPath blobPath1 = invocation.getArgument(0); - if (blobPath1.buildAsString().endsWith("cluster-state/")) { - return uuidContainerContainer; - } else if (blobPath1.buildAsString().contains("cluster-state/cluster-uuid2/")) { - return manifest2Container; - } else if (blobPath1.buildAsString().contains("cluster-state/cluster-uuid3/")) { - return manifest3Container; - } else { - throw new IllegalArgumentException("Unexpected blob path " + blobPath1); - } - }); - when( - manifest2Container.listBlobsByPrefixInSortedOrder( - MANIFEST_FILE_PREFIX + DELIMITER, - Integer.MAX_VALUE, - BlobContainer.BlobNameSortOrder.LEXICOGRAPHIC - ) - ).thenReturn(List.of(new PlainBlobMetadata("mainfest2", 1L))); - when( - manifest3Container.listBlobsByPrefixInSortedOrder( - MANIFEST_FILE_PREFIX + DELIMITER, - Integer.MAX_VALUE, - BlobContainer.BlobNameSortOrder.LEXICOGRAPHIC - ) - ).thenReturn(List.of(new PlainBlobMetadata("mainfest3", 1L))); - Set uuids = new HashSet<>(Arrays.asList("cluster-uuid1", "cluster-uuid2", "cluster-uuid3")); - when(remoteClusterStateService.getAllClusterUUIDs(any())).thenReturn(uuids); - when(remoteClusterStateService.getCusterMetadataBasePath(any(), any())).then( - invocationOnMock -> blobPath.add(encodeString(invocationOnMock.getArgument(0))) - .add(CLUSTER_STATE_PATH_TOKEN) - .add((String) invocationOnMock.getArgument(1)) - ); - remoteClusterStateCleanupManager.start(); - remoteClusterStateCleanupManager.deleteStaleClusterUUIDs(clusterState, clusterMetadataManifest); - try { - assertBusy(() -> { - verify(manifest2Container, times(1)).delete(); - verify(manifest3Container, times(1)).delete(); - }); - } catch (Exception e) { - throw new RuntimeException(e); - } - } - - public void testRemoteStateCleanupFailureStats() throws IOException { - BlobContainer blobContainer = mock(BlobContainer.class); - doThrow(IOException.class).when(blobContainer).delete(); - when(blobStore.blobContainer(any())).thenReturn(blobContainer); - BlobPath blobPath = new BlobPath().add("random-path"); - when((blobStoreRepository.basePath())).thenReturn(blobPath); - remoteClusterStateCleanupManager.start(); - remoteClusterStateCleanupManager.deleteStaleUUIDsClusterMetadata("cluster1", List.of("cluster-uuid1")); - try { - assertBusy(() -> { - // wait for stats to get updated - assertNotNull(remoteClusterStateCleanupManager.getStats()); - assertEquals(0, remoteClusterStateCleanupManager.getStats().getSuccessCount()); - assertEquals(1, remoteClusterStateCleanupManager.getStats().getCleanupAttemptFailedCount()); - }); - } catch (Exception e) { - throw new RuntimeException(e); - } - } - - public void testSingleConcurrentExecutionOfStaleManifestCleanup() throws Exception { - BlobContainer blobContainer = mock(BlobContainer.class); - when(blobStore.blobContainer(any())).thenReturn(blobContainer); - - CountDownLatch latch = new CountDownLatch(1); - AtomicInteger callCount = new AtomicInteger(0); - doAnswer(invocation -> { - callCount.incrementAndGet(); - if (latch.await(5000, TimeUnit.SECONDS) == false) { - throw new Exception("Timed out waiting for delete task queuing to complete"); - } - return null; - }).when(blobContainer) - .listBlobsByPrefixInSortedOrder( - any(String.class), - any(int.class), - any(BlobContainer.BlobNameSortOrder.class), - any(ActionListener.class) - ); - - remoteClusterStateCleanupManager.start(); - remoteClusterStateCleanupManager.deleteStaleClusterMetadata("cluster-name", "cluster-uuid", RETAINED_MANIFESTS); - remoteClusterStateCleanupManager.deleteStaleClusterMetadata("cluster-name", "cluster-uuid", RETAINED_MANIFESTS); - - latch.countDown(); - assertBusy(() -> assertEquals(1, callCount.get())); - } - - public void testRemoteClusterStateCleanupSetting() { - remoteClusterStateCleanupManager.start(); - // verify default value - assertEquals(CLUSTER_STATE_CLEANUP_INTERVAL_DEFAULT, remoteClusterStateCleanupManager.getStaleFileCleanupInterval()); - - // verify update interval - int cleanupInterval = randomIntBetween(1, 10); - Settings newSettings = Settings.builder().put("cluster.remote_store.state.cleanup_interval", cleanupInterval + "s").build(); - clusterSettings.applySettings(newSettings); - assertEquals(cleanupInterval, remoteClusterStateCleanupManager.getStaleFileCleanupInterval().seconds()); - } - - public void testRemoteCleanupTaskScheduled() { - AbstractAsyncTask cleanupTask = remoteClusterStateCleanupManager.getStaleFileDeletionTask(); - assertNull(cleanupTask); - // now the task should be initialized - remoteClusterStateCleanupManager.start(); - assertNotNull(remoteClusterStateCleanupManager.getStaleFileDeletionTask()); - assertTrue(remoteClusterStateCleanupManager.getStaleFileDeletionTask().mustReschedule()); - assertEquals( - clusterSettings.get(REMOTE_CLUSTER_STATE_CLEANUP_INTERVAL_SETTING), - remoteClusterStateCleanupManager.getStaleFileDeletionTask().getInterval() - ); - assertTrue(remoteClusterStateCleanupManager.getStaleFileDeletionTask().isScheduled()); - assertFalse(remoteClusterStateCleanupManager.getStaleFileDeletionTask().isClosed()); - } - - public void testRemoteCleanupSkipsOnOnlyElectedClusterManager() { - DiscoveryNodes nodes = mock(DiscoveryNodes.class); - when(nodes.isLocalNodeElectedClusterManager()).thenReturn(false); - when(clusterState.nodes()).thenReturn(nodes); - RemoteClusterStateCleanupManager spyManager = spy(remoteClusterStateCleanupManager); - AtomicInteger callCount = new AtomicInteger(0); - doAnswer(invocation -> callCount.incrementAndGet()).when(spyManager).deleteStaleClusterMetadata(any(), any(), anyInt()); - spyManager.cleanUpStaleFiles(); - assertEquals(0, callCount.get()); - - when(nodes.isLocalNodeElectedClusterManager()).thenReturn(true); - when(clusterState.version()).thenReturn(randomLongBetween(11, 20)); - spyManager.cleanUpStaleFiles(); - assertEquals(1, callCount.get()); - } - - public void testRemoteCleanupSkipsIfVersionIncrementLessThanThreshold() { - DiscoveryNodes nodes = mock(DiscoveryNodes.class); - long version = randomLongBetween(1, SKIP_CLEANUP_STATE_CHANGES); - when(clusterApplierService.state()).thenReturn(clusterState); - when(nodes.isLocalNodeElectedClusterManager()).thenReturn(true); - when(clusterState.nodes()).thenReturn(nodes); - when(clusterState.version()).thenReturn(version); - - RemoteClusterStateCleanupManager spyManager = spy(remoteClusterStateCleanupManager); - AtomicInteger callCount = new AtomicInteger(0); - doAnswer(invocation -> callCount.incrementAndGet()).when(spyManager).deleteStaleClusterMetadata(any(), any(), anyInt()); - - remoteClusterStateCleanupManager.cleanUpStaleFiles(); - assertEquals(0, callCount.get()); - } - - public void testRemoteCleanupCallsDeleteIfVersionIncrementGreaterThanThreshold() { - DiscoveryNodes nodes = mock(DiscoveryNodes.class); - long version = randomLongBetween(SKIP_CLEANUP_STATE_CHANGES + 1, SKIP_CLEANUP_STATE_CHANGES + 10); - when(clusterApplierService.state()).thenReturn(clusterState); - when(nodes.isLocalNodeElectedClusterManager()).thenReturn(true); - when(clusterState.nodes()).thenReturn(nodes); - when(clusterState.version()).thenReturn(version); - - RemoteClusterStateCleanupManager spyManager = spy(remoteClusterStateCleanupManager); - AtomicInteger callCount = new AtomicInteger(0); - doAnswer(invocation -> callCount.incrementAndGet()).when(spyManager).deleteStaleClusterMetadata(any(), any(), anyInt()); - - // using spied cleanup manager so that stubbed deleteStaleClusterMetadata is called - spyManager.cleanUpStaleFiles(); - assertEquals(1, callCount.get()); - } - - public void testRemoteCleanupSchedulesEvenAfterFailure() { - remoteClusterStateCleanupManager.start(); - RemoteClusterStateCleanupManager spyManager = spy(remoteClusterStateCleanupManager); - AtomicInteger callCount = new AtomicInteger(0); - doAnswer(invocationOnMock -> { - callCount.incrementAndGet(); - throw new RuntimeException("Test exception"); - }).when(spyManager).cleanUpStaleFiles(); - AsyncStaleFileDeletion task = new AsyncStaleFileDeletion(spyManager); - assertTrue(task.isScheduled()); - task.run(); - // Task is still scheduled after the failure - assertTrue(task.isScheduled()); - assertEquals(1, callCount.get()); - - task.run(); - // Task is still scheduled after the failure - assertTrue(task.isScheduled()); - assertEquals(2, callCount.get()); - } -} diff --git a/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java b/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java index 5f0c371a3137e..1b242b921c0d7 100644 --- a/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java +++ b/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java @@ -19,7 +19,6 @@ import org.opensearch.cluster.metadata.Metadata; import org.opensearch.cluster.metadata.TemplatesMetadata; import org.opensearch.cluster.node.DiscoveryNodes; -import org.opensearch.cluster.service.ClusterService; import org.opensearch.common.blobstore.AsyncMultiStreamBlobContainer; import org.opensearch.common.blobstore.BlobContainer; import org.opensearch.common.blobstore.BlobMetadata; @@ -73,6 +72,9 @@ import java.util.Optional; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; import java.util.function.BiConsumer; import java.util.function.Function; import java.util.function.Supplier; @@ -91,6 +93,7 @@ import static org.opensearch.gateway.remote.RemoteClusterStateService.MANIFEST_CURRENT_CODEC_VERSION; import static org.opensearch.gateway.remote.RemoteClusterStateService.MANIFEST_FILE_PREFIX; import static org.opensearch.gateway.remote.RemoteClusterStateService.METADATA_FILE_PREFIX; +import static org.opensearch.gateway.remote.RemoteClusterStateService.RETAINED_MANIFESTS; import static org.opensearch.gateway.remote.RemoteClusterStateService.SETTING_METADATA; import static org.opensearch.gateway.remote.RemoteClusterStateService.TEMPLATES_METADATA; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY; @@ -106,12 +109,13 @@ import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.doThrow; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; public class RemoteClusterStateServiceTests extends OpenSearchTestCase { private RemoteClusterStateService remoteClusterStateService; - private ClusterService clusterService; private ClusterSettings clusterSettings; private Supplier repositoriesServiceSupplier; private RepositoriesService repositoriesService; @@ -144,8 +148,6 @@ public void setup() { .build(); clusterSettings = new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); - clusterService = mock(ClusterService.class); - when(clusterService.getClusterSettings()).thenReturn(clusterSettings); NamedXContentRegistry xContentRegistry = new NamedXContentRegistry( Stream.of( NetworkModule.getNamedXContents().stream(), @@ -163,7 +165,7 @@ public void setup() { "test-node-id", repositoriesServiceSupplier, settings, - clusterService, + clusterSettings, () -> 0L, threadPool, List.of(new RemoteIndexPathUploader(threadPool, settings, repositoriesServiceSupplier, clusterSettings)) @@ -185,14 +187,14 @@ public void testFailWriteFullMetadataNonClusterManagerNode() throws IOException public void testFailInitializationWhenRemoteStateDisabled() { final Settings settings = Settings.builder().build(); - when(clusterService.getClusterSettings()).thenReturn(new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS)); + ClusterSettings clusterSettings = new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); assertThrows( AssertionError.class, () -> new RemoteClusterStateService( "test-node-id", repositoriesServiceSupplier, settings, - clusterService, + clusterSettings, () -> 0L, threadPool, List.of(new RemoteIndexPathUploader(threadPool, settings, repositoriesServiceSupplier, clusterSettings)) @@ -1278,6 +1280,72 @@ public void testGetValidPreviousClusterUUIDWhenLastUUIDUncommitted() throws IOEx assertThat(previousClusterUUID, equalTo("cluster-uuid2")); } + public void testDeleteStaleClusterUUIDs() throws IOException { + final ClusterState clusterState = generateClusterStateWithOneIndex().nodes(nodesWithLocalNodeClusterManager()).build(); + ClusterMetadataManifest clusterMetadataManifest = ClusterMetadataManifest.builder() + .indices(List.of()) + .clusterTerm(1L) + .stateVersion(1L) + .stateUUID(randomAlphaOfLength(10)) + .clusterUUID("cluster-uuid1") + .nodeId("nodeA") + .opensearchVersion(VersionUtils.randomOpenSearchVersion(random())) + .previousClusterUUID(ClusterState.UNKNOWN_UUID) + .committed(true) + .build(); + + BlobPath blobPath = new BlobPath().add("random-path"); + when((blobStoreRepository.basePath())).thenReturn(blobPath); + BlobContainer uuidContainerContainer = mock(BlobContainer.class); + BlobContainer manifest2Container = mock(BlobContainer.class); + BlobContainer manifest3Container = mock(BlobContainer.class); + when(blobStore.blobContainer(any())).then(invocation -> { + BlobPath blobPath1 = invocation.getArgument(0); + if (blobPath1.buildAsString().endsWith("cluster-state/")) { + return uuidContainerContainer; + } else if (blobPath1.buildAsString().contains("cluster-state/cluster-uuid2/")) { + return manifest2Container; + } else if (blobPath1.buildAsString().contains("cluster-state/cluster-uuid3/")) { + return manifest3Container; + } else { + throw new IllegalArgumentException("Unexpected blob path " + blobPath1); + } + }); + Map blobMetadataMap = Map.of( + "cluster-uuid1", + mock(BlobContainer.class), + "cluster-uuid2", + mock(BlobContainer.class), + "cluster-uuid3", + mock(BlobContainer.class) + ); + when(uuidContainerContainer.children()).thenReturn(blobMetadataMap); + when( + manifest2Container.listBlobsByPrefixInSortedOrder( + MANIFEST_FILE_PREFIX + DELIMITER, + Integer.MAX_VALUE, + BlobContainer.BlobNameSortOrder.LEXICOGRAPHIC + ) + ).thenReturn(List.of(new PlainBlobMetadata("mainfest2", 1L))); + when( + manifest3Container.listBlobsByPrefixInSortedOrder( + MANIFEST_FILE_PREFIX + DELIMITER, + Integer.MAX_VALUE, + BlobContainer.BlobNameSortOrder.LEXICOGRAPHIC + ) + ).thenReturn(List.of(new PlainBlobMetadata("mainfest3", 1L))); + remoteClusterStateService.start(); + remoteClusterStateService.deleteStaleClusterUUIDs(clusterState, clusterMetadataManifest); + try { + assertBusy(() -> { + verify(manifest2Container, times(1)).delete(); + verify(manifest3Container, times(1)).delete(); + }); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + public void testRemoteStateStats() throws IOException { final ClusterState clusterState = generateClusterStateWithOneIndex().nodes(nodesWithLocalNodeClusterManager()).build(); mockBlobStoreObjects(); @@ -1290,6 +1358,26 @@ public void testRemoteStateStats() throws IOException { assertEquals(0, remoteClusterStateService.getStats().getFailedCount()); } + public void testRemoteStateCleanupFailureStats() throws IOException { + BlobContainer blobContainer = mock(BlobContainer.class); + doThrow(IOException.class).when(blobContainer).delete(); + when(blobStore.blobContainer(any())).thenReturn(blobContainer); + BlobPath blobPath = new BlobPath().add("random-path"); + when((blobStoreRepository.basePath())).thenReturn(blobPath); + remoteClusterStateService.start(); + remoteClusterStateService.deleteStaleUUIDsClusterMetadata("cluster1", Arrays.asList("cluster-uuid1")); + try { + assertBusy(() -> { + // wait for stats to get updated + assertTrue(remoteClusterStateService.getStats() != null); + assertEquals(0, remoteClusterStateService.getStats().getSuccessCount()); + assertEquals(1, remoteClusterStateService.getStats().getCleanupAttemptFailedCount()); + }); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + public void testFileNames() { final Index index = new Index("test-index", "index-uuid"); final Settings idxSettings = Settings.builder() @@ -1330,6 +1418,36 @@ private void verifyManifestFileNameWithCodec(int codecVersion) { assertThat(splittedName[3], is("P")); } + public void testSingleConcurrentExecutionOfStaleManifestCleanup() throws Exception { + BlobContainer blobContainer = mock(BlobContainer.class); + BlobPath blobPath = new BlobPath().add("random-path"); + when((blobStoreRepository.basePath())).thenReturn(blobPath); + when(blobStore.blobContainer(any())).thenReturn(blobContainer); + + CountDownLatch latch = new CountDownLatch(1); + AtomicInteger callCount = new AtomicInteger(0); + doAnswer(invocation -> { + callCount.incrementAndGet(); + if (latch.await(5000, TimeUnit.SECONDS) == false) { + throw new Exception("Timed out waiting for delete task queuing to complete"); + } + return null; + }).when(blobContainer) + .listBlobsByPrefixInSortedOrder( + any(String.class), + any(int.class), + any(BlobContainer.BlobNameSortOrder.class), + any(ActionListener.class) + ); + + remoteClusterStateService.start(); + remoteClusterStateService.deleteStaleClusterMetadata("cluster-name", "cluster-uuid", RETAINED_MANIFESTS); + remoteClusterStateService.deleteStaleClusterMetadata("cluster-name", "cluster-uuid", RETAINED_MANIFESTS); + + latch.countDown(); + assertBusy(() -> assertEquals(1, callCount.get())); + } + public void testIndexMetadataUploadWaitTimeSetting() { // verify default value assertEquals( @@ -1773,7 +1891,7 @@ private static ClusterState.Builder generateClusterStateWithGlobalMetadata() { ); } - static ClusterState.Builder generateClusterStateWithOneIndex() { + private static ClusterState.Builder generateClusterStateWithOneIndex() { final Index index = new Index("test-index", "index-uuid"); final Settings idxSettings = Settings.builder() .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT) @@ -1803,7 +1921,7 @@ static ClusterState.Builder generateClusterStateWithOneIndex() { ); } - static DiscoveryNodes nodesWithLocalNodeClusterManager() { + private static DiscoveryNodes nodesWithLocalNodeClusterManager() { return DiscoveryNodes.builder().clusterManagerNodeId("cluster-manager-id").localNodeId("cluster-manager-id").build(); } From 11599c58bb96e90076d54629ea6b4f98469b42a7 Mon Sep 17 00:00:00 2001 From: Peter Alfonsi Date: Tue, 4 Jun 2024 16:09:18 -0700 Subject: [PATCH 13/15] [Tiered Caching] Additional ITs for cache stats (#13655) * Adds cache clear IT Signed-off-by: Peter Alfonsi Cleaned up logic for cache stats ITs Signed-off-by: Peter Alfonsi Adds more tests around tiered spillover cache Signed-off-by: Peter Alfonsi Fixed cache stats behavior for overall /_nodes/stats call Signed-off-by: Peter Alfonsi cleanup Signed-off-by: Peter Alfonsi Fixed folder structure Signed-off-by: Peter Alfonsi Addressed Sagar's comments Signed-off-by: Peter Alfonsi Addressed Ankit's comments Signed-off-by: Peter Alfonsi Break horrifyingly long test case into many shorter cases Signed-off-by: Peter Alfonsi Added unsupported operation exception to TSC stats holder incrementEvictions() Signed-off-by: Peter Alfonsi Addressed Sorabh's comments Signed-off-by: Peter Alfonsi * rerun assemble Signed-off-by: Peter Alfonsi * rerun gradle Signed-off-by: Peter Alfonsi * rerun gradle Signed-off-by: Peter Alfonsi * rerun gradle Signed-off-by: Peter Alfonsi --------- Signed-off-by: Peter Alfonsi Co-authored-by: Peter Alfonsi --- .../common/tier}/TieredSpilloverCacheIT.java | 4 +- .../tier/TieredSpilloverCacheStatsIT.java | 501 ++++++++++++++++++ .../common/tier/TieredSpilloverCache.java | 16 +- .../tier/TieredSpilloverCacheStatsHolder.java | 29 +- .../tier/TieredSpilloverCacheTests.java | 17 +- .../CacheStatsAPIIndicesRequestCacheIT.java | 194 ++++--- .../admin/indices/stats/CommonStatsFlags.java | 2 +- .../common/cache/service/NodeCacheStats.java | 9 + .../indices/IndicesRequestCache.java | 27 +- 9 files changed, 700 insertions(+), 99 deletions(-) rename modules/cache-common/src/internalClusterTest/java/{org.opensearch.cache.common.tier => org/opensearch/cache/common/tier}/TieredSpilloverCacheIT.java (99%) create mode 100644 modules/cache-common/src/internalClusterTest/java/org/opensearch/cache/common/tier/TieredSpilloverCacheStatsIT.java diff --git a/modules/cache-common/src/internalClusterTest/java/org.opensearch.cache.common.tier/TieredSpilloverCacheIT.java b/modules/cache-common/src/internalClusterTest/java/org/opensearch/cache/common/tier/TieredSpilloverCacheIT.java similarity index 99% rename from modules/cache-common/src/internalClusterTest/java/org.opensearch.cache.common.tier/TieredSpilloverCacheIT.java rename to modules/cache-common/src/internalClusterTest/java/org/opensearch/cache/common/tier/TieredSpilloverCacheIT.java index bfc184cff0566..02be0990eb136 100644 --- a/modules/cache-common/src/internalClusterTest/java/org.opensearch.cache.common.tier/TieredSpilloverCacheIT.java +++ b/modules/cache-common/src/internalClusterTest/java/org/opensearch/cache/common/tier/TieredSpilloverCacheIT.java @@ -65,7 +65,7 @@ protected Collection> nodePlugins() { return Arrays.asList(TieredSpilloverCachePlugin.class, MockDiskCachePlugin.class); } - private Settings defaultSettings(String onHeapCacheSizeInBytesOrPecentage) { + static Settings defaultSettings(String onHeapCacheSizeInBytesOrPercentage) { return Settings.builder() .put(FeatureFlags.PLUGGABLE_CACHE, "true") .put( @@ -88,7 +88,7 @@ private Settings defaultSettings(String onHeapCacheSizeInBytesOrPecentage) { OpenSearchOnHeapCacheSettings.getSettingListForCacheType(CacheType.INDICES_REQUEST_CACHE) .get(MAXIMUM_SIZE_IN_BYTES_KEY) .getKey(), - onHeapCacheSizeInBytesOrPecentage + onHeapCacheSizeInBytesOrPercentage ) .build(); } diff --git a/modules/cache-common/src/internalClusterTest/java/org/opensearch/cache/common/tier/TieredSpilloverCacheStatsIT.java b/modules/cache-common/src/internalClusterTest/java/org/opensearch/cache/common/tier/TieredSpilloverCacheStatsIT.java new file mode 100644 index 0000000000000..537caccbac652 --- /dev/null +++ b/modules/cache-common/src/internalClusterTest/java/org/opensearch/cache/common/tier/TieredSpilloverCacheStatsIT.java @@ -0,0 +1,501 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.cache.common.tier; + +import org.opensearch.action.admin.cluster.node.stats.NodesStatsRequest; +import org.opensearch.action.admin.cluster.node.stats.NodesStatsResponse; +import org.opensearch.action.admin.indices.stats.CommonStatsFlags; +import org.opensearch.action.search.SearchResponse; +import org.opensearch.client.Client; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.common.cache.CacheType; +import org.opensearch.common.cache.service.NodeCacheStats; +import org.opensearch.common.cache.stats.ImmutableCacheStats; +import org.opensearch.common.cache.stats.ImmutableCacheStatsHolder; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.unit.TimeValue; +import org.opensearch.index.cache.request.RequestCacheStats; +import org.opensearch.index.query.QueryBuilders; +import org.opensearch.indices.IndicesRequestCache; +import org.opensearch.plugins.Plugin; +import org.opensearch.test.OpenSearchIntegTestCase; +import org.opensearch.test.hamcrest.OpenSearchAssertions; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.TimeUnit; + +import static org.opensearch.cache.common.tier.TieredSpilloverCacheStatsHolder.TIER_DIMENSION_NAME; +import static org.opensearch.cache.common.tier.TieredSpilloverCacheStatsHolder.TIER_DIMENSION_VALUE_DISK; +import static org.opensearch.cache.common.tier.TieredSpilloverCacheStatsHolder.TIER_DIMENSION_VALUE_ON_HEAP; +import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked; +import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertSearchResponse; + +// Use a single data node to simplify accessing cache stats across different shards. +@OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 0) +public class TieredSpilloverCacheStatsIT extends OpenSearchIntegTestCase { + @Override + protected Collection> nodePlugins() { + return Arrays.asList(TieredSpilloverCachePlugin.class, TieredSpilloverCacheIT.MockDiskCachePlugin.class); + } + + private final String HEAP_CACHE_SIZE_STRING = "10000B"; + private final int HEAP_CACHE_SIZE = 10_000; + private final String index1Name = "index1"; + private final String index2Name = "index2"; + + /** + * Test aggregating by indices + */ + public void testIndicesLevelAggregation() throws Exception { + internalCluster().startNodes( + 1, + Settings.builder() + .put(TieredSpilloverCacheIT.defaultSettings(HEAP_CACHE_SIZE_STRING)) + .put( + TieredSpilloverCacheSettings.TOOK_TIME_POLICY_CONCRETE_SETTINGS_MAP.get(CacheType.INDICES_REQUEST_CACHE).getKey(), + new TimeValue(0, TimeUnit.SECONDS) + ) + .build() + ); + Client client = client(); + Map values = setupCacheForAggregationTests(client); + + ImmutableCacheStatsHolder allLevelsStatsHolder = getNodeCacheStatsResult( + client, + List.of(IndicesRequestCache.INDEX_DIMENSION_NAME, TIER_DIMENSION_NAME) + ); + ImmutableCacheStatsHolder indicesOnlyStatsHolder = getNodeCacheStatsResult( + client, + List.of(IndicesRequestCache.INDEX_DIMENSION_NAME) + ); + + // Get values for indices alone, assert these match for statsHolders that have additional dimensions vs. a statsHolder that only has + // the indices dimension + ImmutableCacheStats index1ExpectedStats = returnNullIfAllZero( + new ImmutableCacheStats( + values.get("hitsOnHeapIndex1") + values.get("hitsOnDiskIndex1"), + values.get("itemsOnDiskIndex1AfterTest") + values.get("itemsOnHeapIndex1AfterTest"), + 0, + (values.get("itemsOnDiskIndex1AfterTest") + values.get("itemsOnHeapIndex1AfterTest")) * values.get("singleSearchSize"), + values.get("itemsOnDiskIndex1AfterTest") + values.get("itemsOnHeapIndex1AfterTest") + ) + ); + ImmutableCacheStats index2ExpectedStats = returnNullIfAllZero( + new ImmutableCacheStats( + values.get("hitsOnHeapIndex2") + values.get("hitsOnDiskIndex2"), + values.get("itemsOnDiskIndex2AfterTest") + values.get("itemsOnHeapIndex2AfterTest"), + 0, + (values.get("itemsOnDiskIndex2AfterTest") + values.get("itemsOnHeapIndex2AfterTest")) * values.get("singleSearchSize"), + values.get("itemsOnDiskIndex2AfterTest") + values.get("itemsOnHeapIndex2AfterTest") + ) + ); + + for (ImmutableCacheStatsHolder statsHolder : List.of(allLevelsStatsHolder, indicesOnlyStatsHolder)) { + assertEquals(index1ExpectedStats, statsHolder.getStatsForDimensionValues(List.of(index1Name))); + assertEquals(index2ExpectedStats, statsHolder.getStatsForDimensionValues(List.of(index2Name))); + } + } + + /** + * Test aggregating by indices and tier + */ + public void testIndicesAndTierLevelAggregation() throws Exception { + internalCluster().startNodes( + 1, + Settings.builder() + .put(TieredSpilloverCacheIT.defaultSettings(HEAP_CACHE_SIZE_STRING)) + .put( + TieredSpilloverCacheSettings.TOOK_TIME_POLICY_CONCRETE_SETTINGS_MAP.get(CacheType.INDICES_REQUEST_CACHE).getKey(), + new TimeValue(0, TimeUnit.SECONDS) + ) + .build() + ); + Client client = client(); + Map values = setupCacheForAggregationTests(client); + + ImmutableCacheStatsHolder allLevelsStatsHolder = getNodeCacheStatsResult( + client, + List.of(IndicesRequestCache.INDEX_DIMENSION_NAME, TIER_DIMENSION_NAME) + ); + + // Get values broken down by indices+tiers + ImmutableCacheStats index1HeapExpectedStats = returnNullIfAllZero( + new ImmutableCacheStats( + values.get("hitsOnHeapIndex1"), + values.get("itemsOnHeapIndex1AfterTest") + values.get("itemsOnDiskIndex1AfterTest") + values.get("hitsOnDiskIndex1"), + values.get("itemsOnDiskIndex1AfterTest"), + values.get("itemsOnHeapIndex1AfterTest") * values.get("singleSearchSize"), + values.get("itemsOnHeapIndex1AfterTest") + ) + ); + assertEquals( + index1HeapExpectedStats, + allLevelsStatsHolder.getStatsForDimensionValues(List.of(index1Name, TIER_DIMENSION_VALUE_ON_HEAP)) + ); + + ImmutableCacheStats index2HeapExpectedStats = returnNullIfAllZero( + new ImmutableCacheStats( + values.get("hitsOnHeapIndex2"), + values.get("itemsOnHeapIndex2AfterTest") + values.get("itemsOnDiskIndex2AfterTest") + values.get("hitsOnDiskIndex2"), + values.get("itemsOnDiskIndex2AfterTest"), + values.get("itemsOnHeapIndex2AfterTest") * values.get("singleSearchSize"), + values.get("itemsOnHeapIndex2AfterTest") + ) + ); + assertEquals( + index2HeapExpectedStats, + allLevelsStatsHolder.getStatsForDimensionValues(List.of(index2Name, TIER_DIMENSION_VALUE_ON_HEAP)) + ); + + ImmutableCacheStats index1DiskExpectedStats = returnNullIfAllZero( + new ImmutableCacheStats( + values.get("hitsOnDiskIndex1"), + values.get("itemsOnHeapIndex1AfterTest") + values.get("itemsOnDiskIndex1AfterTest"), + 0, + values.get("itemsOnDiskIndex1AfterTest") * values.get("singleSearchSize"), + values.get("itemsOnDiskIndex1AfterTest") + ) + ); + assertEquals( + index1DiskExpectedStats, + allLevelsStatsHolder.getStatsForDimensionValues(List.of(index1Name, TIER_DIMENSION_VALUE_DISK)) + ); + + ImmutableCacheStats index2DiskExpectedStats = returnNullIfAllZero( + new ImmutableCacheStats( + values.get("hitsOnDiskIndex2"), + values.get("itemsOnHeapIndex2AfterTest") + values.get("itemsOnDiskIndex2AfterTest"), + 0, + values.get("itemsOnDiskIndex2AfterTest") * values.get("singleSearchSize"), + values.get("itemsOnDiskIndex2AfterTest") + ) + ); + assertEquals( + index2DiskExpectedStats, + allLevelsStatsHolder.getStatsForDimensionValues(List.of(index2Name, TIER_DIMENSION_VALUE_DISK)) + ); + } + + /** + * Test aggregating by tier only + */ + public void testTierLevelAggregation() throws Exception { + internalCluster().startNodes( + 1, + Settings.builder() + .put(TieredSpilloverCacheIT.defaultSettings(HEAP_CACHE_SIZE_STRING)) + .put( + TieredSpilloverCacheSettings.TOOK_TIME_POLICY_CONCRETE_SETTINGS_MAP.get(CacheType.INDICES_REQUEST_CACHE).getKey(), + new TimeValue(0, TimeUnit.SECONDS) + ) + .build() + ); + Client client = client(); + Map values = setupCacheForAggregationTests(client); + + // Get values for tiers alone and check they add correctly across indices + ImmutableCacheStatsHolder tiersOnlyStatsHolder = getNodeCacheStatsResult(client, List.of(TIER_DIMENSION_NAME)); + ImmutableCacheStats totalHeapExpectedStats = returnNullIfAllZero( + new ImmutableCacheStats( + values.get("hitsOnHeapIndex1") + values.get("hitsOnHeapIndex2"), + values.get("itemsOnHeapAfterTest") + values.get("itemsOnDiskAfterTest") + values.get("hitsOnDiskIndex1") + values.get( + "hitsOnDiskIndex2" + ), + values.get("itemsOnDiskAfterTest"), + values.get("itemsOnHeapAfterTest") * values.get("singleSearchSize"), + values.get("itemsOnHeapAfterTest") + ) + ); + ImmutableCacheStats heapStats = tiersOnlyStatsHolder.getStatsForDimensionValues(List.of(TIER_DIMENSION_VALUE_ON_HEAP)); + assertEquals(totalHeapExpectedStats, heapStats); + ImmutableCacheStats totalDiskExpectedStats = returnNullIfAllZero( + new ImmutableCacheStats( + values.get("hitsOnDiskIndex1") + values.get("hitsOnDiskIndex2"), + values.get("itemsOnHeapAfterTest") + values.get("itemsOnDiskAfterTest"), + 0, + values.get("itemsOnDiskAfterTest") * values.get("singleSearchSize"), + values.get("itemsOnDiskAfterTest") + ) + ); + ImmutableCacheStats diskStats = tiersOnlyStatsHolder.getStatsForDimensionValues(List.of(TIER_DIMENSION_VALUE_DISK)); + assertEquals(totalDiskExpectedStats, diskStats); + } + + public void testInvalidLevelsAreIgnored() throws Exception { + internalCluster().startNodes( + 1, + Settings.builder() + .put(TieredSpilloverCacheIT.defaultSettings(HEAP_CACHE_SIZE_STRING)) + .put( + TieredSpilloverCacheSettings.TOOK_TIME_POLICY_CONCRETE_SETTINGS_MAP.get(CacheType.INDICES_REQUEST_CACHE).getKey(), + new TimeValue(0, TimeUnit.SECONDS) + ) + .build() + ); + Client client = client(); + Map values = setupCacheForAggregationTests(client); + + ImmutableCacheStatsHolder allLevelsStatsHolder = getNodeCacheStatsResult( + client, + List.of(IndicesRequestCache.INDEX_DIMENSION_NAME, TIER_DIMENSION_NAME) + ); + ImmutableCacheStatsHolder indicesOnlyStatsHolder = getNodeCacheStatsResult( + client, + List.of(IndicesRequestCache.INDEX_DIMENSION_NAME) + ); + + // Test invalid levels are ignored and permuting the order of levels in the request doesn't matter + + // This should be equivalent to just "indices" + ImmutableCacheStatsHolder indicesEquivalentStatsHolder = getNodeCacheStatsResult( + client, + List.of(IndicesRequestCache.INDEX_DIMENSION_NAME, "unrecognized_dimension") + ); + assertEquals(indicesOnlyStatsHolder, indicesEquivalentStatsHolder); + + // This should be equivalent to "indices", "tier" + ImmutableCacheStatsHolder indicesAndTierEquivalentStatsHolder = getNodeCacheStatsResult( + client, + List.of(TIER_DIMENSION_NAME, "unrecognized_dimension_1", IndicesRequestCache.INDEX_DIMENSION_NAME, "unrecognized_dimension_2") + ); + assertEquals(allLevelsStatsHolder, indicesAndTierEquivalentStatsHolder); + + // This should be equivalent to no levels passed in + ImmutableCacheStatsHolder noLevelsEquivalentStatsHolder = getNodeCacheStatsResult( + client, + List.of("unrecognized_dimension_1", "unrecognized_dimension_2") + ); + ImmutableCacheStatsHolder noLevelsStatsHolder = getNodeCacheStatsResult(client, List.of()); + assertEquals(noLevelsStatsHolder, noLevelsEquivalentStatsHolder); + } + + /** + * Check the new stats API returns the same values as the old stats API. + */ + public void testStatsMatchOldApi() throws Exception { + internalCluster().startNodes( + 1, + Settings.builder() + .put(TieredSpilloverCacheIT.defaultSettings(HEAP_CACHE_SIZE_STRING)) + .put( + TieredSpilloverCacheSettings.TOOK_TIME_POLICY_CONCRETE_SETTINGS_MAP.get(CacheType.INDICES_REQUEST_CACHE).getKey(), + new TimeValue(0, TimeUnit.SECONDS) + ) + .build() + ); + String index = "index"; + Client client = client(); + startIndex(client, index); + + // First search one time to see how big a single value will be + searchIndex(client, index, 0); + // get total stats + long singleSearchSize = getTotalStats(client).getSizeInBytes(); + // Select numbers so we get some values on both heap and disk + int itemsOnHeap = HEAP_CACHE_SIZE / (int) singleSearchSize; + int itemsOnDisk = 1 + randomInt(30); // The first one we search (to get the size) always goes to disk + int expectedEntries = itemsOnHeap + itemsOnDisk; + + for (int i = 1; i < expectedEntries; i++) { + // Cause misses + searchIndex(client, index, i); + } + int expectedMisses = itemsOnHeap + itemsOnDisk; + + // Cause some hits + int expectedHits = randomIntBetween(itemsOnHeap, expectedEntries); // Select it so some hits come from both tiers + for (int i = 0; i < expectedHits; i++) { + searchIndex(client, index, i); + } + + ImmutableCacheStats totalStats = getNodeCacheStatsResult(client, List.of()).getTotalStats(); + + // Check the new stats API values are as expected + assertEquals( + new ImmutableCacheStats(expectedHits, expectedMisses, 0, expectedEntries * singleSearchSize, expectedEntries), + totalStats + ); + // Now check the new stats API values for the cache as a whole match the old stats API values + RequestCacheStats oldAPIStats = client.admin() + .indices() + .prepareStats(index) + .setRequestCache(true) + .get() + .getTotal() + .getRequestCache(); + assertEquals(oldAPIStats.getHitCount(), totalStats.getHits()); + assertEquals(oldAPIStats.getMissCount(), totalStats.getMisses()); + assertEquals(oldAPIStats.getEvictions(), totalStats.getEvictions()); + assertEquals(oldAPIStats.getMemorySizeInBytes(), totalStats.getSizeInBytes()); + } + + private void startIndex(Client client, String indexName) throws InterruptedException { + assertAcked( + client.admin() + .indices() + .prepareCreate(indexName) + .setMapping("k", "type=keyword") + .setSettings( + Settings.builder() + .put(IndicesRequestCache.INDEX_CACHE_REQUEST_ENABLED_SETTING.getKey(), true) + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0) + .build() + ) + .get() + ); + indexRandom(true, client.prepareIndex(indexName).setSource("k", "hello")); + ensureSearchable(indexName); + } + + private Map setupCacheForAggregationTests(Client client) throws Exception { + startIndex(client, index1Name); + startIndex(client, index2Name); + + // First search one time to see how big a single value will be + searchIndex(client, index1Name, 0); + // get total stats + long singleSearchSize = getTotalStats(client).getSizeInBytes(); + int itemsOnHeapAfterTest = HEAP_CACHE_SIZE / (int) singleSearchSize; // As the heap tier evicts, the items on it after the test will + // be the same as its max capacity + int itemsOnDiskAfterTest = 1 + randomInt(30); // The first one we search (to get the size) always goes to disk + + // Put some values on heap and disk for each index + int itemsOnHeapIndex1AfterTest = randomInt(itemsOnHeapAfterTest); + int itemsOnHeapIndex2AfterTest = itemsOnHeapAfterTest - itemsOnHeapIndex1AfterTest; + int itemsOnDiskIndex1AfterTest = 1 + randomInt(itemsOnDiskAfterTest - 1); + // The first one we search (to get the size) always goes to disk + int itemsOnDiskIndex2AfterTest = itemsOnDiskAfterTest - itemsOnDiskIndex1AfterTest; + int hitsOnHeapIndex1 = randomInt(itemsOnHeapIndex1AfterTest); + int hitsOnDiskIndex1 = randomInt(itemsOnDiskIndex1AfterTest); + int hitsOnHeapIndex2 = randomInt(itemsOnHeapIndex2AfterTest); + int hitsOnDiskIndex2 = randomInt(itemsOnDiskIndex2AfterTest); + + // Put these values into a map so tests can know what to expect in stats responses + Map expectedValues = new HashMap<>(); + expectedValues.put("itemsOnHeapIndex1AfterTest", itemsOnHeapIndex1AfterTest); + expectedValues.put("itemsOnHeapIndex2AfterTest", itemsOnHeapIndex2AfterTest); + expectedValues.put("itemsOnDiskIndex1AfterTest", itemsOnDiskIndex1AfterTest); + expectedValues.put("itemsOnDiskIndex2AfterTest", itemsOnDiskIndex2AfterTest); + expectedValues.put("hitsOnHeapIndex1", hitsOnHeapIndex1); + expectedValues.put("hitsOnDiskIndex1", hitsOnDiskIndex1); + expectedValues.put("hitsOnHeapIndex2", hitsOnHeapIndex2); + expectedValues.put("hitsOnDiskIndex2", hitsOnDiskIndex2); + expectedValues.put("singleSearchSize", (int) singleSearchSize); + expectedValues.put("itemsOnDiskAfterTest", itemsOnDiskAfterTest); + expectedValues.put("itemsOnHeapAfterTest", itemsOnHeapAfterTest); // Can only pass 10 keys in Map.of() constructor + + // The earliest items (0 - itemsOnDiskAfterTest) are the ones which get evicted to disk + for (int i = 1; i < itemsOnDiskIndex1AfterTest; i++) { // Start at 1 as 0 has already been searched + searchIndex(client, index1Name, i); + } + for (int i = itemsOnDiskIndex1AfterTest; i < itemsOnDiskIndex1AfterTest + itemsOnDiskIndex2AfterTest; i++) { + searchIndex(client, index2Name, i); + } + // The remaining items stay on heap + for (int i = itemsOnDiskAfterTest; i < itemsOnDiskAfterTest + itemsOnHeapIndex1AfterTest; i++) { + searchIndex(client, index1Name, i); + } + for (int i = itemsOnDiskAfterTest + itemsOnHeapIndex1AfterTest; i < itemsOnDiskAfterTest + itemsOnHeapAfterTest; i++) { + searchIndex(client, index2Name, i); + } + + // Get some hits on all combinations of indices and tiers + for (int i = itemsOnDiskAfterTest; i < itemsOnDiskAfterTest + hitsOnHeapIndex1; i++) { + // heap hits for index 1 + searchIndex(client, index1Name, i); + } + for (int i = itemsOnDiskAfterTest + itemsOnHeapIndex1AfterTest; i < itemsOnDiskAfterTest + itemsOnHeapIndex1AfterTest + + hitsOnHeapIndex2; i++) { + // heap hits for index 2 + searchIndex(client, index2Name, i); + } + for (int i = 0; i < hitsOnDiskIndex1; i++) { + // disk hits for index 1 + searchIndex(client, index1Name, i); + } + for (int i = itemsOnDiskIndex1AfterTest; i < itemsOnDiskIndex1AfterTest + hitsOnDiskIndex2; i++) { + // disk hits for index 2 + searchIndex(client, index2Name, i); + } + return expectedValues; + } + + private ImmutableCacheStats returnNullIfAllZero(ImmutableCacheStats expectedStats) { + // If the randomly chosen numbers are such that the expected stats would be 0, we actually have not interacted with the cache for + // this index. + // In this case, we expect the stats holder to have no stats for this node, and therefore we should get null from + // statsHolder.getStatsForDimensionValues(). + // We will not see it in the XContent response. + if (expectedStats.equals(new ImmutableCacheStats(0, 0, 0, 0, 0))) { + return null; + } + return expectedStats; + } + + // Duplicated from CacheStatsAPIIndicesRequestCacheIT.java, as we can't add a dependency on server.internalClusterTest + + private SearchResponse searchIndex(Client client, String index, int searchSuffix) { + SearchResponse resp = client.prepareSearch(index) + .setRequestCache(true) + .setQuery(QueryBuilders.termQuery("k", "hello" + padWithZeros(4, searchSuffix))) + // pad with zeros so request 0 and request 10 have the same size ("0000" and "0010" instead of "0" and "10") + .get(); + assertSearchResponse(resp); + OpenSearchAssertions.assertAllSuccessful(resp); + return resp; + } + + private String padWithZeros(int finalLength, int inputValue) { + // Avoid forbidden API String.format() + String input = String.valueOf(inputValue); + if (input.length() >= finalLength) { + return input; + } + StringBuilder sb = new StringBuilder(); + while (sb.length() < finalLength - input.length()) { + sb.append('0'); + } + sb.append(input); + return sb.toString(); + } + + private ImmutableCacheStats getTotalStats(Client client) throws IOException { + ImmutableCacheStatsHolder statsHolder = getNodeCacheStatsResult(client, List.of()); + return statsHolder.getStatsForDimensionValues(List.of()); + } + + private static ImmutableCacheStatsHolder getNodeCacheStatsResult(Client client, List aggregationLevels) throws IOException { + CommonStatsFlags statsFlags = new CommonStatsFlags(); + statsFlags.includeAllCacheTypes(); + String[] flagsLevels; + if (aggregationLevels == null) { + flagsLevels = null; + } else { + flagsLevels = aggregationLevels.toArray(new String[0]); + } + statsFlags.setLevels(flagsLevels); + + NodesStatsResponse nodeStatsResponse = client.admin() + .cluster() + .prepareNodesStats("data:true") + .addMetric(NodesStatsRequest.Metric.CACHE_STATS.metricName()) + .setIndices(statsFlags) + .get(); + // Can always get the first data node as there's only one in this test suite + assertEquals(1, nodeStatsResponse.getNodes().size()); + NodeCacheStats ncs = nodeStatsResponse.getNodes().get(0).getNodeCacheStats(); + return ncs.getStatsByCache(CacheType.INDICES_REQUEST_CACHE); + } +} diff --git a/modules/cache-common/src/main/java/org/opensearch/cache/common/tier/TieredSpilloverCache.java b/modules/cache-common/src/main/java/org/opensearch/cache/common/tier/TieredSpilloverCache.java index f40c35dde83de..63cdbca101f2a 100644 --- a/modules/cache-common/src/main/java/org/opensearch/cache/common/tier/TieredSpilloverCache.java +++ b/modules/cache-common/src/main/java/org/opensearch/cache/common/tier/TieredSpilloverCache.java @@ -327,6 +327,7 @@ private Function, Tuple> getValueFromTieredCache(boolean void handleRemovalFromHeapTier(RemovalNotification, V> notification) { ICacheKey key = notification.getKey(); boolean wasEvicted = SPILLOVER_REMOVAL_REASONS.contains(notification.getRemovalReason()); + boolean countEvictionTowardsTotal = false; // Don't count this eviction towards the cache's total if it ends up in the disk tier if (caches.get(diskCache).isEnabled() && wasEvicted && evaluatePolicies(notification.getValue())) { try (ReleasableLock ignore = writeLock.acquire()) { diskCache.put(key, notification.getValue()); // spill over to the disk tier and increment its stats @@ -336,21 +337,28 @@ void handleRemovalFromHeapTier(RemovalNotification, V> notification // If the value is not going to the disk cache, send this notification to the TSC's removal listener // as the value is leaving the TSC entirely removalListener.onRemoval(notification); + countEvictionTowardsTotal = true; } - updateStatsOnRemoval(TIER_DIMENSION_VALUE_ON_HEAP, wasEvicted, key, notification.getValue()); + updateStatsOnRemoval(TIER_DIMENSION_VALUE_ON_HEAP, wasEvicted, key, notification.getValue(), countEvictionTowardsTotal); } void handleRemovalFromDiskTier(RemovalNotification, V> notification) { // Values removed from the disk tier leave the TSC entirely removalListener.onRemoval(notification); boolean wasEvicted = SPILLOVER_REMOVAL_REASONS.contains(notification.getRemovalReason()); - updateStatsOnRemoval(TIER_DIMENSION_VALUE_DISK, wasEvicted, notification.getKey(), notification.getValue()); + updateStatsOnRemoval(TIER_DIMENSION_VALUE_DISK, wasEvicted, notification.getKey(), notification.getValue(), true); } - void updateStatsOnRemoval(String removedFromTierValue, boolean wasEvicted, ICacheKey key, V value) { + void updateStatsOnRemoval( + String removedFromTierValue, + boolean wasEvicted, + ICacheKey key, + V value, + boolean countEvictionTowardsTotal + ) { List dimensionValues = statsHolder.getDimensionsWithTierValue(key.dimensions, removedFromTierValue); if (wasEvicted) { - statsHolder.incrementEvictions(dimensionValues); + statsHolder.incrementEvictions(dimensionValues, countEvictionTowardsTotal); } statsHolder.decrementItems(dimensionValues); statsHolder.decrementSizeInBytes(dimensionValues, weigher.applyAsLong(key, value)); diff --git a/modules/cache-common/src/main/java/org/opensearch/cache/common/tier/TieredSpilloverCacheStatsHolder.java b/modules/cache-common/src/main/java/org/opensearch/cache/common/tier/TieredSpilloverCacheStatsHolder.java index d17059e8dee94..b40724430454b 100644 --- a/modules/cache-common/src/main/java/org/opensearch/cache/common/tier/TieredSpilloverCacheStatsHolder.java +++ b/modules/cache-common/src/main/java/org/opensearch/cache/common/tier/TieredSpilloverCacheStatsHolder.java @@ -105,20 +105,29 @@ public void incrementMisses(List dimensionValues) { internalIncrement(dimensionValues, missIncrementer, true); } + /** + * This method shouldn't be used in this class. Instead, use incrementEvictions(dimensionValues, includeInTotal) + * which specifies whether the eviction should be included in the cache's total evictions, or if it should + * just count towards that tier's evictions. + * @param dimensionValues The dimension values + */ @Override public void incrementEvictions(List dimensionValues) { - final String tierValue = validateTierDimensionValue(dimensionValues); + throw new UnsupportedOperationException( + "TieredSpilloverCacheHolder must specify whether to include an eviction in the total cache stats. Use incrementEvictions(List dimensionValues, boolean includeInTotal)" + ); + } - // If the disk tier is present, only evictions from the disk tier should be included in total values. + /** + * Increment evictions for this set of dimension values. + * @param dimensionValues The dimension values + * @param includeInTotal Whether to include this eviction in the total for the whole cache's evictions + */ + public void incrementEvictions(List dimensionValues, boolean includeInTotal) { + validateTierDimensionValue(dimensionValues); + // If we count this eviction towards the total, we should increment all ancestor nodes. If not, only increment the leaf node. Consumer evictionsIncrementer = (node) -> { - if (tierValue.equals(TIER_DIMENSION_VALUE_ON_HEAP) && diskCacheEnabled) { - // If on-heap tier, increment only the leaf node corresponding to the on heap values; not the total values in its parent - // nodes - if (node.isAtLowestLevel()) { - node.incrementEvictions(); - } - } else { - // If disk tier, or on-heap tier with a disabled disk tier, increment the leaf node and its parents + if (includeInTotal || node.isAtLowestLevel()) { node.incrementEvictions(); } }; diff --git a/modules/cache-common/src/test/java/org/opensearch/cache/common/tier/TieredSpilloverCacheTests.java b/modules/cache-common/src/test/java/org/opensearch/cache/common/tier/TieredSpilloverCacheTests.java index 6c49341591589..54b15f236a418 100644 --- a/modules/cache-common/src/test/java/org/opensearch/cache/common/tier/TieredSpilloverCacheTests.java +++ b/modules/cache-common/src/test/java/org/opensearch/cache/common/tier/TieredSpilloverCacheTests.java @@ -926,14 +926,14 @@ public void testDiskTierPolicies() throws Exception { MockCacheRemovalListener removalListener = new MockCacheRemovalListener<>(); TieredSpilloverCache tieredSpilloverCache = intializeTieredSpilloverCache( keyValueSize, - 100, + keyValueSize * 100, removalListener, Settings.builder() .put( OpenSearchOnHeapCacheSettings.getSettingListForCacheType(CacheType.INDICES_REQUEST_CACHE) .get(MAXIMUM_SIZE_IN_BYTES_KEY) .getKey(), - onHeapCacheSize * 50 + "b" + onHeapCacheSize * keyValueSize + "b" ) .build(), 0, @@ -955,6 +955,7 @@ public void testDiskTierPolicies() throws Exception { LoadAwareCacheLoader, String> loader = getLoadAwareCacheLoader(keyValuePairs); + int expectedEvictions = 0; for (String key : keyValuePairs.keySet()) { ICacheKey iCacheKey = getICacheKey(key); Boolean expectedOutput = expectedOutputs.get(key); @@ -967,8 +968,15 @@ public void testDiskTierPolicies() throws Exception { } else { // Should miss as heap tier size = 0 and the policy rejected it assertNull(result); + expectedEvictions++; } } + + // We expect values that were evicted from the heap tier and not allowed into the disk tier by the policy + // to count towards total evictions + assertEquals(keyValuePairs.size(), getEvictionsForTier(tieredSpilloverCache, TIER_DIMENSION_VALUE_ON_HEAP)); + assertEquals(0, getEvictionsForTier(tieredSpilloverCache, TIER_DIMENSION_VALUE_DISK)); // Disk tier is large enough for no evictions + assertEquals(expectedEvictions, getTotalStatsSnapshot(tieredSpilloverCache).getEvictions()); } public void testTookTimePolicyFromFactory() throws Exception { @@ -1493,6 +1501,11 @@ private ImmutableCacheStats getStatsSnapshotForTier(TieredSpilloverCache t return snapshot; } + private ImmutableCacheStats getTotalStatsSnapshot(TieredSpilloverCache tsc) throws IOException { + ImmutableCacheStatsHolder cacheStats = tsc.stats(new String[0]); + return cacheStats.getStatsForDimensionValues(List.of()); + } + // Duplicated here from EhcacheDiskCacheTests.java, we can't add a dependency on that plugin static class StringSerializer implements Serializer { private final Charset charset = StandardCharsets.UTF_8; diff --git a/server/src/internalClusterTest/java/org/opensearch/indices/CacheStatsAPIIndicesRequestCacheIT.java b/server/src/internalClusterTest/java/org/opensearch/indices/CacheStatsAPIIndicesRequestCacheIT.java index de7a52761c77c..0539f96e429c1 100644 --- a/server/src/internalClusterTest/java/org/opensearch/indices/CacheStatsAPIIndicesRequestCacheIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/indices/CacheStatsAPIIndicesRequestCacheIT.java @@ -12,6 +12,7 @@ import org.opensearch.action.admin.cluster.node.stats.NodesStatsRequest; import org.opensearch.action.admin.cluster.node.stats.NodesStatsResponse; +import org.opensearch.action.admin.indices.cache.clear.ClearIndicesCacheRequest; import org.opensearch.action.admin.indices.stats.CommonStatsFlags; import org.opensearch.action.search.SearchResponse; import org.opensearch.client.Client; @@ -20,7 +21,7 @@ import org.opensearch.common.cache.CacheType; import org.opensearch.common.cache.service.NodeCacheStats; import org.opensearch.common.cache.stats.ImmutableCacheStats; -import org.opensearch.common.cache.stats.ImmutableCacheStatsHolderTests; +import org.opensearch.common.cache.stats.ImmutableCacheStatsHolder; import org.opensearch.common.settings.Settings; import org.opensearch.common.util.FeatureFlags; import org.opensearch.common.xcontent.XContentFactory; @@ -56,6 +57,10 @@ public static Collection parameters() { return Arrays.asList(new Object[] { Settings.builder().put(FeatureFlags.PLUGGABLE_CACHE, "true").build() }); } + /** + * Test aggregating by indices, indices+shards, shards, or no levels, and check the resulting stats + * are as we expect. + */ public void testCacheStatsAPIWIthOnHeapCache() throws Exception { String index1Name = "index1"; String index2Name = "index2"; @@ -73,84 +78,60 @@ public void testCacheStatsAPIWIthOnHeapCache() throws Exception { searchIndex(client, index2Name, ""); // First, aggregate by indices only - Map xContentMap = getNodeCacheStatsXContentMap(client, List.of(IndicesRequestCache.INDEX_DIMENSION_NAME)); + ImmutableCacheStatsHolder indicesStats = getNodeCacheStatsResult(client, List.of(IndicesRequestCache.INDEX_DIMENSION_NAME)); - List index1Keys = List.of(CacheType.INDICES_REQUEST_CACHE.getValue(), IndicesRequestCache.INDEX_DIMENSION_NAME, index1Name); + List index1Dimensions = List.of(index1Name); // Since we searched twice, we expect to see 1 hit, 1 miss and 1 entry for index 1 ImmutableCacheStats expectedStats = new ImmutableCacheStats(1, 1, 0, 0, 1); - checkCacheStatsAPIResponse(xContentMap, index1Keys, expectedStats, false, true); + checkCacheStatsAPIResponse(indicesStats, index1Dimensions, expectedStats, false, true); // Get the request size for one request, so we can reuse it for next index - int requestSize = (int) ((Map) ImmutableCacheStatsHolderTests.getValueFromNestedXContentMap( - xContentMap, - index1Keys - )).get(ImmutableCacheStats.Fields.SIZE_IN_BYTES); + long requestSize = indicesStats.getStatsForDimensionValues(List.of(index1Name)).getSizeInBytes(); assertTrue(requestSize > 0); - List index2Keys = List.of(CacheType.INDICES_REQUEST_CACHE.getValue(), IndicesRequestCache.INDEX_DIMENSION_NAME, index2Name); + List index2Dimensions = List.of(index2Name); // We searched once in index 2, we expect 1 miss + 1 entry expectedStats = new ImmutableCacheStats(0, 1, 0, requestSize, 1); - checkCacheStatsAPIResponse(xContentMap, index2Keys, expectedStats, true, true); + checkCacheStatsAPIResponse(indicesStats, index2Dimensions, expectedStats, true, true); // The total stats for the node should be 1 hit, 2 misses, and 2 entries expectedStats = new ImmutableCacheStats(1, 2, 0, 2 * requestSize, 2); - List totalStatsKeys = List.of(CacheType.INDICES_REQUEST_CACHE.getValue()); - checkCacheStatsAPIResponse(xContentMap, totalStatsKeys, expectedStats, true, true); + List totalStatsKeys = List.of(); + checkCacheStatsAPIResponse(indicesStats, totalStatsKeys, expectedStats, true, true); // Aggregate by shards only - xContentMap = getNodeCacheStatsXContentMap(client, List.of(IndicesRequestCache.SHARD_ID_DIMENSION_NAME)); + ImmutableCacheStatsHolder shardsStats = getNodeCacheStatsResult(client, List.of(IndicesRequestCache.SHARD_ID_DIMENSION_NAME)); - List index1Shard0Keys = List.of( - CacheType.INDICES_REQUEST_CACHE.getValue(), - IndicesRequestCache.SHARD_ID_DIMENSION_NAME, - "[" + index1Name + "][0]" - ); + List index1Shard0Dimensions = List.of("[" + index1Name + "][0]"); expectedStats = new ImmutableCacheStats(1, 1, 0, requestSize, 1); - checkCacheStatsAPIResponse(xContentMap, index1Shard0Keys, expectedStats, true, true); + checkCacheStatsAPIResponse(shardsStats, index1Shard0Dimensions, expectedStats, true, true); - List index2Shard0Keys = List.of( - CacheType.INDICES_REQUEST_CACHE.getValue(), - IndicesRequestCache.SHARD_ID_DIMENSION_NAME, - "[" + index2Name + "][0]" - ); + List index2Shard0Dimensions = List.of("[" + index2Name + "][0]"); expectedStats = new ImmutableCacheStats(0, 1, 0, requestSize, 1); - checkCacheStatsAPIResponse(xContentMap, index2Shard0Keys, expectedStats, true, true); + checkCacheStatsAPIResponse(shardsStats, index2Shard0Dimensions, expectedStats, true, true); // Aggregate by indices and shards - xContentMap = getNodeCacheStatsXContentMap( + ImmutableCacheStatsHolder indicesAndShardsStats = getNodeCacheStatsResult( client, List.of(IndicesRequestCache.INDEX_DIMENSION_NAME, IndicesRequestCache.SHARD_ID_DIMENSION_NAME) ); - index1Keys = List.of( - CacheType.INDICES_REQUEST_CACHE.getValue(), - IndicesRequestCache.INDEX_DIMENSION_NAME, - index1Name, - IndicesRequestCache.SHARD_ID_DIMENSION_NAME, - "[" + index1Name + "][0]" - ); + index1Dimensions = List.of(index1Name, "[" + index1Name + "][0]"); expectedStats = new ImmutableCacheStats(1, 1, 0, requestSize, 1); - checkCacheStatsAPIResponse(xContentMap, index1Keys, expectedStats, true, true); - - index2Keys = List.of( - CacheType.INDICES_REQUEST_CACHE.getValue(), - IndicesRequestCache.INDEX_DIMENSION_NAME, - index2Name, - IndicesRequestCache.SHARD_ID_DIMENSION_NAME, - "[" + index2Name + "][0]" - ); + checkCacheStatsAPIResponse(indicesAndShardsStats, index1Dimensions, expectedStats, true, true); + index2Dimensions = List.of(index2Name, "[" + index2Name + "][0]"); expectedStats = new ImmutableCacheStats(0, 1, 0, requestSize, 1); - checkCacheStatsAPIResponse(xContentMap, index2Keys, expectedStats, true, true); - + checkCacheStatsAPIResponse(indicesAndShardsStats, index2Dimensions, expectedStats, true, true); } - // TODO: Add testCacheStatsAPIWithTieredCache when TSC stats implementation PR is merged - + /** + * Check the new stats API returns the same values as the old stats API. In particular, + * check that the new and old APIs are both correctly estimating memory size, + * using the logic that includes the overhead memory in ICacheKey. + */ public void testStatsMatchOldApi() throws Exception { - // The main purpose of this test is to check that the new and old APIs are both correctly estimating memory size, - // using the logic that includes the overhead memory in ICacheKey. String index = "index"; Client client = client(); startIndex(client, index); @@ -173,8 +154,7 @@ public void testStatsMatchOldApi() throws Exception { .getRequestCache(); assertNotEquals(0, oldApiStats.getMemorySizeInBytes()); - List xContentMapKeys = List.of(CacheType.INDICES_REQUEST_CACHE.getValue()); - Map xContentMap = getNodeCacheStatsXContentMap(client, List.of()); + ImmutableCacheStatsHolder statsHolder = getNodeCacheStatsResult(client, List.of()); ImmutableCacheStats expected = new ImmutableCacheStats( oldApiStats.getHitCount(), oldApiStats.getMissCount(), @@ -183,9 +163,13 @@ public void testStatsMatchOldApi() throws Exception { 0 ); // Don't check entries, as the old API doesn't track this - checkCacheStatsAPIResponse(xContentMap, xContentMapKeys, expected, true, false); + checkCacheStatsAPIResponse(statsHolder, List.of(), expected, true, false); } + /** + * Test the XContent in the response behaves correctly when we pass null levels. + * Only the total cache stats should be returned. + */ public void testNullLevels() throws Exception { String index = "index"; Client client = client(); @@ -194,9 +178,81 @@ public void testNullLevels() throws Exception { for (int i = 0; i < numKeys; i++) { searchIndex(client, index, String.valueOf(i)); } - Map xContentMap = getNodeCacheStatsXContentMap(client, null); + Map xContentMap = getStatsXContent(getNodeCacheStatsResult(client, null)); // Null levels should result in only the total cache stats being returned -> 6 fields inside the response. - assertEquals(6, ((Map) xContentMap.get("request_cache")).size()); + assertEquals(6, xContentMap.size()); + } + + /** + * Test clearing the cache using API sets memory size and number of items to 0, but leaves other stats + * unaffected. + */ + public void testCacheClear() throws Exception { + String index = "index"; + Client client = client(); + + startIndex(client, index); + + int expectedHits = 2; + int expectedMisses = 7; + // Search for the same doc to give hits + for (int i = 0; i < expectedHits + 1; i++) { + searchIndex(client, index, ""); + } + // Search for new docs + for (int i = 0; i < expectedMisses - 1; i++) { + searchIndex(client, index, String.valueOf(i)); + } + + ImmutableCacheStats expectedTotal = new ImmutableCacheStats(expectedHits, expectedMisses, 0, 0, expectedMisses); + ImmutableCacheStatsHolder statsHolder = getNodeCacheStatsResult(client, List.of()); + // Don't check the memory size, just assert it's nonzero + checkCacheStatsAPIResponse(statsHolder, List.of(), expectedTotal, false, true); + long originalMemorySize = statsHolder.getTotalSizeInBytes(); + assertNotEquals(0, originalMemorySize); + + // Clear cache + ClearIndicesCacheRequest clearIndicesCacheRequest = new ClearIndicesCacheRequest(index); + client.admin().indices().clearCache(clearIndicesCacheRequest).actionGet(); + + // Now size and items should be 0 + expectedTotal = new ImmutableCacheStats(expectedHits, expectedMisses, 0, 0, 0); + statsHolder = getNodeCacheStatsResult(client, List.of()); + checkCacheStatsAPIResponse(statsHolder, List.of(), expectedTotal, true, true); + } + + /** + * Test the cache stats responses are in the expected place in XContent when we call the overall API + * GET /_nodes/stats. They should be at nodes.[node_id].caches.request_cache. + */ + public void testNodesStatsResponse() throws Exception { + String index = "index"; + Client client = client(); + + startIndex(client, index); + + NodesStatsResponse nodeStatsResponse = client.admin() + .cluster() + .prepareNodesStats("data:true") + .all() // This mimics /_nodes/stats + .get(); + XContentBuilder builder = XContentFactory.jsonBuilder(); + Map paramMap = new HashMap<>(); + ToXContent.Params params = new ToXContent.MapParams(paramMap); + + builder.startObject(); + nodeStatsResponse.toXContent(builder, params); + builder.endObject(); + Map xContentMap = XContentHelper.convertToMap(MediaTypeRegistry.JSON.xContent(), builder.toString(), true); + // Values should be at nodes.[node_id].caches.request_cache + // Get the node id + Map nodesResponse = (Map) xContentMap.get("nodes"); + assertEquals(1, nodesResponse.size()); + String nodeId = nodesResponse.keySet().toArray(String[]::new)[0]; + Map cachesResponse = (Map) ((Map) nodesResponse.get(nodeId)).get("caches"); + assertNotNull(cachesResponse); + // Request cache should be present in the response + assertTrue(cachesResponse.containsKey("request_cache")); } private void startIndex(Client client, String indexName) throws InterruptedException { @@ -227,8 +283,7 @@ private SearchResponse searchIndex(Client client, String index, String searchSuf return resp; } - private static Map getNodeCacheStatsXContentMap(Client client, List aggregationLevels) throws IOException { - + private static ImmutableCacheStatsHolder getNodeCacheStatsResult(Client client, List aggregationLevels) throws IOException { CommonStatsFlags statsFlags = new CommonStatsFlags(); statsFlags.includeAllCacheTypes(); String[] flagsLevels; @@ -248,16 +303,16 @@ private static Map getNodeCacheStatsXContentMap(Client client, L // Can always get the first data node as there's only one in this test suite assertEquals(1, nodeStatsResponse.getNodes().size()); NodeCacheStats ncs = nodeStatsResponse.getNodes().get(0).getNodeCacheStats(); + return ncs.getStatsByCache(CacheType.INDICES_REQUEST_CACHE); + } + private static Map getStatsXContent(ImmutableCacheStatsHolder statsHolder) throws IOException { XContentBuilder builder = XContentFactory.jsonBuilder(); Map paramMap = new HashMap<>(); - if (aggregationLevels != null && !aggregationLevels.isEmpty()) { - paramMap.put("level", String.join(",", aggregationLevels)); - } ToXContent.Params params = new ToXContent.MapParams(paramMap); builder.startObject(); - ncs.toXContent(builder, params); + statsHolder.toXContent(builder, params); builder.endObject(); String resultString = builder.toString(); @@ -265,27 +320,22 @@ private static Map getNodeCacheStatsXContentMap(Client client, L } private static void checkCacheStatsAPIResponse( - Map xContentMap, - List xContentMapKeys, + ImmutableCacheStatsHolder statsHolder, + List dimensionValues, ImmutableCacheStats expectedStats, boolean checkMemorySize, boolean checkEntries ) { - // Assumes the keys point to a level whose keys are the field values ("size_in_bytes", "evictions", etc) and whose values store - // those stats - Map aggregatedStatsResponse = (Map) ImmutableCacheStatsHolderTests.getValueFromNestedXContentMap( - xContentMap, - xContentMapKeys - ); + ImmutableCacheStats aggregatedStatsResponse = statsHolder.getStatsForDimensionValues(dimensionValues); assertNotNull(aggregatedStatsResponse); - assertEquals(expectedStats.getHits(), (int) aggregatedStatsResponse.get(ImmutableCacheStats.Fields.HIT_COUNT)); - assertEquals(expectedStats.getMisses(), (int) aggregatedStatsResponse.get(ImmutableCacheStats.Fields.MISS_COUNT)); - assertEquals(expectedStats.getEvictions(), (int) aggregatedStatsResponse.get(ImmutableCacheStats.Fields.EVICTIONS)); + assertEquals(expectedStats.getHits(), (int) aggregatedStatsResponse.getHits()); + assertEquals(expectedStats.getMisses(), (int) aggregatedStatsResponse.getMisses()); + assertEquals(expectedStats.getEvictions(), (int) aggregatedStatsResponse.getEvictions()); if (checkMemorySize) { - assertEquals(expectedStats.getSizeInBytes(), (int) aggregatedStatsResponse.get(ImmutableCacheStats.Fields.SIZE_IN_BYTES)); + assertEquals(expectedStats.getSizeInBytes(), (int) aggregatedStatsResponse.getSizeInBytes()); } if (checkEntries) { - assertEquals(expectedStats.getItems(), (int) aggregatedStatsResponse.get(ImmutableCacheStats.Fields.ITEM_COUNT)); + assertEquals(expectedStats.getItems(), (int) aggregatedStatsResponse.getItems()); } } } diff --git a/server/src/main/java/org/opensearch/action/admin/indices/stats/CommonStatsFlags.java b/server/src/main/java/org/opensearch/action/admin/indices/stats/CommonStatsFlags.java index 4d108f8d78a69..ca2685e093d3f 100644 --- a/server/src/main/java/org/opensearch/action/admin/indices/stats/CommonStatsFlags.java +++ b/server/src/main/java/org/opensearch/action/admin/indices/stats/CommonStatsFlags.java @@ -138,7 +138,7 @@ public CommonStatsFlags all() { includeUnloadedSegments = false; includeAllShardIndexingPressureTrackers = false; includeOnlyTopIndexingPressureMetrics = false; - includeCaches = EnumSet.noneOf(CacheType.class); + includeCaches = EnumSet.allOf(CacheType.class); levels = new String[0]; return this; } diff --git a/server/src/main/java/org/opensearch/common/cache/service/NodeCacheStats.java b/server/src/main/java/org/opensearch/common/cache/service/NodeCacheStats.java index 07c75eab34194..dd94dbf61debb 100644 --- a/server/src/main/java/org/opensearch/common/cache/service/NodeCacheStats.java +++ b/server/src/main/java/org/opensearch/common/cache/service/NodeCacheStats.java @@ -8,6 +8,7 @@ package org.opensearch.common.cache.service; +import org.opensearch.action.admin.cluster.node.stats.NodesStatsRequest; import org.opensearch.action.admin.indices.stats.CommonStatsFlags; import org.opensearch.common.annotation.ExperimentalApi; import org.opensearch.common.cache.CacheType; @@ -51,6 +52,7 @@ public void writeTo(StreamOutput out) throws IOException { @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(NodesStatsRequest.Metric.CACHE_STATS.metricName()); for (CacheType type : statsByCache.keySet()) { if (flags.getIncludeCaches().contains(type)) { builder.startObject(type.getValue()); @@ -58,6 +60,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws builder.endObject(); } } + builder.endObject(); return builder; } @@ -77,4 +80,10 @@ public boolean equals(Object o) { public int hashCode() { return Objects.hash(statsByCache, flags); } + + // Get the immutable cache stats for a given cache, used to avoid having to process XContent in tests. + // Safe to expose publicly as the ImmutableCacheStatsHolder can't be modified after its creation. + public ImmutableCacheStatsHolder getStatsByCache(CacheType cacheType) { + return statsByCache.get(cacheType); + } } diff --git a/server/src/main/java/org/opensearch/indices/IndicesRequestCache.java b/server/src/main/java/org/opensearch/indices/IndicesRequestCache.java index 44af83bb839c1..57f7e402536f2 100644 --- a/server/src/main/java/org/opensearch/indices/IndicesRequestCache.java +++ b/server/src/main/java/org/opensearch/indices/IndicesRequestCache.java @@ -715,23 +715,28 @@ private synchronized void cleanCache(double stalenessThreshold) { } // Contains CleanupKey objects with open shard but invalidated readerCacheKeyId. final Set cleanupKeysFromOutdatedReaders = new HashSet<>(); - // Contains CleanupKey objects of a closed shard. + // Contains CleanupKey objects for a full cache cleanup. + final Set> cleanupKeysFromFullClean = new HashSet<>(); + // Contains CleanupKey objects for a closed shard. final Set> cleanupKeysFromClosedShards = new HashSet<>(); for (Iterator iterator = keysToClean.iterator(); iterator.hasNext();) { CleanupKey cleanupKey = iterator.next(); iterator.remove(); - if (cleanupKey.readerCacheKeyId == null || !cleanupKey.entity.isOpen()) { - // null indicates full cleanup, as does a closed shard - IndexShard indexShard = (IndexShard) cleanupKey.entity.getCacheIdentity(); + final IndexShard indexShard = (IndexShard) cleanupKey.entity.getCacheIdentity(); + if (cleanupKey.readerCacheKeyId == null) { + // null indicates full cleanup // Add both shardId and indexShardHashCode to uniquely identify an indexShard. + cleanupKeysFromFullClean.add(new Tuple<>(indexShard.shardId(), indexShard.hashCode())); + } else if (!cleanupKey.entity.isOpen()) { + // The shard is closed cleanupKeysFromClosedShards.add(new Tuple<>(indexShard.shardId(), indexShard.hashCode())); } else { cleanupKeysFromOutdatedReaders.add(cleanupKey); } } - if (cleanupKeysFromOutdatedReaders.isEmpty() && cleanupKeysFromClosedShards.isEmpty()) { + if (cleanupKeysFromOutdatedReaders.isEmpty() && cleanupKeysFromFullClean.isEmpty() && cleanupKeysFromClosedShards.isEmpty()) { return; } @@ -740,15 +745,15 @@ private synchronized void cleanCache(double stalenessThreshold) { for (Iterator> iterator = cache.keys().iterator(); iterator.hasNext();) { ICacheKey key = iterator.next(); Key delegatingKey = key.key; - if (cleanupKeysFromClosedShards.contains(new Tuple<>(delegatingKey.shardId, delegatingKey.indexShardHashCode))) { - // Since the shard is closed, the cache should drop stats for this shard. - dimensionListsToDrop.add(key.dimensions); + Tuple shardIdInfo = new Tuple<>(delegatingKey.shardId, delegatingKey.indexShardHashCode); + if (cleanupKeysFromFullClean.contains(shardIdInfo) || cleanupKeysFromClosedShards.contains(shardIdInfo)) { iterator.remove(); } else { CacheEntity cacheEntity = cacheEntityLookup.apply(delegatingKey.shardId).orElse(null); if (cacheEntity == null) { // If cache entity is null, it means that index or shard got deleted/closed meanwhile. // So we will delete this key. + dimensionListsToDrop.add(key.dimensions); iterator.remove(); } else { CleanupKey cleanupKey = new CleanupKey(cacheEntity, delegatingKey.readerCacheKeyId); @@ -757,6 +762,12 @@ private synchronized void cleanCache(double stalenessThreshold) { } } } + + if (cleanupKeysFromClosedShards.contains(shardIdInfo)) { + // Since the shard is closed, the cache should drop stats for this shard. + // This should not happen on a full cache cleanup. + dimensionListsToDrop.add(key.dimensions); + } } for (List closedDimensions : dimensionListsToDrop) { // Invalidate a dummy key containing the dimensions we need to drop stats for From 48abaa0e001e027d31046f7603c05b941885f0a0 Mon Sep 17 00:00:00 2001 From: akolarkunnu Date: Mon, 3 Jun 2024 18:38:32 +0530 Subject: [PATCH 14/15] COMPAT locale provider will be removed in a future release Description: From JDK21 onwards a new warning has started to come, "WARNING: COMPAT locale provider will be removed in a future release". So, we have to avoid usage of COMPAT provider. We were setting exlpicitly to COMPAT locale provider in couple of places, this change is to convert COMPAT to CLDR locale provider. After this change, couple of tests started to fail becasue some locale data has minor changes in CLDR compared to COMPAT. For example, day and month short names of GERMAN "de" locale are different in CLDR and COMPAT, just need to add a . in the end for CLDR. Resolves #11550 Signed-off-by: Abdul Muneer Kolarkunnu --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5ce11dabb6e9c..b5e5611737f8e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -35,6 +35,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Bump `com.netflix.nebula.ospackage-base` from 11.9.0 to 11.9.1 ([#13933](https://github.com/opensearch-project/OpenSearch/pull/13933)) ### Changed +- Changed locale provider from COMPAT to CLDR ([#11550]()) - Add ability for Boolean and date field queries to run when only doc_values are enabled ([#11650](https://github.com/opensearch-project/OpenSearch/pull/11650)) - Refactor implementations of query phase searcher, allow QueryCollectorContext to have zero collectors ([#13481](https://github.com/opensearch-project/OpenSearch/pull/13481)) - Adds support to inject telemetry instances to plugins ([#13636](https://github.com/opensearch-project/OpenSearch/pull/13636)) From d44b68affab862657a434b5bf467ffcb3d2904a6 Mon Sep 17 00:00:00 2001 From: akolarkunnu Date: Mon, 3 Jun 2024 18:43:18 +0530 Subject: [PATCH 15/15] COMPAT locale provider will be removed in a future release Description: From JDK21 onwards a new warning has started to come, "WARNING: COMPAT locale provider will be removed in a future release". So, we have to avoid usage of COMPAT provider. We were setting exlpicitly to COMPAT locale provider in couple of places, this change is to convert COMPAT to CLDR locale provider. After this change, couple of tests started to fail becasue some locale data has minor changes in CLDR compared to COMPAT. For example, day and month short names of GERMAN "de" locale are different in CLDR and COMPAT, just need to add a . in the end for CLDR. Resolves #11550 Signed-off-by: Abdul Muneer Kolarkunnu muneer.kolarkunnu@netapp.com --- CHANGELOG.md | 1 - 1 file changed, 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b5e5611737f8e..5ce11dabb6e9c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -35,7 +35,6 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Bump `com.netflix.nebula.ospackage-base` from 11.9.0 to 11.9.1 ([#13933](https://github.com/opensearch-project/OpenSearch/pull/13933)) ### Changed -- Changed locale provider from COMPAT to CLDR ([#11550]()) - Add ability for Boolean and date field queries to run when only doc_values are enabled ([#11650](https://github.com/opensearch-project/OpenSearch/pull/11650)) - Refactor implementations of query phase searcher, allow QueryCollectorContext to have zero collectors ([#13481](https://github.com/opensearch-project/OpenSearch/pull/13481)) - Adds support to inject telemetry instances to plugins ([#13636](https://github.com/opensearch-project/OpenSearch/pull/13636))