From d18b434e626b85d7351cd8d8181d798346e12b75 Mon Sep 17 00:00:00 2001
From: Armin Braun
Date: Tue, 14 Jul 2020 22:31:07 +0200
Subject: [PATCH] Remove Artificially Low Chunk Size Limits from GCS + Azure
 Blob Stores (#59279) (#59564)

Removing these limits as they cause unnecessarily many objects in the blob stores.
We do not have to worry about BwC of this change since we do not support any
3rd party implementations of Azure or GCS.
Also, since there is no valid reason to set a maximum chunk size different from
the default at this point, removing the documentation for the setting (which was
incorrect in the case of Azure to begin with).

Closes #56018
---
 docs/plugins/repository-azure.asciidoc               | 12 ++++++++----
 docs/plugins/repository-gcs.asciidoc                 |  8 ++++++--
 .../repositories/azure/AzureStorageService.java      |  6 ++++--
 .../azure/AzureRepositorySettingsTests.java          |  5 +++--
 .../gcs/GoogleCloudStorageRepository.java            |  7 ++++++-
 .../GoogleCloudStorageBlobStoreRepositoryTests.java  |  4 ++--
 6 files changed, 29 insertions(+), 13 deletions(-)

diff --git a/docs/plugins/repository-azure.asciidoc b/docs/plugins/repository-azure.asciidoc
index 4700f8bb1d049..e7e5b7a8ceaec 100644
--- a/docs/plugins/repository-azure.asciidoc
+++ b/docs/plugins/repository-azure.asciidoc
@@ -30,7 +30,7 @@ bin/elasticsearch-keystore add azure.client.secondary.sas_token
 ----------------------------------------------------------------

 For more information about these settings, see
-<>.
+<>.

 [IMPORTANT]
 .Supported Azure Storage Account types
@@ -99,7 +99,7 @@ stored in the keystore are marked as "secure"; the other settings belong in the
 `account` ({ref}/secure-settings.html[Secure], {ref}/secure-settings.html#reloadable-secure-settings[reloadable])::

     The Azure account name, which is used by the repository's internal Azure client.
-    
+
 `endpoint_suffix`::

     The Azure endpoint suffix to connect to. The default value is
     `core.windows.net`.
@@ -164,9 +164,13 @@ The Azure repository supports following settings:

 `chunk_size`::

-    Big files can be broken down into chunks during snapshotting if needed.
+    Big files can be broken down into multiple smaller blobs in the blob store during snapshotting.
+    It is not recommended to change this value from its default unless there is an explicit reason for limiting the
+    size of blobs in the repository. Setting a value lower than the default can result in an increased number of API
+    calls to the Azure blob store during snapshot create as well as restore operations compared to using the default
+    value and thus make both operations slower as well as more costly.
     Specify the chunk size as a value and unit, for example:
-    `10MB`, `5KB`, `500B`. Defaults to `64MB` (64MB max).
+    `10MB`, `5KB`, `500B`. Defaults to the maximum size of a blob in the Azure blob store which is `5TB`.

 `compress`::
diff --git a/docs/plugins/repository-gcs.asciidoc b/docs/plugins/repository-gcs.asciidoc
index 6ef7a20ab14f0..a02a7a2034dfc 100644
--- a/docs/plugins/repository-gcs.asciidoc
+++ b/docs/plugins/repository-gcs.asciidoc
@@ -228,9 +228,13 @@ The following settings are supported:

 `chunk_size`::

-    Big files can be broken down into chunks during snapshotting if needed.
+    Big files can be broken down into multiple smaller blobs in the blob store during snapshotting.
+    It is not recommended to change this value from its default unless there is an explicit reason for limiting the
+    size of blobs in the repository. Setting a value lower than the default can result in an increased number of API
+    calls to the Google Cloud Storage Service during snapshot create as well as restore operations compared to using
+    the default value and thus make both operations slower as well as more costly.
     Specify the chunk size as a value and unit, for example:
-    `10MB` or `5KB`. Defaults to `100MB`, which is the maximum permitted.
+    `10MB`, `5KB`, `500B`. Defaults to the maximum size of a blob in the Google Cloud Storage Service which is `5TB`.

 `compress`::
diff --git a/plugins/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureStorageService.java b/plugins/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureStorageService.java
index dd2e3b2d6273d..29e6ae3458f2c 100644
--- a/plugins/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureStorageService.java
+++ b/plugins/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureStorageService.java
@@ -20,6 +20,7 @@
 package org.elasticsearch.repositories.azure;

 import com.microsoft.azure.storage.CloudStorageAccount;
+import com.microsoft.azure.storage.Constants;
 import com.microsoft.azure.storage.OperationContext;
 import com.microsoft.azure.storage.RetryPolicy;
 import com.microsoft.azure.storage.RetryPolicyFactory;
@@ -44,10 +45,11 @@ public class AzureStorageService {

     public static final ByteSizeValue MIN_CHUNK_SIZE = new ByteSizeValue(1, ByteSizeUnit.BYTES);
+
     /**
-     * {@link com.microsoft.azure.storage.blob.BlobConstants#MAX_SINGLE_UPLOAD_BLOB_SIZE_IN_BYTES}
+     * Maximum allowed blob size in Azure blob store.
      */
-    public static final ByteSizeValue MAX_CHUNK_SIZE = new ByteSizeValue(256, ByteSizeUnit.MB);
+    public static final ByteSizeValue MAX_CHUNK_SIZE = new ByteSizeValue(Constants.MAX_BLOB_SIZE, ByteSizeUnit.BYTES);

     // 'package' for testing
     volatile Map<String, AzureStorageSettings> storageSettings = emptyMap();
diff --git a/plugins/repository-azure/src/test/java/org/elasticsearch/repositories/azure/AzureRepositorySettingsTests.java b/plugins/repository-azure/src/test/java/org/elasticsearch/repositories/azure/AzureRepositorySettingsTests.java
index 1f2f2ae9b8adb..9920d4c3e46c2 100644
--- a/plugins/repository-azure/src/test/java/org/elasticsearch/repositories/azure/AzureRepositorySettingsTests.java
+++ b/plugins/repository-azure/src/test/java/org/elasticsearch/repositories/azure/AzureRepositorySettingsTests.java
@@ -123,8 +123,9 @@ public void testChunkSize() {

         // greater than max chunk size not allowed
         e = expectThrows(IllegalArgumentException.class, () ->
-            azureRepository(Settings.builder().put("chunk_size", "257mb").build()));
-        assertEquals("failed to parse value [257mb] for setting [chunk_size], must be <= [256mb]", e.getMessage());
+            azureRepository(Settings.builder().put("chunk_size", "6tb").build()));
+        assertEquals("failed to parse value [6tb] for setting [chunk_size], must be <= ["
+            + AzureStorageService.MAX_CHUNK_SIZE.getStringRep() + "]", e.getMessage());
     }
 }
diff --git a/plugins/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageRepository.java b/plugins/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageRepository.java
index 7e033c47f20db..bcad2a5f937d7 100644
--- a/plugins/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageRepository.java
+++ b/plugins/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageRepository.java
@@ -45,7 +45,12 @@ class GoogleCloudStorageRepository extends BlobStoreRepository {
 
     // package private for testing
     static final ByteSizeValue MIN_CHUNK_SIZE = new ByteSizeValue(1, ByteSizeUnit.BYTES);
-    static final ByteSizeValue MAX_CHUNK_SIZE = new ByteSizeValue(100, ByteSizeUnit.MB);
+
+    /**
+     * Maximum allowed object size in GCS.
+     * @see <a href="https://cloud.google.com/storage/quotas#objects">GCS documentation</a> for details.
+     */
+    static final ByteSizeValue MAX_CHUNK_SIZE = new ByteSizeValue(5, ByteSizeUnit.TB);

     static final String TYPE = "gcs";
diff --git a/plugins/repository-gcs/src/test/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageBlobStoreRepositoryTests.java b/plugins/repository-gcs/src/test/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageBlobStoreRepositoryTests.java
index 595051340417e..8ab51a462842b 100644
--- a/plugins/repository-gcs/src/test/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageBlobStoreRepositoryTests.java
+++ b/plugins/repository-gcs/src/test/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageBlobStoreRepositoryTests.java
@@ -184,10 +184,10 @@ public void testChunkSize() {

         // greater than max chunk size not allowed
         e = expectThrows(IllegalArgumentException.class, () -> {
             final RepositoryMetadata repoMetadata = new RepositoryMetadata("repo", GoogleCloudStorageRepository.TYPE,
-                Settings.builder().put("chunk_size", "101mb").build());
+                Settings.builder().put("chunk_size", "6tb").build());
             GoogleCloudStorageRepository.getSetting(GoogleCloudStorageRepository.CHUNK_SIZE, repoMetadata);
         });
-        assertEquals("failed to parse value [101mb] for setting [chunk_size], must be <= [100mb]", e.getMessage());
+        assertEquals("failed to parse value [6tb] for setting [chunk_size], must be <= [5tb]", e.getMessage());
     }

     public void testWriteReadLarge() throws IOException {
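
To make the cost argument from the doc changes concrete: a snapshotted file of
fileSize bytes is stored as ceil(fileSize / chunk_size) blobs, so any chunk_size
below the file size multiplies both the object count and the API calls needed to
write the file and read it back. The following standalone Java sketch is
illustrative only (it is not Elasticsearch code, and the 50GB file size is a
made-up example); it compares the old 256MB Azure limit with the new 5TB default:

    public class ChunkCountSketch {
        // Number of blobs a single file produces when split into chunk_size pieces
        // (ceiling division, since the last chunk may be partial).
        static long blobCount(long fileSizeBytes, long chunkSizeBytes) {
            return (fileSizeBytes + chunkSizeBytes - 1) / chunkSizeBytes;
        }

        public static void main(String[] args) {
            long gb = 1024L * 1024 * 1024;
            long fileSize = 50 * gb; // hypothetical 50GB file in a snapshot

            // Old Azure limit of 256MB chunks: 200 blobs, and at least as many API calls.
            System.out.println(blobCount(fileSize, 256 * 1024 * 1024)); // prints 200
            // New 5TB default: the file stays a single blob.
            System.out.println(blobCount(fileSize, 5 * 1024 * gb));     // prints 1
        }
    }

The same arithmetic applies to the old 100MB GCS limit, under which the example
file would have been written as 512 separate objects.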
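The updated tests assert the parse-time bounds check on chunk_size. A minimal
standalone sketch of that check follows; the real enforcement lives in
Elasticsearch's ByteSizeValue setting parsing, and validateChunkSize here is a
hypothetical helper that merely reproduces the error message the tests expect:

    public class ChunkSizeValidationSketch {
        static void validateChunkSize(String key, long valueBytes, long minBytes, long maxBytes,
                                      String rawValue, String maxRep) {
            // Reject values above the maximum with the same message shape the tests assert.
            if (valueBytes > maxBytes) {
                throw new IllegalArgumentException(
                    "failed to parse value [" + rawValue + "] for setting [" + key + "], must be <= [" + maxRep + "]");
            }
            // Values below the 1-byte minimum are rejected analogously.
            if (valueBytes < minBytes) {
                throw new IllegalArgumentException(
                    "failed to parse value [" + rawValue + "] for setting [" + key + "], must be >= [1b]");
            }
        }

        public static void main(String[] args) {
            long tb = 1024L * 1024 * 1024 * 1024;
            try {
                // Mirrors the new GCS test case: 6tb exceeds the 5tb maximum.
                validateChunkSize("chunk_size", 6 * tb, 1L, 5 * tb, "6tb", "5tb");
            } catch (IllegalArgumentException e) {
                // prints: failed to parse value [6tb] for setting [chunk_size], must be <= [5tb]
                System.out.println(e.getMessage());
            }
        }
    }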